rumdl_lib/utils/
mkdocs_extensions.rs

//! MkDocs PyMdown extensions support
//!
//! This module provides support for various PyMdown Markdown extensions
//! commonly used with MkDocs Material:
//!
//! - **InlineHilite**: Inline code highlighting `` `#!python code` ``
//! - **Keys**: Keyboard key notation `++ctrl+alt+delete++`
//! - **Caret**: Superscript and insert `^superscript^` and `^^insert^^`
//! - **Mark**: Highlight text `==highlighted==`
//! - **SmartSymbols**: Auto-replace symbols `(c)` → `©`
//!
//! ## References
//!
//! - [PyMdown Extensions](https://facelessuser.github.io/pymdown-extensions/)
//! - [InlineHilite](https://facelessuser.github.io/pymdown-extensions/extensions/inlinehilite/)
//! - [Keys](https://facelessuser.github.io/pymdown-extensions/extensions/keys/)
//! - [Caret](https://facelessuser.github.io/pymdown-extensions/extensions/caret/)
//! - [Mark](https://facelessuser.github.io/pymdown-extensions/extensions/mark/)
//! - [SmartSymbols](https://facelessuser.github.io/pymdown-extensions/extensions/smartsymbols/)
20use regex::Regex;
21use std::sync::LazyLock;
22
23// ============================================================================
24// InlineHilite: `#!lang code` syntax for inline code with syntax highlighting
25// ============================================================================
26
27/// Pattern to match InlineHilite syntax: `#!language code`
28/// Examples: `#!python print("hello")`, `#!js alert('hi')`
29static INLINE_HILITE_PATTERN: LazyLock<Regex> =
30    LazyLock::new(|| Regex::new(r"`#!([a-zA-Z][a-zA-Z0-9_+-]*)\s+[^`]+`").unwrap());
31
32/// Pattern to match inline hilite shebang at the start of backtick content
33static INLINE_HILITE_SHEBANG: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^#!([a-zA-Z][a-zA-Z0-9_+-]*)").unwrap());
34
35/// Check if a line contains InlineHilite syntax
36#[inline]
37pub fn contains_inline_hilite(line: &str) -> bool {
38    if !line.contains('`') || !line.contains("#!") {
39        return false;
40    }
41    INLINE_HILITE_PATTERN.is_match(line)
42}
43
44/// Check if code span content starts with InlineHilite shebang
45#[inline]
46pub fn is_inline_hilite_content(content: &str) -> bool {
47    INLINE_HILITE_SHEBANG.is_match(content)
48}
49
50// ============================================================================
51// Keys: ++key++ syntax for keyboard keys
52// ============================================================================
53
54/// Pattern to match keyboard key notation: `++key++` or `++key1+key2++`
55/// Examples: ++ctrl++, ++ctrl+alt+delete++, ++cmd+shift+p++
56static KEYS_PATTERN: LazyLock<Regex> =
57    LazyLock::new(|| Regex::new(r"\+\+([a-zA-Z0-9_-]+(?:\+[a-zA-Z0-9_-]+)*)\+\+").unwrap());
58
/// Well-known keyboard key names, usable for validating the keys of a shortcut.
///
/// The list is not exhaustive: single letters and digits are valid key names
/// as well but are not enumerated here.
#[rustfmt::skip]
pub const COMMON_KEYS: &[&str] = &[
    // Modifiers
    "ctrl", "alt", "shift", "cmd", "meta", "win", "windows", "option",
    // Whitespace and editing
    "enter", "return", "tab", "space", "backspace", "delete", "del", "insert", "ins",
    // Navigation
    "home", "end", "pageup", "pagedown", "up", "down", "left", "right",
    // Escape
    "escape", "esc",
    // Locks
    "capslock", "numlock", "scrolllock",
    // System
    "printscreen", "pause", "break",
    // Function keys
    "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12",
];
108
/// One keyboard shortcut (`++key1+key2++`) located in a line of text.
#[derive(Clone, Debug, PartialEq)]
pub struct KeyboardShortcut {
    /// The matched text exactly as written, including the surrounding `++`.
    pub full_text: String,
    /// The key names of the shortcut, in order, without any `+` separators.
    pub keys: Vec<String>,
    /// Offset in the line where the shortcut begins (0-indexed).
    pub start: usize,
    /// Offset in the line just past the shortcut (0-indexed, exclusive).
    pub end: usize,
}
121
122/// Check if a line contains keyboard key notation
123#[inline]
124pub fn contains_keys(line: &str) -> bool {
125    if !line.contains("++") {
126        return false;
127    }
128    KEYS_PATTERN.is_match(line)
129}
130
131/// Find all keyboard shortcuts in a line
132pub fn find_keyboard_shortcuts(line: &str) -> Vec<KeyboardShortcut> {
133    if !line.contains("++") {
134        return Vec::new();
135    }
136
137    let mut results = Vec::new();
138
139    for m in KEYS_PATTERN.find_iter(line) {
140        let full_text = m.as_str().to_string();
141        // Remove the surrounding ++ and split by +
142        let inner = &full_text[2..full_text.len() - 2];
143        let keys: Vec<String> = inner.split('+').map(|s| s.to_string()).collect();
144
145        results.push(KeyboardShortcut {
146            full_text,
147            keys,
148            start: m.start(),
149            end: m.end(),
150        });
151    }
152
153    results
154}
155
156/// Check if a position in a line is within a keyboard shortcut
157pub fn is_in_keys(line: &str, position: usize) -> bool {
158    for shortcut in find_keyboard_shortcuts(line) {
159        if shortcut.start <= position && position < shortcut.end {
160            return true;
161        }
162    }
163    false
164}
165
166// ============================================================================
167// Caret: ^superscript^ and ^^insert^^ syntax
168// ============================================================================
169
170/// Pattern to match insert: `^^text^^` (double caret)
171/// Must be checked before superscript since ^^ is more specific
172static INSERT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^\^([^\^]+)\^\^").unwrap());
173
174/// Check if a line contains superscript syntax
175/// Returns true if there's a single caret pattern that's NOT part of ^^insert^^
176#[inline]
177pub fn contains_superscript(line: &str) -> bool {
178    if !line.contains('^') {
179        return false;
180    }
181
182    // Mask out insert patterns (^^text^^) first
183    let masked = mask_insert_patterns(line);
184
185    // Now check for single caret superscript in the remaining text
186    // We need a simple pattern: ^text^ where text doesn't contain ^
187    let bytes = masked.as_bytes();
188    let mut i = 0;
189    while i < bytes.len() {
190        if bytes[i] == b'^' {
191            // Check if this is start of superscript (not masked)
192            // Find the closing ^
193            if let Some(end) = masked[i + 1..].find('^') {
194                let end_pos = i + 1 + end;
195                // Check that content between carets is not empty and doesn't contain ^
196                let content = &masked[i + 1..end_pos];
197                if !content.is_empty() && !content.contains('^') {
198                    return true;
199                }
200            }
201        }
202        i += 1;
203    }
204    false
205}
206
207/// Mask insert patterns (^^text^^) with spaces to help detect superscript
208fn mask_insert_patterns(line: &str) -> String {
209    if !line.contains("^^") {
210        return line.to_string();
211    }
212
213    let mut result = line.to_string();
214    for m in INSERT_PATTERN.find_iter(line) {
215        let replacement = " ".repeat(m.end() - m.start());
216        result.replace_range(m.start()..m.end(), &replacement);
217    }
218    result
219}
220
221/// Check if a line contains insert syntax
222#[inline]
223pub fn contains_insert(line: &str) -> bool {
224    if !line.contains("^^") {
225        return false;
226    }
227    INSERT_PATTERN.is_match(line)
228}
229
230/// Check if a position is within superscript or insert markup
231pub fn is_in_caret_markup(line: &str, position: usize) -> bool {
232    if !line.contains('^') {
233        return false;
234    }
235
236    // Check insert first (double caret takes precedence)
237    for m in INSERT_PATTERN.find_iter(line) {
238        if m.start() <= position && position < m.end() {
239            return true;
240        }
241    }
242
243    // Check superscript - find ^text^ patterns that aren't part of ^^insert^^
244    let masked = mask_insert_patterns(line);
245    let bytes = masked.as_bytes();
246    let mut i = 0;
247    while i < bytes.len() {
248        if bytes[i] == b'^' {
249            // Find the closing ^
250            if let Some(end) = masked[i + 1..].find('^') {
251                let end_pos = i + 1 + end;
252                // Check that content between carets is not empty
253                let content = &masked[i + 1..end_pos];
254                if !content.is_empty() && !content.contains('^') && position >= i && position <= end_pos + 1 {
255                    return true;
256                }
257                // Skip past this pattern
258                i = end_pos + 1;
259                continue;
260            }
261        }
262        i += 1;
263    }
264
265    false
266}
267
268// ============================================================================
269// Mark: ==highlighted== syntax
270// ============================================================================
271
272/// Pattern to match highlight/mark: `==text==`
273static MARK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"==([^=]+)==").unwrap());
274
275/// Check if a line contains mark/highlight syntax
276#[inline]
277pub fn contains_mark(line: &str) -> bool {
278    if !line.contains("==") {
279        return false;
280    }
281    MARK_PATTERN.is_match(line)
282}
283
284/// Check if a position is within mark markup
285pub fn is_in_mark(line: &str, position: usize) -> bool {
286    if !line.contains("==") {
287        return false;
288    }
289
290    for m in MARK_PATTERN.find_iter(line) {
291        if m.start() <= position && position < m.end() {
292            return true;
293        }
294    }
295
296    false
297}
298
299// ============================================================================
300// SmartSymbols: (c), (tm), (r), -->, <--, etc.
301// ============================================================================
302
/// Human-readable list of SmartSymbol inputs and their Unicode replacements.
///
/// Kept purely as documentation: this module only needs to detect the
/// patterns, not replace them, so nothing reads this constant (hence the
/// `dead_code` allowance). Each entry is the typed text followed by the
/// symbol it is replaced with.
#[allow(dead_code)]
const SMART_SYMBOLS_DOC: &str = concat!(
    "(c)©, (r)®, (tm)™, ",
    "...→…, --→–, ---→—, ",
    "<->↔, =>⇒, <=⇐, <=>⇔, ",
    "1/4¼, 1/2½, 3/4¾, ",
    "+-±, !=≠",
);
310
311/// Pattern to match any SmartSymbol that might be replaced
312static SMART_SYMBOL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
313    Regex::new(r"(?:\(c\)|\(C\)|\(r\)|\(R\)|\(tm\)|\(TM\)|\(p\)|\.\.\.|-{2,3}|<->|<-|->|<=>|<=|=>|1/4|1/2|3/4|\+-|!=)")
314        .unwrap()
315});
316
317/// Check if a line contains potential SmartSymbol patterns
318#[inline]
319pub fn contains_smart_symbols(line: &str) -> bool {
320    // Quick checks for common patterns
321    if !line.contains('(')
322        && !line.contains("...")
323        && !line.contains("--")
324        && !line.contains("->")
325        && !line.contains("<-")
326        && !line.contains("=>")
327        && !line.contains("<=")
328        && !line.contains("1/")
329        && !line.contains("3/")
330        && !line.contains("+-")
331        && !line.contains("!=")
332    {
333        return false;
334    }
335    SMART_SYMBOL_PATTERN.is_match(line)
336}
337
338/// Check if a position is at a SmartSymbol that will be replaced
339pub fn is_in_smart_symbol(line: &str, position: usize) -> bool {
340    for m in SMART_SYMBOL_PATTERN.find_iter(line) {
341        if m.start() <= position && position < m.end() {
342            return true;
343        }
344    }
345    false
346}
347
348// ============================================================================
349// Combined utilities
350// ============================================================================
351
352/// Check if a position is within any PyMdown extension markup
353///
354/// This includes Keys, Caret (superscript/insert), Mark, and SmartSymbols.
355/// InlineHilite is excluded as it uses standard backtick syntax.
356pub fn is_in_pymdown_markup(line: &str, position: usize) -> bool {
357    is_in_keys(line, position)
358        || is_in_caret_markup(line, position)
359        || is_in_mark(line, position)
360        || is_in_smart_symbol(line, position)
361}
362
363/// Mask all PyMdown extension markup with spaces
364///
365/// Useful for rules that need to process text without being confused
366/// by extension syntax.
367pub fn mask_pymdown_markup(line: &str) -> String {
368    let mut result = line.to_string();
369
370    // Process in specific order to handle overlapping patterns correctly
371
372    // Keys: ++key++
373    if line.contains("++") {
374        for m in KEYS_PATTERN.find_iter(line) {
375            let replacement = " ".repeat(m.end() - m.start());
376            // We need to replace at the right position considering previous replacements
377            // Since we're replacing with same-length strings, positions stay the same
378            result.replace_range(m.start()..m.end(), &replacement);
379        }
380    }
381
382    // Insert: ^^text^^ (must come before superscript)
383    if result.contains("^^") {
384        let temp = result.clone();
385        for m in INSERT_PATTERN.find_iter(&temp) {
386            let replacement = " ".repeat(m.end() - m.start());
387            result.replace_range(m.start()..m.end(), &replacement);
388        }
389    }
390
391    // Superscript: ^text^ - use manual parsing since regex crate doesn't support lookaround
392    if result.contains('^') {
393        let mut new_result = result.clone();
394        let bytes = result.as_bytes();
395        let mut superscript_ranges = Vec::new();
396
397        let mut i = 0;
398        while i < bytes.len() {
399            if bytes[i] == b'^' {
400                // Find closing ^
401                if let Some(end) = result[i + 1..].find('^') {
402                    let end_pos = i + 1 + end;
403                    let content = &result[i + 1..end_pos];
404                    // Valid superscript if content is not empty and doesn't contain ^
405                    if !content.is_empty() && !content.contains('^') {
406                        superscript_ranges.push((i, end_pos + 1));
407                        i = end_pos + 1;
408                        continue;
409                    }
410                }
411            }
412            i += 1;
413        }
414
415        // Apply masking in reverse order to preserve indices
416        for (start, end) in superscript_ranges.into_iter().rev() {
417            let replacement = " ".repeat(end - start);
418            new_result.replace_range(start..end, &replacement);
419        }
420        result = new_result;
421    }
422
423    // Mark: ==text==
424    if result.contains("==") {
425        let temp = result.clone();
426        for m in MARK_PATTERN.find_iter(&temp) {
427            let replacement = " ".repeat(m.end() - m.start());
428            result.replace_range(m.start()..m.end(), &replacement);
429        }
430    }
431
432    result
433}
434
#[cfg(test)]
mod tests {
    use super::*;

    /// Applies a line-level predicate to every entry of `lines` and checks
    /// that each result equals `expected`.
    fn check_lines(predicate: fn(&str) -> bool, expected: bool, lines: &[&str]) {
        for &line in lines {
            assert_eq!(predicate(line), expected, "unexpected result for {line:?}");
        }
    }

    /// Applies a position-level predicate to `line` for every
    /// `(position, expected)` pair in `cases`.
    fn check_positions(predicate: fn(&str, usize) -> bool, line: &str, cases: &[(usize, bool)]) {
        for &(position, expected) in cases {
            assert_eq!(
                predicate(line, position),
                expected,
                "unexpected result at offset {position} of {line:?}"
            );
        }
    }

    // InlineHilite tests
    #[test]
    fn test_contains_inline_hilite() {
        check_lines(
            contains_inline_hilite,
            true,
            &[
                "`#!python print('hello')`",
                "Use `#!js alert('hi')` for alerts",
                "`#!c++ cout << x;`",
            ],
        );

        // Not InlineHilite
        check_lines(
            contains_inline_hilite,
            false,
            &["`regular code`", "#! not in backticks", "`#!` empty"],
        );
    }

    #[test]
    fn test_is_inline_hilite_content() {
        check_lines(is_inline_hilite_content, true, &["#!python print()", "#!js code"]);
        check_lines(
            is_inline_hilite_content,
            false,
            &["regular code", " #!python with space"],
        );
    }

    // Keys tests
    #[test]
    fn test_contains_keys() {
        check_lines(
            contains_keys,
            true,
            &[
                "Press ++ctrl++ to continue",
                "++ctrl+alt+delete++",
                "Use ++cmd+shift+p++ for command palette",
            ],
        );
        check_lines(
            contains_keys,
            false,
            &["Use + for addition", "a++ increment", "++incomplete"],
        );
    }

    #[test]
    fn test_find_keyboard_shortcuts() {
        let found = find_keyboard_shortcuts("Press ++ctrl+c++ then ++ctrl+v++");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].keys, vec!["ctrl", "c"]);
        assert_eq!(found[1].keys, vec!["ctrl", "v"]);

        let found = find_keyboard_shortcuts("++ctrl+alt+delete++");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].keys, vec!["ctrl", "alt", "delete"]);
    }

    #[test]
    fn test_is_in_keys() {
        check_positions(
            is_in_keys,
            "Press ++ctrl++ here",
            &[
                (0, false),  // "P"
                (5, false),  // " " before the shortcut
                (6, true),   // first +
                (10, true),  // "r"
                (13, true),  // last +
                (14, false), // " " after the shortcut
            ],
        );
    }

    // Caret tests
    #[test]
    fn test_contains_superscript() {
        check_lines(contains_superscript, true, &["E=mc^2^", "x^n^ power"]);
        // A double caret is insert, not superscript.
        check_lines(contains_superscript, false, &["no caret here", "^^insert^^"]);
    }

    #[test]
    fn test_contains_insert() {
        check_lines(contains_insert, true, &["^^inserted text^^", "Some ^^new^^ text"]);
        check_lines(contains_insert, false, &["^superscript^", "no markup"]);
    }

    #[test]
    fn test_is_in_caret_markup() {
        check_positions(
            is_in_caret_markup,
            "Text ^super^ here",
            &[
                (0, false),  // "T"
                (5, true),   // opening ^
                (8, true),   // "p"
                (13, false), // "h" of "here"
            ],
        );

        check_positions(
            is_in_caret_markup,
            "Text ^^insert^^ here",
            &[
                (5, true),  // first ^
                (10, true), // "e"
            ],
        );
    }

    // Mark tests
    #[test]
    fn test_contains_mark() {
        check_lines(
            contains_mark,
            true,
            &["This is ==highlighted== text", "==important=="],
        );
        // A lone `==` surrounded by spaces is a comparison, not a highlight.
        check_lines(contains_mark, false, &["no highlight", "a == b comparison"]);
    }

    #[test]
    fn test_is_in_mark() {
        check_positions(
            is_in_mark,
            "Text ==highlight== more",
            &[
                (0, false),  // "T"
                (5, true),   // first =
                (10, true),  // "h"
                (19, false), // "m" of "more"
            ],
        );
    }

    // SmartSymbols tests
    #[test]
    fn test_contains_smart_symbols() {
        check_lines(
            contains_smart_symbols,
            true,
            &[
                "Copyright (c) 2024",
                "This is (tm) trademarked",
                "Left arrow <- here",
                "Right arrow -> there",
                "Em dash --- here",
                "Fraction 1/2",
            ],
        );
        check_lines(
            contains_smart_symbols,
            false,
            &["No symbols here", "(other) parentheses"],
        );
    }

    #[test]
    fn test_is_in_smart_symbol() {
        check_positions(
            is_in_smart_symbol,
            "Copyright (c) text",
            &[
                (0, false),  // "C"
                (10, true),  // "("
                (11, true),  // "c"
                (12, true),  // ")"
                (14, false), // "t" of "text"
            ],
        );
    }

    // Combined tests
    #[test]
    fn test_is_in_pymdown_markup() {
        let cases: [(&str, usize, bool); 5] = [
            ("++ctrl++", 2, true),
            ("^super^", 1, true),
            ("==mark==", 2, true),
            ("(c)", 1, true),
            ("plain text", 5, false),
        ];
        for (line, position, expected) in cases {
            assert_eq!(
                is_in_pymdown_markup(line, position),
                expected,
                "unexpected result at offset {position} of {line:?}"
            );
        }
    }

    #[test]
    fn test_mask_pymdown_markup() {
        let line = "Press ++ctrl++ and ^super^ with ==mark==";
        let masked = mask_pymdown_markup(line);

        for removed in ["++", "^super^", "==mark=="] {
            assert!(!masked.contains(removed), "{removed:?} should be masked");
        }
        for kept in ["Press", "and", "with"] {
            assert!(masked.contains(kept), "{kept:?} should survive masking");
        }
        // Length should be preserved
        assert_eq!(masked.len(), line.len());
    }
}
595}