Skip to main content

rumdl_lib/utils/
mkdocs_extensions.rs

//! MkDocs PyMdown extensions support
//!
//! This module provides support for various PyMdown Markdown extensions
//! commonly used with MkDocs Material:
//!
//! - **InlineHilite**: Inline code highlighting `` `#!python code` ``
//! - **Keys**: Keyboard key notation `++ctrl+alt+delete++`
//! - **Caret**: Superscript and insert `^superscript^` and `^^insert^^`
//! - **Tilde**: Subscript and strikethrough `~subscript~` and `~~strike~~`
//! - **Mark**: Highlight text `==highlighted==`
//! - **SmartSymbols**: Auto-replace symbols `(c)` → `©`
//!
//! ## Architecture
//!
//! All markup detection follows a consistent span-based pattern:
//! 1. `find_*_spans(line) -> Vec<(usize, usize)>` - find byte ranges
//! 2. `contains_*(line) -> bool` - check if markup exists
//! 3. `is_in_*(line, position) -> bool` - check if position is inside markup
//!
//! For double-takes-precedence patterns (caret: ^^/^, tilde: ~~/~):
//! - Double-delimiter spans are found first
//! - Single-delimiter spans exclude positions inside double spans
//!
//! ## References
//!
//! - [PyMdown Extensions](https://facelessuser.github.io/pymdown-extensions/)
27use regex::Regex;
28use std::sync::LazyLock;
29
30// ============================================================================
31// Core span utilities
32// ============================================================================
33
/// Report whether `position` lies inside any half-open `[start, end)` span.
///
/// `spans` is expected to be sorted by start offset: scanning stops at the
/// first span that begins after `position`, so a span listed after that point
/// is never examined.
#[inline]
fn position_in_spans(position: usize, spans: &[(usize, usize)]) -> bool {
    spans
        .iter()
        .take_while(|&&(start, _)| start <= position)
        .any(|&(_, end)| position < end)
}
48
49/// Find all regex matches as (start, end) byte spans.
50#[inline]
51fn find_regex_spans(line: &str, pattern: &Regex) -> Vec<(usize, usize)> {
52    pattern.find_iter(line).map(|m| (m.start(), m.end())).collect()
53}
54
55/// Find single-delimiter spans (like `~sub~` or `^super^`) that are NOT inside
56/// double-delimiter spans (like `~~strike~~` or `^^insert^^`).
57///
58/// Rules for single-delimiter content:
59/// - Must have at least one character between delimiters
60/// - Cannot contain whitespace (per PyMdown spec)
61/// - Cannot be inside a double-delimiter span
62fn find_single_delim_spans(line: &str, delim: char, double_spans: &[(usize, usize)]) -> Vec<(usize, usize)> {
63    let mut spans = Vec::new();
64    let mut chars = line.char_indices().peekable();
65    let delim_len = delim.len_utf8();
66
67    while let Some((start_byte, ch)) = chars.next() {
68        // Skip if inside a double-delimiter span
69        if position_in_spans(start_byte, double_spans) {
70            continue;
71        }
72
73        if ch != delim {
74            continue;
75        }
76
77        // Check if this is a double delimiter (skip it entirely)
78        if chars.peek().is_some_and(|(_, c)| *c == delim) {
79            chars.next();
80            continue;
81        }
82
83        // Look for closing single delimiter
84        let mut found_content = false;
85        let mut has_whitespace = false;
86
87        for (byte_pos, inner_ch) in chars.by_ref() {
88            // If we enter a double-delimiter span, stop looking
89            if position_in_spans(byte_pos, double_spans) {
90                break;
91            }
92
93            if inner_ch == delim {
94                // Check it's not the start of a double delimiter
95                let is_double = chars.peek().is_some_and(|(_, c)| *c == delim);
96                if !is_double && found_content && !has_whitespace {
97                    spans.push((start_byte, byte_pos + delim_len));
98                }
99                break;
100            }
101
102            found_content = true;
103            if inner_ch.is_whitespace() {
104                has_whitespace = true;
105            }
106        }
107    }
108
109    spans
110}
111
/// Coalesce spans that overlap or touch into single spans.
///
/// The input must already be sorted by start offset. A span is folded into
/// the previous output span whenever its start is not past that span's end;
/// otherwise it starts a new output span.
fn merge_spans(spans: &[(usize, usize)]) -> Vec<(usize, usize)> {
    let mut merged: Vec<(usize, usize)> = Vec::with_capacity(spans.len());

    for &(start, end) in spans {
        if let Some(tail) = merged.last_mut() {
            if start <= tail.1 {
                // Overlapping or adjacent: stretch the previous span if needed.
                tail.1 = tail.1.max(end);
                continue;
            }
        }
        merged.push((start, end));
    }

    merged
}
132
133// ============================================================================
134// InlineHilite: `#!lang code` syntax for inline code with syntax highlighting
135// ============================================================================
136
137/// Pattern to match InlineHilite syntax: `#!language code`
138static INLINE_HILITE_PATTERN: LazyLock<Regex> =
139    LazyLock::new(|| Regex::new(r"`#!([a-zA-Z][a-zA-Z0-9_+-]*)\s+[^`]+`").unwrap());
140
141/// Pattern to match inline hilite shebang at the start of backtick content
142static INLINE_HILITE_SHEBANG: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^#!([a-zA-Z][a-zA-Z0-9_+-]*)").unwrap());
143
144/// Check if a line contains InlineHilite syntax
145#[inline]
146pub fn contains_inline_hilite(line: &str) -> bool {
147    line.contains('`') && line.contains("#!") && INLINE_HILITE_PATTERN.is_match(line)
148}
149
150/// Check if code span content starts with InlineHilite shebang
151#[inline]
152pub fn is_inline_hilite_content(content: &str) -> bool {
153    INLINE_HILITE_SHEBANG.is_match(content)
154}
155
156// ============================================================================
157// Keys: ++key++ syntax for keyboard keys
158// ============================================================================
159
160/// Pattern to match keyboard key notation: `++key++` or `++key1+key2++`
161static KEYS_PATTERN: LazyLock<Regex> =
162    LazyLock::new(|| Regex::new(r"\+\+([a-zA-Z0-9_-]+(?:\+[a-zA-Z0-9_-]+)*)\+\+").unwrap());
163
/// Common keyboard key names for validation.
///
/// All names are lowercase. The list includes aliases for the same key
/// (for example `delete`/`del`, `insert`/`ins`, `escape`/`esc`).
pub const COMMON_KEYS: &[&str] = &[
    // Modifier keys
    "ctrl",
    "alt",
    "shift",
    "cmd",
    "meta",
    "win",
    "windows",
    "option",
    // Editing and whitespace keys
    "enter",
    "return",
    "tab",
    "space",
    "backspace",
    "delete",
    "del",
    "insert",
    "ins",
    // Navigation keys
    "home",
    "end",
    "pageup",
    "pagedown",
    "up",
    "down",
    "left",
    "right",
    // Escape, lock and system keys
    "escape",
    "esc",
    "capslock",
    "numlock",
    "scrolllock",
    "printscreen",
    "pause",
    "break",
    // Function keys
    "f1",
    "f2",
    "f3",
    "f4",
    "f5",
    "f6",
    "f7",
    "f8",
    "f9",
    "f10",
    "f11",
    "f12",
];
212
/// A keyboard shortcut parsed from `++key++` / `++key1+key2++` notation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KeyboardShortcut {
    /// The matched text, including the surrounding `++` delimiters.
    pub full_text: String,
    /// The individual key names, i.e. the text between the delimiters split on `+`.
    pub keys: Vec<String>,
    /// Byte offset in the line where the match begins.
    pub start: usize,
    /// Byte offset in the line just past the end of the match.
    pub end: usize,
}
221
222/// Find all keyboard shortcut spans
223pub fn find_keys_spans(line: &str) -> Vec<(usize, usize)> {
224    if !line.contains("++") {
225        return Vec::new();
226    }
227    find_regex_spans(line, &KEYS_PATTERN)
228}
229
230/// Check if a line contains keyboard key notation
231#[inline]
232pub fn contains_keys(line: &str) -> bool {
233    line.contains("++") && KEYS_PATTERN.is_match(line)
234}
235
236/// Find all keyboard shortcuts in a line
237pub fn find_keyboard_shortcuts(line: &str) -> Vec<KeyboardShortcut> {
238    if !line.contains("++") {
239        return Vec::new();
240    }
241
242    KEYS_PATTERN
243        .find_iter(line)
244        .map(|m| {
245            let full_text = m.as_str().to_string();
246            let inner = &full_text[2..full_text.len() - 2];
247            let keys = inner.split('+').map(String::from).collect();
248            KeyboardShortcut {
249                full_text,
250                keys,
251                start: m.start(),
252                end: m.end(),
253            }
254        })
255        .collect()
256}
257
258/// Check if a position in a line is within a keyboard shortcut
259pub fn is_in_keys(line: &str, position: usize) -> bool {
260    position_in_spans(position, &find_keys_spans(line))
261}
262
263// ============================================================================
264// Caret: ^superscript^ and ^^insert^^ syntax
265// ============================================================================
266
267/// Pattern to match insert: `^^text^^` (double caret)
268/// Handles content with single carets inside (e.g., `^^a^b^^`)
269static INSERT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^\^[^\^]+(?:\^[^\^]+)*\^\^").unwrap());
270
271/// Find all insert (^^text^^) spans
272pub fn find_insert_spans(line: &str) -> Vec<(usize, usize)> {
273    if !line.contains("^^") {
274        return Vec::new();
275    }
276    find_regex_spans(line, &INSERT_PATTERN)
277}
278
279/// Find all superscript (^text^) spans, excluding those inside insert spans
280pub fn find_superscript_spans(line: &str) -> Vec<(usize, usize)> {
281    if !line.contains('^') {
282        return Vec::new();
283    }
284    let insert_spans = find_insert_spans(line);
285    find_single_delim_spans(line, '^', &insert_spans)
286}
287
288/// Check if a line contains superscript syntax (^text^ not inside ^^insert^^)
289#[inline]
290pub fn contains_superscript(line: &str) -> bool {
291    !find_superscript_spans(line).is_empty()
292}
293
294/// Check if a line contains insert syntax (^^text^^)
295#[inline]
296pub fn contains_insert(line: &str) -> bool {
297    line.contains("^^") && INSERT_PATTERN.is_match(line)
298}
299
300/// Check if a position is within superscript or insert markup
301pub fn is_in_caret_markup(line: &str, position: usize) -> bool {
302    if !line.contains('^') {
303        return false;
304    }
305    let insert_spans = find_insert_spans(line);
306    if position_in_spans(position, &insert_spans) {
307        return true;
308    }
309    let super_spans = find_single_delim_spans(line, '^', &insert_spans);
310    position_in_spans(position, &super_spans)
311}
312
313// ============================================================================
314// Tilde: ~subscript~ and ~~strikethrough~~ syntax
315// ============================================================================
316
317/// Pattern to match strikethrough: `~~text~~` (double tilde)
318/// Handles content with single tildes inside (e.g., `~~a~b~~`)
319static STRIKETHROUGH_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"~~[^~]+(?:~[^~]+)*~~").unwrap());
320
321/// Find all strikethrough (~~text~~) spans
322pub fn find_strikethrough_spans(line: &str) -> Vec<(usize, usize)> {
323    if !line.contains("~~") {
324        return Vec::new();
325    }
326    find_regex_spans(line, &STRIKETHROUGH_PATTERN)
327}
328
329/// Find all subscript (~text~) spans, excluding those inside strikethrough spans
330pub fn find_subscript_spans(line: &str) -> Vec<(usize, usize)> {
331    if !line.contains('~') {
332        return Vec::new();
333    }
334    let strike_spans = find_strikethrough_spans(line);
335    find_single_delim_spans(line, '~', &strike_spans)
336}
337
338/// Check if a line contains subscript syntax (~text~ not inside ~~strike~~)
339#[inline]
340pub fn contains_subscript(line: &str) -> bool {
341    !find_subscript_spans(line).is_empty()
342}
343
344/// Check if a line contains strikethrough syntax (~~text~~)
345#[inline]
346pub fn contains_strikethrough(line: &str) -> bool {
347    line.contains("~~") && STRIKETHROUGH_PATTERN.is_match(line)
348}
349
350/// Check if a position is within subscript or strikethrough markup
351pub fn is_in_tilde_markup(line: &str, position: usize) -> bool {
352    if !line.contains('~') {
353        return false;
354    }
355    let strike_spans = find_strikethrough_spans(line);
356    if position_in_spans(position, &strike_spans) {
357        return true;
358    }
359    let sub_spans = find_single_delim_spans(line, '~', &strike_spans);
360    position_in_spans(position, &sub_spans)
361}
362
363// ============================================================================
364// Mark: ==highlighted== syntax
365// ============================================================================
366
367/// Pattern to match highlight/mark: `==text==`
368static MARK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"==([^=]+)==").unwrap());
369
370/// Find all mark (==text==) spans
371pub fn find_mark_spans(line: &str) -> Vec<(usize, usize)> {
372    if !line.contains("==") {
373        return Vec::new();
374    }
375    find_regex_spans(line, &MARK_PATTERN)
376}
377
378/// Check if a line contains mark/highlight syntax
379#[inline]
380pub fn contains_mark(line: &str) -> bool {
381    line.contains("==") && MARK_PATTERN.is_match(line)
382}
383
384/// Check if a position is within mark markup
385pub fn is_in_mark(line: &str, position: usize) -> bool {
386    position_in_spans(position, &find_mark_spans(line))
387}
388
389// ============================================================================
390// SmartSymbols: (c), (tm), (r), -->, <--, etc.
391// ============================================================================
392
393/// Pattern to match any SmartSymbol that might be replaced
394static SMART_SYMBOL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
395    Regex::new(r"(?:\(c\)|\(C\)|\(r\)|\(R\)|\(tm\)|\(TM\)|\(p\)|\.\.\.|-{2,3}|<->|<-|->|<=>|<=|=>|1/4|1/2|3/4|\+-|!=)")
396        .unwrap()
397});
398
399/// Find all SmartSymbol spans
400pub fn find_smart_symbol_spans(line: &str) -> Vec<(usize, usize)> {
401    // Quick rejection checks
402    if !line.contains('(')
403        && !line.contains("...")
404        && !line.contains("--")
405        && !line.contains("->")
406        && !line.contains("<-")
407        && !line.contains("=>")
408        && !line.contains("<=")
409        && !line.contains("1/")
410        && !line.contains("3/")
411        && !line.contains("+-")
412        && !line.contains("!=")
413    {
414        return Vec::new();
415    }
416    find_regex_spans(line, &SMART_SYMBOL_PATTERN)
417}
418
419/// Check if a line contains potential SmartSymbol patterns
420#[inline]
421pub fn contains_smart_symbols(line: &str) -> bool {
422    !find_smart_symbol_spans(line).is_empty()
423}
424
425/// Check if a position is at a SmartSymbol
426pub fn is_in_smart_symbol(line: &str, position: usize) -> bool {
427    position_in_spans(position, &find_smart_symbol_spans(line))
428}
429
430// ============================================================================
431// Combined utilities
432// ============================================================================
433
434/// Check if a position is within any PyMdown extension markup
435pub fn is_in_pymdown_markup(line: &str, position: usize) -> bool {
436    is_in_keys(line, position)
437        || is_in_caret_markup(line, position)
438        || is_in_tilde_markup(line, position)
439        || is_in_mark(line, position)
440        || is_in_smart_symbol(line, position)
441}
442
443/// Mask all PyMdown extension markup with spaces (single-pass)
444///
445/// This function collects all markup spans and replaces them with spaces
446/// in a single pass, preserving string length for position-based operations.
447pub fn mask_pymdown_markup(line: &str) -> String {
448    // Collect all spans to mask
449    let mut all_spans: Vec<(usize, usize)> = Vec::new();
450
451    // Keys
452    all_spans.extend(find_keys_spans(line));
453
454    // Caret: insert and superscript
455    if line.contains('^') {
456        let insert_spans = find_insert_spans(line);
457        let super_spans = find_single_delim_spans(line, '^', &insert_spans);
458        all_spans.extend(insert_spans);
459        all_spans.extend(super_spans);
460    }
461
462    // Tilde: strikethrough and subscript
463    if line.contains('~') {
464        let strike_spans = find_strikethrough_spans(line);
465        let sub_spans = find_single_delim_spans(line, '~', &strike_spans);
466        all_spans.extend(strike_spans);
467        all_spans.extend(sub_spans);
468    }
469
470    // Mark
471    all_spans.extend(find_mark_spans(line));
472
473    // Early return if nothing to mask
474    if all_spans.is_empty() {
475        return line.to_string();
476    }
477
478    // Sort by start position and merge overlapping spans
479    all_spans.sort_unstable_by_key(|&(start, _)| start);
480    let merged = merge_spans(&all_spans);
481
482    // Build result in single pass
483    let mut result = String::with_capacity(line.len());
484    let mut last_end = 0;
485
486    for (start, end) in merged {
487        result.push_str(&line[last_end..start]);
488        // Use spaces to preserve length
489        for _ in 0..(end - start) {
490            result.push(' ');
491        }
492        last_end = end;
493    }
494    result.push_str(&line[last_end..]);
495
496    result
497}
498
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // Core utility tests
    // =========================================================================

    #[test]
    fn test_position_in_spans_empty() {
        assert!(!position_in_spans(0, &[]));
        assert!(!position_in_spans(100, &[]));
    }

    #[test]
    fn test_position_in_spans_early_exit() {
        let spans = [(10, 20), (30, 40)];
        assert!(!position_in_spans(5, &spans)); // Before all spans
        assert!(!position_in_spans(25, &spans)); // Between spans
        assert!(!position_in_spans(50, &spans)); // After all spans
    }

    #[test]
    fn test_position_in_spans_inside() {
        let spans = [(10, 20), (30, 40)];
        assert!(position_in_spans(10, &spans)); // Start of first span
        assert!(position_in_spans(15, &spans)); // Middle of first span
        assert!(position_in_spans(19, &spans)); // End-1 of first span
        assert!(!position_in_spans(20, &spans)); // End of first span (exclusive)
        assert!(position_in_spans(30, &spans)); // Start of second span
    }

    #[test]
    fn test_merge_spans_empty() {
        assert!(merge_spans(&[]).is_empty());
    }

    #[test]
    fn test_merge_spans_no_overlap() {
        let spans = [(0, 5), (10, 15), (20, 25)];
        let merged = merge_spans(&spans);
        assert_eq!(merged, vec![(0, 5), (10, 15), (20, 25)]);
    }

    #[test]
    fn test_merge_spans_overlapping() {
        let spans = [(0, 10), (5, 15), (20, 25)];
        let merged = merge_spans(&spans);
        assert_eq!(merged, vec![(0, 15), (20, 25)]);
    }

    #[test]
    fn test_merge_spans_adjacent() {
        let spans = [(0, 10), (10, 20)];
        let merged = merge_spans(&spans);
        assert_eq!(merged, vec![(0, 20)]);
    }

    // =========================================================================
    // InlineHilite tests
    // =========================================================================

    #[test]
    fn test_contains_inline_hilite() {
        assert!(contains_inline_hilite("`#!python print('hello')`"));
        assert!(contains_inline_hilite("Use `#!js alert('hi')` for alerts"));
        assert!(contains_inline_hilite("`#!c++ cout << x;`"));

        assert!(!contains_inline_hilite("`regular code`"));
        assert!(!contains_inline_hilite("#! not in backticks"));
        assert!(!contains_inline_hilite("`#!` empty"));
    }

    #[test]
    fn test_is_inline_hilite_content() {
        assert!(is_inline_hilite_content("#!python print()"));
        assert!(is_inline_hilite_content("#!js code"));

        assert!(!is_inline_hilite_content("regular code"));
        assert!(!is_inline_hilite_content(" #!python with space"));
    }

    // =========================================================================
    // Keys tests
    // =========================================================================

    #[test]
    fn test_contains_keys() {
        assert!(contains_keys("Press ++ctrl++ to continue"));
        assert!(contains_keys("++ctrl+alt+delete++"));
        assert!(contains_keys("Use ++cmd+shift+p++ for command palette"));

        assert!(!contains_keys("Use + for addition"));
        assert!(!contains_keys("a++ increment"));
        assert!(!contains_keys("++incomplete"));
    }

    #[test]
    fn test_find_keyboard_shortcuts() {
        let shortcuts = find_keyboard_shortcuts("Press ++ctrl+c++ then ++ctrl+v++");
        assert_eq!(shortcuts.len(), 2);
        assert_eq!(shortcuts[0].keys, vec!["ctrl", "c"]);
        assert_eq!(shortcuts[1].keys, vec!["ctrl", "v"]);

        let shortcuts = find_keyboard_shortcuts("++ctrl+alt+delete++");
        assert_eq!(shortcuts.len(), 1);
        assert_eq!(shortcuts[0].keys, vec!["ctrl", "alt", "delete"]);
    }

    #[test]
    fn test_is_in_keys() {
        let line = "Press ++ctrl++ here";
        assert!(!is_in_keys(line, 0)); // "P"
        assert!(!is_in_keys(line, 5)); // " "
        assert!(is_in_keys(line, 6)); // first +
        assert!(is_in_keys(line, 10)); // "r"
        assert!(is_in_keys(line, 13)); // last +
        assert!(!is_in_keys(line, 14)); // " "
    }

    // =========================================================================
    // Caret tests
    // =========================================================================

    #[test]
    fn test_contains_superscript() {
        assert!(contains_superscript("E=mc^2^"));
        assert!(contains_superscript("x^n^ power"));

        assert!(!contains_superscript("no caret here"));
        assert!(!contains_superscript("^^insert^^")); // double caret is insert
    }

    #[test]
    fn test_contains_insert() {
        assert!(contains_insert("^^inserted text^^"));
        assert!(contains_insert("Some ^^new^^ text"));

        assert!(!contains_insert("^superscript^"));
        assert!(!contains_insert("no markup"));
    }

    #[test]
    fn test_find_superscript_spans() {
        let spans = find_superscript_spans("E=mc^2^");
        assert_eq!(spans.len(), 1);
        assert_eq!(&"E=mc^2^"[spans[0].0..spans[0].1], "^2^");
    }

    #[test]
    fn test_superscript_not_inside_insert() {
        // ^x^ inside ^^text^^ should not be detected as superscript
        let line = "^^some^x^text^^";
        let spans = find_superscript_spans(line);
        assert!(spans.is_empty(), "Superscript inside insert should not be detected");
    }

    #[test]
    fn test_is_in_caret_markup() {
        let line = "Text ^super^ here";
        assert!(!is_in_caret_markup(line, 0));
        assert!(is_in_caret_markup(line, 5)); // "^"
        assert!(is_in_caret_markup(line, 8)); // "p"
        assert!(!is_in_caret_markup(line, 13)); // "h" (after the markup)

        let line2 = "Text ^^insert^^ here";
        assert!(is_in_caret_markup(line2, 5)); // first ^
        assert!(is_in_caret_markup(line2, 10)); // "e"
    }

    // =========================================================================
    // Tilde tests
    // =========================================================================

    #[test]
    fn test_contains_subscript() {
        assert!(contains_subscript("H~2~O"));
        assert!(contains_subscript("x~n~ power"));

        assert!(!contains_subscript("no tilde here"));
        assert!(!contains_subscript("~~strikethrough~~"));
    }

    #[test]
    fn test_contains_strikethrough() {
        assert!(contains_strikethrough("~~deleted text~~"));
        assert!(contains_strikethrough("Some ~~old~~ text"));
        assert!(contains_strikethrough("~~a~b~~")); // single tilde inside is OK

        assert!(!contains_strikethrough("~subscript~"));
        assert!(!contains_strikethrough("no markup"));
    }

    #[test]
    fn test_find_subscript_spans() {
        let spans = find_subscript_spans("H~2~O");
        assert_eq!(spans.len(), 1);
        assert_eq!(&"H~2~O"[spans[0].0..spans[0].1], "~2~");
    }

    #[test]
    fn test_subscript_not_inside_strikethrough() {
        let line = "~~some~x~text~~";
        let spans = find_subscript_spans(line);
        assert!(
            spans.is_empty(),
            "Subscript inside strikethrough should not be detected"
        );
    }

    #[test]
    fn test_multiple_subscripts() {
        let line = "~a~ and ~b~";
        let spans = find_subscript_spans(line);
        assert_eq!(spans.len(), 2);
        assert_eq!(&line[spans[0].0..spans[0].1], "~a~");
        assert_eq!(&line[spans[1].0..spans[1].1], "~b~");
    }

    #[test]
    fn test_subscript_no_whitespace() {
        let line = "~no spaces allowed~";
        let spans = find_subscript_spans(line);
        assert!(spans.is_empty(), "Subscript with whitespace should not match");
    }

    #[test]
    fn test_is_in_tilde_markup() {
        let line = "Text ~sub~ here";
        assert!(!is_in_tilde_markup(line, 0));
        assert!(is_in_tilde_markup(line, 5)); // "~"
        assert!(is_in_tilde_markup(line, 7)); // "u"
        assert!(!is_in_tilde_markup(line, 12)); // "e" (after the markup)

        let line2 = "Text ~~strike~~ here";
        assert!(is_in_tilde_markup(line2, 5)); // first ~
        assert!(is_in_tilde_markup(line2, 10)); // "i"
    }

    #[test]
    fn test_subscript_vs_strikethrough_coexist() {
        let line = "H~2~O is ~~not~~ water";
        assert!(contains_subscript(line));
        assert!(contains_strikethrough(line));
    }

    #[test]
    fn test_strikethrough_with_internal_tilde() {
        // ~~a~b~~ should match as one strikethrough, not as strikethrough + subscript
        let line = "~~a~b~~";
        assert!(contains_strikethrough(line));

        let strike_spans = find_strikethrough_spans(line);
        assert_eq!(strike_spans.len(), 1);
        assert_eq!(&line[strike_spans[0].0..strike_spans[0].1], "~~a~b~~");

        // No subscript should be found
        assert!(!contains_subscript(line));
    }

    // =========================================================================
    // Mark tests
    // =========================================================================

    #[test]
    fn test_contains_mark() {
        assert!(contains_mark("This is ==highlighted== text"));
        assert!(contains_mark("==important=="));

        assert!(!contains_mark("no highlight"));
        assert!(!contains_mark("a == b comparison")); // lone `==` with no closing `==`
    }

    #[test]
    fn test_is_in_mark() {
        let line = "Text ==highlight== more";
        assert!(!is_in_mark(line, 0));
        assert!(is_in_mark(line, 5)); // first =
        assert!(is_in_mark(line, 10)); // "h"
        assert!(!is_in_mark(line, 19)); // "m" (after the markup)
    }

    // =========================================================================
    // SmartSymbols tests
    // =========================================================================

    #[test]
    fn test_contains_smart_symbols() {
        assert!(contains_smart_symbols("Copyright (c) 2024"));
        assert!(contains_smart_symbols("This is (tm) trademarked"));
        assert!(contains_smart_symbols("Left arrow <- here"));
        assert!(contains_smart_symbols("Right arrow -> there"));
        assert!(contains_smart_symbols("Em dash --- here"));
        assert!(contains_smart_symbols("Fraction 1/2"));

        assert!(!contains_smart_symbols("No symbols here"));
        assert!(!contains_smart_symbols("(other) parentheses"));
    }

    #[test]
    fn test_is_in_smart_symbol() {
        let line = "Copyright (c) text";
        assert!(!is_in_smart_symbol(line, 0));
        assert!(is_in_smart_symbol(line, 10)); // "("
        assert!(is_in_smart_symbol(line, 11)); // "c"
        assert!(is_in_smart_symbol(line, 12)); // ")"
        assert!(!is_in_smart_symbol(line, 14)); // "t" (after the symbol)
    }

    // =========================================================================
    // Combined tests
    // =========================================================================

    #[test]
    fn test_is_in_pymdown_markup() {
        assert!(is_in_pymdown_markup("++ctrl++", 2));
        assert!(is_in_pymdown_markup("^super^", 1));
        assert!(is_in_pymdown_markup("~sub~", 1));
        assert!(is_in_pymdown_markup("~~strike~~", 2));
        assert!(is_in_pymdown_markup("==mark==", 2));
        assert!(is_in_pymdown_markup("(c)", 1));

        assert!(!is_in_pymdown_markup("plain text", 5));
    }

    #[test]
    fn test_mask_pymdown_markup() {
        let line = "Press ++ctrl++ and ^super^ with ==mark==";
        let masked = mask_pymdown_markup(line);
        assert!(!masked.contains("++"));
        assert!(!masked.contains("^super^"));
        assert!(!masked.contains("==mark=="));
        assert!(masked.contains("Press"));
        assert!(masked.contains("and"));
        assert!(masked.contains("with"));
        assert_eq!(masked.len(), line.len());
    }

    #[test]
    fn test_mask_pymdown_markup_with_tilde() {
        let line = "H~2~O is ~~deleted~~ water";
        let masked = mask_pymdown_markup(line);
        assert!(!masked.contains("~2~"));
        assert!(!masked.contains("~~deleted~~"));
        assert!(masked.contains("H"));
        assert!(masked.contains("O is"));
        assert!(masked.contains("water"));
        assert_eq!(masked.len(), line.len());
    }

    #[test]
    fn test_mask_preserves_unmasked_text() {
        let line = "plain text without markup";
        let masked = mask_pymdown_markup(line);
        assert_eq!(masked, line);
    }

    #[test]
    fn test_mask_complex_mixed_markup() {
        let line = "++ctrl++ ^2^ ~x~ ~~old~~ ==new==";
        let masked = mask_pymdown_markup(line);
        // All markup should be masked
        assert!(!masked.contains("++"));
        assert!(!masked.contains("^2^"));
        assert!(!masked.contains("~x~"));
        assert!(!masked.contains("~~old~~"));
        assert!(!masked.contains("==new=="));
        // Length preserved
        assert_eq!(masked.len(), line.len());
    }

    // =========================================================================
    // Edge case tests
    // =========================================================================

    #[test]
    fn test_empty_line() {
        assert!(!contains_keys(""));
        assert!(!contains_superscript(""));
        assert!(!contains_subscript(""));
        assert!(!contains_mark(""));
        assert_eq!(mask_pymdown_markup(""), "");
    }

    #[test]
    fn test_unclosed_delimiters() {
        assert!(!contains_superscript("^unclosed"));
        assert!(!contains_subscript("~unclosed"));
        assert!(!contains_mark("==unclosed"));
        assert!(!contains_keys("++unclosed"));
    }

    #[test]
    fn test_adjacent_markup() {
        let line = "^a^^b^";
        // ^a^^b^
        // 012345
        // The line holds no complete ^^insert^^, so only the single-delimiter
        // scan applies:
        // - the ^ at 0 opens a candidate; the ^ at 2 would close it, but it is
        //   directly followed by another ^ (at 3), so the candidate is dropped
        // - scanning resumes at the ^ at 3, which is followed by 'b' and opens
        //   a new candidate that the final ^ at 5 closes
        // Result: one superscript, ^b^, covering bytes 3..6.
        let spans = find_superscript_spans(line);
        assert_eq!(spans.len(), 1);
        assert_eq!(&line[spans[0].0..spans[0].1], "^b^");
    }

    #[test]
    fn test_triple_tilde() {
        // ~~~a~~~ should match ~~a~~ (strikethrough) with extra tildes as text
        let line = "~~~a~~~";
        let strike_spans = find_strikethrough_spans(line);
        // The regex ~~[^~]+(?:~[^~]+)*~~ on "~~~a~~~":
        // Try at pos 0: ~~ matches, then [^~]+ needs non-tilde at pos 2, which is ~. Fail.
        // Try at pos 1: ~~ matches (pos 1-2), [^~]+ matches 'a' at pos 3.
        // Then (?:~[^~]+)* tries at pos 4: ~ at 4, then [^~]+ needs non-tilde at 5, which is ~. Zero matches.
        // Then ~~ matches at pos 4-5.
        // So we get ~~a~~ covering bytes 1..6 (end exclusive).
        assert_eq!(strike_spans.len(), 1);
        assert_eq!(&line[strike_spans[0].0..strike_spans[0].1], "~~a~~");
    }
}