rumdl_lib/utils/mkdocs_extensions.rs
1//! MkDocs PyMdown extensions support
2//!
3//! This module provides support for various PyMdown Markdown extensions
4//! commonly used with MkDocs Material:
5//!
6//! - **InlineHilite**: Inline code highlighting `` `#!python code` ``
7//! - **Keys**: Keyboard key notation `++ctrl+alt+delete++`
8//! - **Caret**: Superscript and insert `^superscript^` and `^^insert^^`
9//! - **Tilde**: Subscript and strikethrough `~subscript~` and `~~strike~~`
10//! - **Mark**: Highlight text `==highlighted==`
11//! - **SmartSymbols**: Auto-replace symbols `(c)` → `©`
12//!
13//! ## Architecture
14//!
15//! All markup detection follows a consistent span-based pattern:
16//! 1. `find_*_spans(line) -> Vec<(usize, usize)>` - find byte ranges
17//! 2. `is_in_*(line, position) -> bool` - check if position is inside markup
18//!
19//! For double-takes-precedence patterns (caret: ^^/^, tilde: ~~/~):
20//! - Double-delimiter spans are found first
21//! - Single-delimiter spans exclude positions inside double spans
22//!
23//! ## References
24//!
25//! - [PyMdown Extensions](https://facelessuser.github.io/pymdown-extensions/)
26
27use regex::Regex;
28use std::sync::LazyLock;
29
30// ============================================================================
31// Core span utilities
32// ============================================================================
33
/// Report whether `position` lies inside one of the half-open `[start, end)` spans.
///
/// `spans` is expected to be ordered by ascending start offset: scanning stops
/// at the first span that begins after `position`, so later spans are never
/// inspected.
#[inline]
fn position_in_spans(position: usize, spans: &[(usize, usize)]) -> bool {
    spans
        .iter()
        // Once a span starts beyond `position`, give up (early exit).
        .take_while(|&&(start, _)| start <= position)
        // `end` is exclusive.
        .any(|&(_, end)| position < end)
}
48
49/// Find all regex matches as (start, end) byte spans.
50#[inline]
51fn find_regex_spans(line: &str, pattern: &Regex) -> Vec<(usize, usize)> {
52 pattern.find_iter(line).map(|m| (m.start(), m.end())).collect()
53}
54
55/// Find single-delimiter spans (like `~sub~` or `^super^`) that are NOT inside
56/// double-delimiter spans (like `~~strike~~` or `^^insert^^`).
57///
58/// Rules for single-delimiter content:
59/// - Must have at least one character between delimiters
60/// - Cannot contain whitespace (per PyMdown spec)
61/// - Cannot be inside a double-delimiter span
62fn find_single_delim_spans(line: &str, delim: char, double_spans: &[(usize, usize)]) -> Vec<(usize, usize)> {
63 let mut spans = Vec::new();
64 let mut chars = line.char_indices().peekable();
65 let delim_len = delim.len_utf8();
66
67 while let Some((start_byte, ch)) = chars.next() {
68 // Skip if inside a double-delimiter span
69 if position_in_spans(start_byte, double_spans) {
70 continue;
71 }
72
73 if ch != delim {
74 continue;
75 }
76
77 // Check if this is a double delimiter (skip it entirely)
78 if chars.peek().is_some_and(|(_, c)| *c == delim) {
79 chars.next();
80 continue;
81 }
82
83 // Look for closing single delimiter
84 let mut found_content = false;
85 let mut has_whitespace = false;
86
87 for (byte_pos, inner_ch) in chars.by_ref() {
88 // If we enter a double-delimiter span, stop looking
89 if position_in_spans(byte_pos, double_spans) {
90 break;
91 }
92
93 if inner_ch == delim {
94 // Check it's not the start of a double delimiter
95 let is_double = chars.peek().is_some_and(|(_, c)| *c == delim);
96 if !is_double && found_content && !has_whitespace {
97 spans.push((start_byte, byte_pos + delim_len));
98 }
99 break;
100 }
101
102 found_content = true;
103 if inner_ch.is_whitespace() {
104 has_whitespace = true;
105 }
106 }
107 }
108
109 spans
110}
111
112// ============================================================================
113// InlineHilite: `#!lang code` syntax for inline code with syntax highlighting
114// ============================================================================
115
116/// Pattern to match inline hilite shebang at the start of backtick content
117static INLINE_HILITE_SHEBANG: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^#!([a-zA-Z][a-zA-Z0-9_+-]*)").unwrap());
118
119/// Check if code span content starts with InlineHilite shebang
120#[inline]
121pub fn is_inline_hilite_content(content: &str) -> bool {
122 INLINE_HILITE_SHEBANG.is_match(content)
123}
124
125// ============================================================================
126// Keys: ++key++ syntax for keyboard keys
127// ============================================================================
128
129/// Pattern to match keyboard key notation: `++key++` or `++key1+key2++`
130static KEYS_PATTERN: LazyLock<Regex> =
131 LazyLock::new(|| Regex::new(r"\+\+([a-zA-Z0-9_-]+(?:\+[a-zA-Z0-9_-]+)*)\+\+").unwrap());
132
133/// Find all keyboard shortcut spans
134fn find_keys_spans(line: &str) -> Vec<(usize, usize)> {
135 if !line.contains("++") {
136 return Vec::new();
137 }
138 find_regex_spans(line, &KEYS_PATTERN)
139}
140
141/// Check if a position in a line is within a keyboard shortcut
142fn is_in_keys(line: &str, position: usize) -> bool {
143 position_in_spans(position, &find_keys_spans(line))
144}
145
146// ============================================================================
147// Caret: ^superscript^ and ^^insert^^ syntax
148// ============================================================================
149
150/// Pattern to match insert: `^^text^^` (double caret)
151/// Handles content with single carets inside (e.g., `^^a^b^^`)
152static INSERT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^\^[^\^]+(?:\^[^\^]+)*\^\^").unwrap());
153
154/// Find all insert (^^text^^) spans
155fn find_insert_spans(line: &str) -> Vec<(usize, usize)> {
156 if !line.contains("^^") {
157 return Vec::new();
158 }
159 find_regex_spans(line, &INSERT_PATTERN)
160}
161
162/// Check if a position is within superscript or insert markup
163fn is_in_caret_markup(line: &str, position: usize) -> bool {
164 if !line.contains('^') {
165 return false;
166 }
167 let insert_spans = find_insert_spans(line);
168 if position_in_spans(position, &insert_spans) {
169 return true;
170 }
171 let super_spans = find_single_delim_spans(line, '^', &insert_spans);
172 position_in_spans(position, &super_spans)
173}
174
175// ============================================================================
176// Tilde: ~subscript~ and ~~strikethrough~~ syntax
177// ============================================================================
178
179/// Pattern to match strikethrough: `~~text~~` (double tilde)
180/// Handles content with single tildes inside (e.g., `~~a~b~~`)
181static STRIKETHROUGH_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"~~[^~]+(?:~[^~]+)*~~").unwrap());
182
183/// Find all strikethrough (~~text~~) spans
184fn find_strikethrough_spans(line: &str) -> Vec<(usize, usize)> {
185 if !line.contains("~~") {
186 return Vec::new();
187 }
188 find_regex_spans(line, &STRIKETHROUGH_PATTERN)
189}
190
191/// Check if a position is within subscript or strikethrough markup
192fn is_in_tilde_markup(line: &str, position: usize) -> bool {
193 if !line.contains('~') {
194 return false;
195 }
196 let strike_spans = find_strikethrough_spans(line);
197 if position_in_spans(position, &strike_spans) {
198 return true;
199 }
200 let sub_spans = find_single_delim_spans(line, '~', &strike_spans);
201 position_in_spans(position, &sub_spans)
202}
203
204// ============================================================================
205// Mark: ==highlighted== syntax
206// ============================================================================
207
208/// Pattern to match highlight/mark: `==text==`
209static MARK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"==([^=]+)==").unwrap());
210
211/// Find all mark (==text==) spans
212fn find_mark_spans(line: &str) -> Vec<(usize, usize)> {
213 if !line.contains("==") {
214 return Vec::new();
215 }
216 find_regex_spans(line, &MARK_PATTERN)
217}
218
219/// Check if a position is within mark markup
220pub fn is_in_mark(line: &str, position: usize) -> bool {
221 position_in_spans(position, &find_mark_spans(line))
222}
223
224// ============================================================================
225// SmartSymbols: (c), (tm), (r), -->, <--, etc.
226// ============================================================================
227
228/// Pattern to match any SmartSymbol that might be replaced
229static SMART_SYMBOL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
230 Regex::new(r"(?:\(c\)|\(C\)|\(r\)|\(R\)|\(tm\)|\(TM\)|\(p\)|\.\.\.|-{2,3}|<->|<-|->|<=>|<=|=>|1/4|1/2|3/4|\+-|!=)")
231 .unwrap()
232});
233
234/// Find all SmartSymbol spans
235fn find_smart_symbol_spans(line: &str) -> Vec<(usize, usize)> {
236 // Quick rejection checks
237 if !line.contains('(')
238 && !line.contains("...")
239 && !line.contains("--")
240 && !line.contains("->")
241 && !line.contains("<-")
242 && !line.contains("=>")
243 && !line.contains("<=")
244 && !line.contains("1/")
245 && !line.contains("3/")
246 && !line.contains("+-")
247 && !line.contains("!=")
248 {
249 return Vec::new();
250 }
251 find_regex_spans(line, &SMART_SYMBOL_PATTERN)
252}
253
254/// Check if a position is at a SmartSymbol
255fn is_in_smart_symbol(line: &str, position: usize) -> bool {
256 position_in_spans(position, &find_smart_symbol_spans(line))
257}
258
259// ============================================================================
260// Combined utilities
261// ============================================================================
262
263/// Check if a position is within any PyMdown extension markup
264pub fn is_in_pymdown_markup(line: &str, position: usize) -> bool {
265 is_in_keys(line, position)
266 || is_in_caret_markup(line, position)
267 || is_in_tilde_markup(line, position)
268 || is_in_mark(line, position)
269 || is_in_smart_symbol(line, position)
270}
271
#[cfg(test)]
mod tests {
    use super::*;

    /// Signature shared by the `is_in_*` position checks.
    type PositionCheck = fn(&str, usize) -> bool;

    /// Evaluate `check` on `line` at every listed byte offset and compare the
    /// answer with the expected flag, naming the offset when they differ.
    fn assert_positions(check: PositionCheck, line: &str, expectations: &[(usize, bool)]) {
        for &(position, expected) in expectations {
            assert_eq!(
                check(line, position),
                expected,
                "unexpected result at byte {position} of {line:?}"
            );
        }
    }

    // =========================================================================
    // Core utility tests
    // =========================================================================

    #[test]
    fn test_position_in_spans_empty() {
        for position in [0, 100] {
            assert!(!position_in_spans(position, &[]));
        }
    }

    #[test]
    fn test_position_in_spans_early_exit() {
        let spans = [(10, 20), (30, 40)];
        // Before all spans, between the two spans, after all spans.
        for outside in [5, 25, 50] {
            assert!(!position_in_spans(outside, &spans), "position {outside}");
        }
    }

    #[test]
    fn test_position_in_spans_inside() {
        let spans = [(10, 20), (30, 40)];
        let expectations = [
            (10, true),  // start of first span
            (15, true),  // middle of first span
            (19, true),  // last byte of first span
            (20, false), // end of first span is exclusive
            (30, true),  // start of second span
        ];
        for (position, expected) in expectations {
            assert_eq!(position_in_spans(position, &spans), expected, "position {position}");
        }
    }

    // =========================================================================
    // InlineHilite tests
    // =========================================================================

    #[test]
    fn test_is_inline_hilite_content() {
        for content in ["#!python print()", "#!js code"] {
            assert!(is_inline_hilite_content(content), "{content:?} should be detected");
        }
        for content in ["regular code", " #!python with space"] {
            assert!(!is_inline_hilite_content(content), "{content:?} should be rejected");
        }
    }

    // =========================================================================
    // Keys tests
    // =========================================================================

    #[test]
    fn test_is_in_keys() {
        assert_positions(
            is_in_keys,
            "Press ++ctrl++ here",
            &[
                (0, false),  // "P"
                (5, false),  // space before the opening ++
                (6, true),   // first +
                (10, true),  // "r"
                (13, true),  // last +
                (14, false), // space after the closing ++
            ],
        );
    }

    // =========================================================================
    // Caret tests
    // =========================================================================

    #[test]
    fn test_is_in_caret_markup() {
        assert_positions(
            is_in_caret_markup,
            "Text ^super^ here",
            &[
                (0, false),
                (5, true),   // opening ^
                (8, true),   // "p"
                (13, false), // "h" of "here"
            ],
        );

        assert_positions(
            is_in_caret_markup,
            "Text ^^insert^^ here",
            &[
                (5, true),  // first ^
                (10, true), // "e"
            ],
        );
    }

    // =========================================================================
    // Tilde tests
    // =========================================================================

    #[test]
    fn test_is_in_tilde_markup() {
        assert_positions(
            is_in_tilde_markup,
            "Text ~sub~ here",
            &[
                (0, false),
                (5, true),   // opening ~
                (7, true),   // "u"
                (12, false), // first "e" of "here"
            ],
        );

        assert_positions(
            is_in_tilde_markup,
            "Text ~~strike~~ here",
            &[
                (5, true),  // first ~
                (10, true), // "i"
            ],
        );
    }

    #[test]
    fn test_find_strikethrough_spans_triple_tilde() {
        // With an extra tilde on each side, the inner `~~a~~` is the one
        // strikethrough match; the surplus boundary tildes are left out.
        let line = "~~~a~~~";
        let matched: Vec<&str> = find_strikethrough_spans(line)
            .into_iter()
            .map(|(start, end)| &line[start..end])
            .collect();
        assert_eq!(matched, ["~~a~~"]);
    }

    #[test]
    fn test_find_strikethrough_spans_internal_single_tilde() {
        // A single tilde inside the body must not split the strikethrough:
        // the whole of `~~a~b~~` is one span.
        let line = "~~a~b~~";
        let strike_spans = find_strikethrough_spans(line);
        let matched: Vec<&str> = strike_spans.iter().map(|&(start, end)| &line[start..end]).collect();
        assert_eq!(matched, ["~~a~b~~"]);

        // Nor may a subscript be reported inside that strikethrough.
        assert!(find_single_delim_spans(line, '~', &strike_spans).is_empty());
    }

    // =========================================================================
    // Mark tests
    // =========================================================================

    #[test]
    fn test_is_in_mark() {
        assert_positions(
            is_in_mark,
            "Text ==highlight== more",
            &[
                (0, false),
                (5, true),   // first =
                (10, true),  // second "h" of "highlight"
                (19, false), // "m" of "more"
            ],
        );
    }

    // =========================================================================
    // SmartSymbols tests
    // =========================================================================

    #[test]
    fn test_is_in_smart_symbol() {
        assert_positions(
            is_in_smart_symbol,
            "Copyright (c) text",
            &[
                (0, false),
                (10, true),  // "("
                (11, true),  // "c"
                (12, true),  // ")"
                (14, false), // "t" of "text"
            ],
        );
    }

    // =========================================================================
    // Combined tests
    // =========================================================================

    #[test]
    fn test_is_in_pymdown_markup() {
        let inside_markup = [
            ("++ctrl++", 2),
            ("^super^", 1),
            ("~sub~", 1),
            ("~~strike~~", 2),
            ("==mark==", 2),
            ("(c)", 1),
        ];
        for (line, position) in inside_markup {
            assert!(
                is_in_pymdown_markup(line, position),
                "byte {position} of {line:?} should be inside markup"
            );
        }

        assert!(!is_in_pymdown_markup("plain text", 5));
    }

    #[test]
    fn test_empty_line() {
        assert!(!is_in_pymdown_markup("", 0));
        assert!(!is_in_mark("", 0));
        assert!(!is_inline_hilite_content(""));
    }
}
440}