Skip to main content

rumdl_lib/utils/
pymdown_blocks.rs

1//! PyMdown Extensions Blocks detection utilities
2//!
3//! This module provides detection for PyMdown Extensions "Blocks" syntax which uses
4//! `///` markers to create structured content blocks.
5//!
6//! Common patterns:
7//! - `/// caption` - Caption block for figures/tables
8//! - `/// details | Summary title` - Collapsible content
9//! - `/// admonition | Title` - Admonition with custom title
10//! - `/// html | div` - HTML wrapper block
11//! - `///` - Closing marker
12//!
13//! Blocks can have YAML options indented 4 spaces after the header line:
14//! ```text
15//! /// caption
16//!     attrs: {id: my-id}
17//! Caption text
18//! ///
19//! ```
20//!
21//! Supported block types: caption, figure-caption, details, admonition, html, definition, tab
22
23use regex::Regex;
24use std::sync::LazyLock;
25
26use crate::utils::skip_context::ByteRange;
27
/// Pattern to match block opening markers
/// Matches: /// block-type, /// block-type | args, etc.
/// Does NOT match a closing /// on its own
///
/// The pattern accepts optional leading whitespace, the `///` marker, optional
/// whitespace, and then a block type that starts with an ASCII letter followed
/// by ASCII letters, digits, `_` or `-`. Because the whitespace after the marker
/// is optional, `///caption` is also accepted. The pattern is anchored only at
/// the start of the line, so whatever follows the block type is not inspected.
/// The regex is compiled once, on first use.
static BLOCK_OPEN_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)///\s*(?:[a-zA-Z][a-zA-Z0-9_-]*)").unwrap());
33
/// Pattern to match block closing markers
/// Matches: /// (with optional whitespace before and after)
///
/// The pattern is anchored at both ends, so a line with anything other than
/// whitespace around the three slashes (for example `/// caption` or `////`)
/// is not a closing marker. The regex is compiled once, on first use.
static BLOCK_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)///\s*$").unwrap());
37
38/// Check if a line is a block opening marker
39pub fn is_block_open(line: &str) -> bool {
40    BLOCK_OPEN_PATTERN.is_match(line)
41}
42
43/// Check if a line is a block closing marker (just `///`)
44pub fn is_block_close(line: &str) -> bool {
45    BLOCK_CLOSE_PATTERN.is_match(line)
46}
47
/// Get the indentation level of a block marker
///
/// Returns the visual column at which the first character that is neither a
/// space nor a tab appears. A space advances the column by one. A tab advances
/// it to the next multiple of four (tab stops, as in CommonMark), so `"\t"`
/// and `"  \t"` both yield 4. A line consisting only of spaces and tabs
/// yields the width of that whitespace; an empty line yields 0.
pub fn get_block_indent(line: &str) -> usize {
    const TAB_STOP: usize = 4;

    let mut column = 0;
    // Only ASCII space and tab count as indentation, so scanning bytes is safe:
    // any other byte (including the first byte of a multi-byte char) ends the scan.
    for byte in line.bytes() {
        match byte {
            b' ' => column += 1,
            // Jump to the next tab stop rather than adding a flat 4 columns.
            b'\t' => column += TAB_STOP - column % TAB_STOP,
            _ => break,
        }
    }
    column
}
60
/// Track block nesting state for a document
///
/// Lines are fed one at a time through `process_line`; the tracker remembers
/// which blocks are currently open. The `Default` value has no open blocks.
#[derive(Debug, Clone, Default)]
pub struct BlockTracker {
    /// Stack of block indentation levels for nesting tracking
    /// (one entry per open block, innermost block last)
    indent_stack: Vec<usize>,
}
67
68impl BlockTracker {
69    pub fn new() -> Self {
70        Self::default()
71    }
72
73    /// Process a line and return whether we're inside a block after processing
74    pub fn process_line(&mut self, line: &str) -> bool {
75        let trimmed = line.trim_start();
76
77        if trimmed.starts_with("///") {
78            let indent = get_block_indent(line);
79
80            if is_block_close(line) {
81                // Closing marker - pop the matching block from stack
82                // Pop the top block if its indent is >= the closing marker's indent
83                if let Some(&top_indent) = self.indent_stack.last()
84                    && top_indent >= indent
85                {
86                    self.indent_stack.pop();
87                }
88            } else if is_block_open(line) {
89                // Opening marker - push to stack
90                self.indent_stack.push(indent);
91            }
92        }
93
94        !self.indent_stack.is_empty()
95    }
96
97    /// Check if we're currently inside a block
98    pub fn is_inside_block(&self) -> bool {
99        !self.indent_stack.is_empty()
100    }
101
102    /// Get current nesting depth
103    pub fn depth(&self) -> usize {
104        self.indent_stack.len()
105    }
106}
107
108/// Detect PyMdown block ranges in content
109/// Returns a vector of byte ranges (start, end) for each block
110pub fn detect_block_ranges(content: &str) -> Vec<ByteRange> {
111    let mut ranges = Vec::new();
112    let mut tracker = BlockTracker::new();
113    let mut block_start: Option<usize> = None;
114    let mut byte_offset = 0;
115
116    for line in content.lines() {
117        let line_len = line.len();
118        let was_inside = tracker.is_inside_block();
119        let is_inside = tracker.process_line(line);
120
121        // Started a new block
122        if !was_inside && is_inside {
123            block_start = Some(byte_offset);
124        }
125        // Exited a block
126        else if was_inside
127            && !is_inside
128            && let Some(start) = block_start.take()
129        {
130            // End at the end of the closing line
131            ranges.push(ByteRange {
132                start,
133                end: byte_offset + line_len,
134            });
135        }
136
137        // Account for newline
138        byte_offset += line_len + 1;
139    }
140
141    // Handle unclosed blocks at end of document
142    if let Some(start) = block_start {
143        ranges.push(ByteRange {
144            start,
145            end: content.len(),
146        });
147    }
148
149    ranges
150}
151
152/// Check if a byte position is within a block
153pub fn is_within_block_ranges(ranges: &[ByteRange], position: usize) -> bool {
154    ranges.iter().any(|r| position >= r.start && position < r.end)
155}
156
/// Extract the block type from an opening line
/// Returns the block type like "caption", "details", "admonition", etc.
///
/// Leading whitespace before the `///` marker and between the marker and the
/// type is ignored. The type is the text up to the first whitespace character
/// or `|`. Returns `None` when the line does not start with `///` or when
/// nothing precedes the first whitespace/`|` after the marker (for example a
/// bare closing `///`).
pub fn extract_block_type(line: &str) -> Option<&str> {
    // `strip_prefix` both checks for the marker and removes it, avoiding a
    // separate `starts_with` test followed by a manual byte-index slice.
    let after_marker = line.trim_start().strip_prefix("///")?.trim_start();

    // Block type is the first word (before any | or whitespace)
    after_marker
        .split(|c: char| c.is_whitespace() || c == '|')
        .next()
        .filter(|name| !name.is_empty())
}
172
/// Extract arguments from a block opening line (text after |)
///
/// Returns everything after the first `|` on the line, with surrounding
/// whitespace trimmed. Returns `None` when the line does not start with `///`
/// (after optional leading whitespace), when it contains no `|`, or when
/// nothing but whitespace follows the `|`.
pub fn extract_block_args(line: &str) -> Option<&str> {
    let after_marker = line.trim_start().strip_prefix("///")?;

    // Split at the first `|`; later pipes remain part of the arguments.
    let (_, raw_args) = after_marker.split_once('|')?;
    let args = raw_args.trim();
    (!args.is_empty()).then_some(args)
}
189
#[cfg(test)]
mod tests {
    use super::*;

    /// Openers need a block type after the marker; everything else is rejected.
    #[test]
    fn test_block_open_detection() {
        let openers = [
            "/// caption",
            "/// details | Summary",
            "/// admonition | Custom Title",
            "/// html | div",
            "/// figure-caption",
            "  /// caption", // Indented
        ];
        for line in openers {
            assert!(is_block_open(line));
        }

        let non_openers = [
            "///",   // Just closing marker
            "///  ", // Just closing with trailing space
            "Regular text",
            "# Heading",
            "```python",  // Code fence
            "// comment", // Not enough slashes
        ];
        for line in non_openers {
            assert!(!is_block_open(line));
        }
    }

    /// Closers are a bare `///` with optional surrounding whitespace.
    #[test]
    fn test_block_close_detection() {
        let closers = ["///", "///  ", "  ///", "    ///  "];
        for line in closers {
            assert!(is_block_close(line));
        }

        let non_closers = [
            "/// caption",
            "/// details | Summary",
            "///caption", // No space, but this matches opening
        ];
        for line in non_closers {
            assert!(!is_block_close(line));
        }
    }

    /// A single block: enter, stay inside on content, leave on the closer.
    #[test]
    fn test_block_tracker() {
        let mut tracker = BlockTracker::new();

        // (line fed to the tracker, nesting depth expected afterwards)
        let steps = [("/// caption", 1), ("This is content.", 1), ("///", 0)];

        for (line, depth_after) in steps {
            let inside_after = depth_after > 0;
            assert_eq!(tracker.process_line(line), inside_after);
            assert_eq!(tracker.is_inside_block(), inside_after);
            assert_eq!(tracker.depth(), depth_after);
        }
    }

    /// An indented block inside another one is tracked as a second level.
    #[test]
    fn test_nested_blocks() {
        let mut tracker = BlockTracker::new();

        // (line fed to the tracker, nesting depth expected afterwards)
        let steps = [
            ("/// details | Outer", 1), // Outer block
            ("  /// caption", 2),       // Inner block
            ("    Content", 2),         // Content
            ("  ///", 1),               // Close inner
            ("///", 0),                 // Close outer
        ];

        for (line, depth_after) in steps {
            let inside_after = depth_after > 0;
            assert_eq!(tracker.process_line(line), inside_after);
            assert_eq!(tracker.is_inside_block(), inside_after);
            assert_eq!(tracker.depth(), depth_after);
        }
    }

    /// Two separate blocks in one document yield two ranges.
    #[test]
    fn test_detect_block_ranges() {
        let content = r#"# Heading

/// caption
Table caption here.
///

Regular text.

/// details | Click to expand
Hidden content.
///
"#;
        let found = detect_block_ranges(content);
        assert_eq!(found.len(), 2);

        // First block
        let caption_block = &content[found[0].start..found[0].end];
        assert!(caption_block.contains("caption"));
        assert!(caption_block.contains("Table caption here"));

        // Second block
        let details_block = &content[found[1].start..found[1].end];
        assert!(details_block.contains("details"));
        assert!(details_block.contains("Hidden content"));
    }

    /// The block type is the first word after the marker.
    #[test]
    fn test_extract_block_type() {
        let cases = [
            ("/// caption", Some("caption")),
            ("/// details | Summary", Some("details")),
            ("/// figure-caption", Some("figure-caption")),
            ("/// admonition | Title", Some("admonition")),
            ("  /// html | div", Some("html")),
            ("///", None),
            ("Regular text", None),
        ];
        for (line, expected) in cases {
            assert_eq!(extract_block_type(line), expected);
        }
    }

    /// Arguments are the trimmed text after the `|` separator.
    #[test]
    fn test_extract_block_args() {
        let cases = [
            ("/// details | Summary Title", Some("Summary Title")),
            ("/// caption | <", Some("<")),
            ("/// figure-caption | 12", Some("12")),
            ("/// html | div", Some("div")),
            ("/// caption", None),
            ("///", None),
        ];
        for (line, expected) in cases {
            assert_eq!(extract_block_args(line), expected);
        }
    }

    /// Indented YAML options belong to the block they follow.
    #[test]
    fn test_block_with_yaml_options() {
        let content = r#"/// caption
    attrs: {id: my-id, class: special}
Caption text here.
///
"#;
        let found = detect_block_ranges(content);
        assert_eq!(found.len(), 1);

        let block_text = &content[found[0].start..found[0].end];
        assert!(block_text.contains("attrs:"));
        assert!(block_text.contains("Caption text"));
    }

    /// A block without a closer extends to the end of the document.
    #[test]
    fn test_unclosed_block() {
        let content = r#"/// caption
This block is never closed.
"#;
        let found = detect_block_ranges(content);
        assert_eq!(found.len(), 1);
        // Should include all content to end of document
        assert_eq!(found[0].end, content.len());
    }

    /// `| <` (caption placed before the content) is parsed as a plain argument.
    #[test]
    fn test_prepend_caption() {
        // Caption before content using | <
        let content = r#"![image](./image.jpeg)

/// caption | <
Caption above the image
///
"#;
        assert_eq!(detect_block_ranges(content).len(), 1);
        assert_eq!(extract_block_args("/// caption | <"), Some("<"));
    }

    /// A numeric argument does not interfere with the block type.
    #[test]
    fn test_figure_caption_with_number() {
        let content = r#"/// figure-caption | 12
Figure 12: Description
///
"#;
        assert_eq!(detect_block_ranges(content).len(), 1);
        assert_eq!(
            extract_block_type("/// figure-caption | 12"),
            Some("figure-caption")
        );
    }
}
375
#[cfg(test)]
mod integration_tests {
    //! Integration tests verifying LintContext correctly marks lines inside PyMdown blocks
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// The `in_pymdown_block` flag covers the opening marker, the content and
    /// the closing marker, and nothing outside them.
    #[test]
    fn test_line_info_in_pymdown_block_flag() {
        let content = r#"# Heading
/// caption
Content line
///
Normal line
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let flagged = |line_num| ctx.line_info(line_num).is_some_and(|info| info.in_pymdown_block);

        // (1-based line number, whether the line belongs to the block)
        let expectations = [
            (1, false), // Heading
            (2, true),  // `/// caption` - opening marker is part of block
            (3, true),  // Content line
            (4, true),  // `///` - closing marker is part of block
            (5, false), // Normal line
        ];

        for (line_num, in_block) in expectations {
            if in_block {
                assert!(flagged(line_num), "Line {line_num} should be in PyMdown block");
            } else {
                assert!(!flagged(line_num), "Line {line_num} should not be in PyMdown block");
            }
        }
    }

    /// Standard flavor must leave the PyMdown flag unset.
    #[test]
    fn test_standard_flavor_ignores_pymdown_syntax() {
        let content = r#"# Heading
/// caption
Content line
///
Normal line
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let flagged = |line_num| ctx.line_info(line_num).is_some_and(|info| info.in_pymdown_block);

        // In Standard flavor, PyMdown blocks should NOT be detected
        for line_num in [2, 3] {
            assert!(
                !flagged(line_num),
                "Standard flavor should NOT recognize PyMdown blocks"
            );
        }
    }

    /// Every line of an outer block, including a nested block, is flagged.
    #[test]
    fn test_nested_pymdown_blocks() {
        let content = r#"# Heading
/// details | Outer
Outer content
  /// caption
  Nested content
  ///
More outer content
///
Normal line
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let flagged = |line_num| ctx.line_info(line_num).is_some_and(|info| info.in_pymdown_block);

        // All lines 2-8 should be inside a PyMdown block
        for line_num in 2..=8 {
            assert!(flagged(line_num), "Line {line_num} should be in PyMdown block");
        }

        // Line 9 (Normal line) - not in block
        assert!(!flagged(9), "Line 9 should not be in PyMdown block");
    }

    /// `skip_pymdown_blocks` drops the block's lines from the filtered view.
    #[test]
    fn test_filtered_lines_skips_pymdown_blocks() {
        use crate::filtered_lines::FilteredLinesExt;

        let content = r#"Line 1
/// caption
Inside block line 3
///
Line 5
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);

        let kept: Vec<_> = ctx.filtered_lines().skip_pymdown_blocks().into_iter().collect();

        // Should only contain lines 1 and 5 (not lines 2-4 which are in the block)
        let kept_line_nums: Vec<_> = kept.iter().map(|entry| entry.line_num).collect();
        assert_eq!(
            kept_line_nums,
            vec![1, 5],
            "filtered_lines should skip PyMdown block lines"
        );
    }
}
499}