Skip to main content

rumdl_lib/utils/
pymdown_blocks.rs

1//! PyMdown Extensions Blocks detection utilities
2//!
3//! This module provides detection for PyMdown Extensions "Blocks" syntax which uses
4//! `///` markers to create structured content blocks.
5//!
6//! Common patterns:
7//! - `/// caption` - Caption block for figures/tables
8//! - `/// details | Summary title` - Collapsible content
9//! - `/// admonition | Title` - Admonition with custom title
10//! - `/// html | div` - HTML wrapper block
11//! - `///` - Closing marker
12//!
13//! Blocks can have YAML options indented 4 spaces after the header line:
14//! ```text
15//! /// caption
16//!     attrs: {id: my-id}
17//! Caption text
18//! ///
19//! ```
20//!
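//! Common block types: caption, figure-caption, details, admonition, html, definition, tab.
//! Detection is not limited to this list: any identifier following `///` is treated as a block opener.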
22
23use regex::Regex;
24use std::sync::LazyLock;
25
26use crate::utils::skip_context::ByteRange;
27
28/// Pattern to match block opening markers
29/// Matches: /// block-type, /// block-type | args, etc.
30/// Does NOT match a closing /// on its own
31static BLOCK_OPEN_PATTERN: LazyLock<Regex> =
32    LazyLock::new(|| Regex::new(r"^(\s*)///\s*(?:[a-zA-Z][a-zA-Z0-9_-]*)").unwrap());
33
34/// Pattern to match block closing markers
35/// Matches: /// (with optional whitespace before and after)
36static BLOCK_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)///\s*$").unwrap());
37
38/// Check if a line is a block opening marker
39pub fn is_block_open(line: &str) -> bool {
40    BLOCK_OPEN_PATTERN.is_match(line)
41}
42
43/// Check if a line is a block closing marker (just `///`)
44pub fn is_block_close(line: &str) -> bool {
45    BLOCK_CLOSE_PATTERN.is_match(line)
46}
47
/// Get the indentation level of a block marker, measured in columns.
///
/// Only leading spaces and tabs are counted; scanning stops at the first
/// other character. A space advances one column. A tab advances to the next
/// tab stop (the next multiple of 4), which is how CommonMark treats tabs
/// that define block structure. Consequently `"\t"` and `"  \t"` both yield
/// 4 rather than 4 and 6.
///
/// Returns 0 for an empty line or a line with no leading whitespace.
pub fn get_block_indent(line: &str) -> usize {
    /// Width of a tab stop in columns.
    const TAB_STOP: usize = 4;

    line.chars()
        .take_while(|&c| c == ' ' || c == '\t')
        .fold(0, |column, c| {
            if c == '\t' {
                // Jump to the next tab stop instead of adding a flat 4 columns
                (column / TAB_STOP + 1) * TAB_STOP
            } else {
                column + 1
            }
        })
}
60
61/// Track block nesting state for a document
62#[derive(Debug, Clone, Default)]
63pub struct BlockTracker {
64    /// Stack of block indentation levels for nesting tracking
65    indent_stack: Vec<usize>,
66}
67
68impl BlockTracker {
69    pub fn new() -> Self {
70        Self::default()
71    }
72
73    /// Process a line and return whether we're inside a block after processing
74    pub fn process_line(&mut self, line: &str) -> bool {
75        let trimmed = line.trim_start();
76
77        if trimmed.starts_with("///") {
78            let indent = get_block_indent(line);
79
80            if is_block_close(line) {
81                // Closing marker - pop the matching block from stack
82                // Pop the top block if its indent is >= the closing marker's indent
83                if let Some(&top_indent) = self.indent_stack.last()
84                    && top_indent >= indent
85                {
86                    self.indent_stack.pop();
87                }
88            } else if is_block_open(line) {
89                // Opening marker - push to stack
90                self.indent_stack.push(indent);
91            }
92        }
93
94        !self.indent_stack.is_empty()
95    }
96
97    /// Check if we're currently inside a block
98    pub fn is_inside_block(&self) -> bool {
99        !self.indent_stack.is_empty()
100    }
101}
102
103/// Detect PyMdown block ranges in content
104/// Returns a vector of byte ranges (start, end) for each block
105pub fn detect_block_ranges(content: &str) -> Vec<ByteRange> {
106    let mut ranges = Vec::new();
107    let mut tracker = BlockTracker::new();
108    let mut block_start: Option<usize> = None;
109    let mut byte_offset = 0;
110
111    for line in content.lines() {
112        let line_len = line.len();
113        let was_inside = tracker.is_inside_block();
114        let is_inside = tracker.process_line(line);
115
116        // Started a new block
117        if !was_inside && is_inside {
118            block_start = Some(byte_offset);
119        }
120        // Exited a block
121        else if was_inside
122            && !is_inside
123            && let Some(start) = block_start.take()
124        {
125            // End at the end of the closing line
126            ranges.push(ByteRange {
127                start,
128                end: byte_offset + line_len,
129            });
130        }
131
132        // Account for newline
133        byte_offset += line_len + 1;
134    }
135
136    // Handle unclosed blocks at end of document
137    if let Some(start) = block_start {
138        ranges.push(ByteRange {
139            start,
140            end: content.len(),
141        });
142    }
143
144    ranges
145}
146
147/// Check if a byte position is within a block
148pub fn is_within_block_ranges(ranges: &[ByteRange], position: usize) -> bool {
149    ranges.iter().any(|r| position >= r.start && position < r.end)
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// Openers need a block type after `///`; everything else is rejected.
    #[test]
    fn test_block_open_detection() {
        // Valid block openings
        let openers = [
            "/// caption",
            "/// details | Summary",
            "/// admonition | Custom Title",
            "/// html | div",
            "/// figure-caption",
            "  /// caption", // Indented
        ];
        for line in openers {
            assert!(is_block_open(line), "{line:?}");
        }

        // Invalid patterns
        let rejected = [
            "///",   // Just closing marker
            "///  ", // Just closing with trailing space
            "Regular text",
            "# Heading",
            "```python",  // Code fence
            "// comment", // Not enough slashes
        ];
        for line in rejected {
            assert!(!is_block_open(line), "{line:?}");
        }
    }

    /// Closers are a lone `///`, optionally surrounded by whitespace.
    #[test]
    fn test_block_close_detection() {
        let closers = ["///", "///  ", "  ///", "    ///  "];
        for line in closers {
            assert!(is_block_close(line), "{line:?}");
        }

        let rejected = [
            "/// caption",
            "/// details | Summary",
            "///caption", // No space, but this matches opening
        ];
        for line in rejected {
            assert!(!is_block_close(line), "{line:?}");
        }
    }

    /// A single block is entered on its opener and left on its closer.
    #[test]
    fn test_block_tracker() {
        let mut tracker = BlockTracker::new();

        // (line fed to the tracker, whether a block is open afterwards)
        let steps = [
            ("/// caption", true),      // Enter a block
            ("This is content.", true), // Inside content
            ("///", false),             // Exit the block
        ];

        for (line, expected_inside) in steps {
            assert_eq!(tracker.process_line(line), expected_inside);
            assert_eq!(tracker.is_inside_block(), expected_inside);
        }
    }

    /// Closing an inner block keeps the outer block open.
    #[test]
    fn test_nested_blocks() {
        let mut tracker = BlockTracker::new();

        // (line fed to the tracker, whether a block is open afterwards)
        let steps = [
            ("/// details | Outer", true), // Outer block
            ("  /// caption", true),       // Inner block
            ("    Content", true),         // Content
            ("  ///", true),               // Close inner
            ("///", false),                // Close outer
        ];

        for (line, expected_inside) in steps {
            assert_eq!(tracker.process_line(line), expected_inside);
            assert_eq!(tracker.is_inside_block(), expected_inside);
        }
    }

    /// Two separate blocks yield two ranges that cover their own text.
    #[test]
    fn test_detect_block_ranges() {
        let content = r#"# Heading

/// caption
Table caption here.
///

Regular text.

/// details | Click to expand
Hidden content.
///
"#;
        let ranges = detect_block_ranges(content);
        assert_eq!(ranges.len(), 2);

        let block_text = |index: usize| &content[ranges[index].start..ranges[index].end];

        // First block
        let first = block_text(0);
        assert!(first.contains("caption"));
        assert!(first.contains("Table caption here"));

        // Second block
        let second = block_text(1);
        assert!(second.contains("details"));
        assert!(second.contains("Hidden content"));
    }

    /// Indented YAML options belong to the block they follow.
    #[test]
    fn test_block_with_yaml_options() {
        let content = r#"/// caption
    attrs: {id: my-id, class: special}
Caption text here.
///
"#;
        let ranges = detect_block_ranges(content);
        assert_eq!(ranges.len(), 1);

        let only = &ranges[0];
        let block_content = &content[only.start..only.end];
        assert!(block_content.contains("attrs:"));
        assert!(block_content.contains("Caption text"));
    }

    /// A block without a closer runs to the end of the document.
    #[test]
    fn test_unclosed_block() {
        let content = r#"/// caption
This block is never closed.
"#;
        let ranges = detect_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        // Should include all content to end of document
        let document_end = content.len();
        assert_eq!(ranges[0].end, document_end);
    }

    /// `| <` (caption placed before the content) is still a regular opener.
    #[test]
    fn test_prepend_caption() {
        // Caption before content using | <
        let content = r#"![image](./image.jpeg)

/// caption | <
Caption above the image
///
"#;
        let block_count = detect_block_ranges(content).len();
        assert_eq!(block_count, 1);
    }

    /// A numeric argument after the block type does not break detection.
    #[test]
    fn test_figure_caption_with_number() {
        let content = r#"/// figure-caption | 12
Figure 12: Description
///
"#;
        let block_count = detect_block_ranges(content).len();
        assert_eq!(block_count, 1);
    }
}
306
#[cfg(test)]
mod integration_tests {
    //! Integration tests verifying LintContext correctly marks lines inside PyMdown blocks
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// The `in_pymdown_block` flag covers the opener, the content and the closer.
    #[test]
    fn test_line_info_in_pymdown_block_flag() {
        let content = r#"# Heading
/// caption
Content line
///
Normal line
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        // A line without line info counts as "not in a block"
        let in_block = |line_num| ctx.line_info(line_num).is_some_and(|info| info.in_pymdown_block);

        // Line 1 (Heading) - not in block
        assert!(!in_block(1), "Line 1 should not be in PyMdown block");

        // Line 2 (/// caption) - is in block (opening marker is part of block)
        assert!(in_block(2), "Line 2 should be in PyMdown block");

        // Line 3 (Content line) - is in block
        assert!(in_block(3), "Line 3 should be in PyMdown block");

        // Line 4 (///) - is in block (closing marker is part of block)
        assert!(in_block(4), "Line 4 should be in PyMdown block");

        // Line 5 (Normal line) - not in block
        assert!(!in_block(5), "Line 5 should not be in PyMdown block");
    }

    /// The Standard flavor must leave PyMdown block syntax unrecognised.
    #[test]
    fn test_standard_flavor_ignores_pymdown_syntax() {
        let content = r#"# Heading
/// caption
Content line
///
Normal line
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let in_block = |line_num| ctx.line_info(line_num).is_some_and(|info| info.in_pymdown_block);

        // In Standard flavor, PyMdown blocks should NOT be detected
        assert!(!in_block(2), "Standard flavor should NOT recognize PyMdown blocks");
        assert!(!in_block(3), "Standard flavor should NOT recognize PyMdown blocks");
    }

    /// Every line of an outer block, including a nested block, is flagged.
    #[test]
    fn test_nested_pymdown_blocks() {
        let content = r#"# Heading
/// details | Outer
Outer content
  /// caption
  Nested content
  ///
More outer content
///
Normal line
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let in_block = |line_num| ctx.line_info(line_num).is_some_and(|info| info.in_pymdown_block);

        // All lines 2-8 should be inside a PyMdown block
        for line_num in 2..=8 {
            assert!(in_block(line_num), "Line {line_num} should be in PyMdown block");
        }

        // Line 9 (Normal line) - not in block
        assert!(!in_block(9), "Line 9 should not be in PyMdown block");
    }

    /// `skip_pymdown_blocks` drops every line that belongs to a block.
    #[test]
    fn test_filtered_lines_skips_pymdown_blocks() {
        use crate::filtered_lines::FilteredLinesExt;

        let content = r#"Line 1
/// caption
Inside block line 3
///
Line 5
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);

        // Should only contain lines 1 and 5 (not lines 2-4 which are in the block)
        let line_nums: Vec<_> = ctx
            .filtered_lines()
            .skip_pymdown_blocks()
            .into_iter()
            .map(|l| l.line_num)
            .collect();
        assert_eq!(line_nums, vec![1, 5], "filtered_lines should skip PyMdown block lines");
    }
}
430}