rumdl_lib/utils/
mkdocs_tabs.rs

1use super::mkdocs_common::{BytePositionTracker, ContextStateMachine, MKDOCS_CONTENT_INDENT, get_line_indent};
2use regex::Regex;
3/// MkDocs Content Tabs detection utilities
4///
5/// The Tabbed extension provides support for grouped content tabs
6/// using `===` markers for tab labels and content.
7///
8/// Common patterns:
9/// - `=== "Tab 1"` - Tab with label
10/// - `=== Tab` - Tab without quotes
11/// - Content indented with 4 spaces under each tab
12use std::sync::LazyLock;
13
14/// Pattern to match tab markers
15/// Matches: === "Label" or === Label
16/// Lenient: accepts unclosed quotes, escaped quotes within quotes
17static TAB_MARKER: LazyLock<Regex> = LazyLock::new(|| {
18    Regex::new(
19        r"^(\s*)===\s+.*$", // Just need content after ===
20    )
21    .unwrap()
22});
23
24/// Simple pattern to check for any tab marker
25static TAB_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)===\s+").unwrap());
26
27/// Check if a line is a tab marker
28pub fn is_tab_marker(line: &str) -> bool {
29    // First check if it starts like a tab marker
30    let trimmed_start = line.trim_start();
31    if !trimmed_start.starts_with("===") {
32        return false;
33    }
34
35    // Reject double === (like "=== ===")
36    // Check what comes after the first ===
37    let after_marker = &trimmed_start[3..];
38    if after_marker.trim_start().starts_with("===") {
39        return false; // Double === is invalid
40    }
41
42    let trimmed = line.trim();
43
44    // Must have content after ===
45    if trimmed.len() <= 3 || !trimmed.chars().nth(3).is_some_and(|c| c.is_whitespace()) {
46        return false;
47    }
48
49    // Be lenient with quote matching to handle real-world markdown
50    // A future rule can warn about unclosed quotes
51    // For now, just ensure there's some content after ===
52
53    // Use the original regex as a final check
54    TAB_MARKER.is_match(line)
55}
56
57/// Check if a line starts a tab section
58pub fn is_tab_start(line: &str) -> bool {
59    TAB_START.is_match(line)
60}
61
62/// Get the indentation level of a tab marker
63pub fn get_tab_indent(line: &str) -> Option<usize> {
64    if TAB_MARKER.is_match(line) {
65        // Use consistent indentation calculation (tabs = 4 spaces)
66        return Some(get_line_indent(line));
67    }
68    None
69}
70
71/// Check if a line is part of tab content (based on indentation)
72pub fn is_tab_content(line: &str, base_indent: usize) -> bool {
73    // Empty lines are not considered content on their own
74    // They're handled separately in context
75    if line.trim().is_empty() {
76        return false;
77    }
78
79    // Content must be indented at least MKDOCS_CONTENT_INDENT spaces from the tab marker
80    get_line_indent(line) >= base_indent + MKDOCS_CONTENT_INDENT
81}
82
83/// Check if content at a byte position is within a tab content area
84pub fn is_within_tab_content(content: &str, position: usize) -> bool {
85    let tracker = BytePositionTracker::new(content);
86    let mut state = ContextStateMachine::new();
87    let mut in_tab_group = false;
88
89    for (_idx, line, start, end) in tracker.iter_with_positions() {
90        // Check if we're starting a new tab
91        if is_tab_marker(line) {
92            // If this is the first tab, we're starting a tab group
93            if !in_tab_group {
94                in_tab_group = true;
95            }
96            let indent = get_tab_indent(line).unwrap_or(0);
97            state.enter_context(indent, "tab".to_string());
98        } else if state.is_in_context() {
99            // Check if we're still in tab content
100            if !line.trim().is_empty() && !is_tab_content(line, state.context_indent()) {
101                // Check if this is another tab at the same level (continues the group)
102                if is_tab_marker(line) && get_tab_indent(line).unwrap_or(0) == state.context_indent() {
103                    // Continue with new tab
104                    let indent = get_tab_indent(line).unwrap_or(0);
105                    state.enter_context(indent, "tab".to_string());
106                } else {
107                    // Non-tab content that's not properly indented ends the tab group
108                    state.exit_context();
109                    in_tab_group = false;
110                }
111            }
112        }
113
114        // Check if the position is within this line and we're in a tab
115        if start <= position && position <= end && state.is_in_context() {
116            return true;
117        }
118    }
119
120    false
121}
122
123/// Check if multiple consecutive lines form a tab group
124pub fn get_tab_group_range(lines: &[&str], start_line_idx: usize) -> Option<(usize, usize)> {
125    if start_line_idx >= lines.len() {
126        return None;
127    }
128
129    let start_line = lines[start_line_idx];
130    if !is_tab_marker(start_line) {
131        return None;
132    }
133
134    let base_indent = get_tab_indent(start_line).unwrap_or(0);
135    let mut end_line_idx = start_line_idx;
136
137    // Find where the tab group ends
138    for (idx, line) in lines.iter().enumerate().skip(start_line_idx + 1) {
139        if is_tab_marker(line) && get_tab_indent(line).unwrap_or(0) == base_indent {
140            // Another tab at the same level continues the group
141            end_line_idx = idx;
142        } else if is_tab_content(line, base_indent) {
143            // Indented content within the tab
144            end_line_idx = idx;
145        } else {
146            // Non-tab, non-content line ends the group
147            // Don't include trailing empty lines
148            break;
149        }
150    }
151
152    Some((start_line_idx, end_line_idx))
153}
154
#[cfg(test)]
mod tests {
    use super::*;

    /// Two tabs containing fenced code blocks, followed by ordinary text.
    const TABBED_DOC: &str = r#"# Document

=== "Python"

    ```python
    def hello():
        print("Hello")
    ```

=== "JavaScript"

    ```javascript
    function hello() {
        console.log("Hello");
    }
    ```

Regular text outside tabs."#;

    #[test]
    fn test_tab_marker_detection() {
        let accepted = [
            "=== \"Tab 1\"",
            "=== \"Complex Tab Label\"",
            "=== SimpleTab",
            "  === \"Indented Tab\"",
        ];
        for line in accepted {
            assert!(is_tab_marker(line), "should be a tab marker: {line:?}");
        }

        let rejected = [
            "== \"Not a tab\"",
            "==== \"Too many equals\"",
            "Regular text",
        ];
        for line in rejected {
            assert!(!is_tab_marker(line), "should not be a tab marker: {line:?}");
        }
    }

    #[test]
    fn test_tab_indent() {
        let cases = [
            ("=== \"Tab\"", Some(0)),
            ("  === \"Tab\"", Some(2)),
            ("    === \"Tab\"", Some(4)),
            ("Not a tab", None),
        ];
        for (line, expected) in cases {
            assert_eq!(get_tab_indent(line), expected, "line: {line:?}");
        }
    }

    #[test]
    fn test_tab_content() {
        // With a marker at indent 0, content needs an indent of 4 or more.
        for line in ["    Content", "        More indented"] {
            assert!(is_tab_content(line, 0), "should be content: {line:?}");
        }

        // Blank lines are never content on their own; shallow lines are not
        // content either.
        for line in ["", "Not indented", "  Only 2 spaces"] {
            assert!(!is_tab_content(line, 0), "should not be content: {line:?}");
        }
    }

    #[test]
    fn test_within_tab_content() {
        let offset_of = |needle: &str| TABBED_DOC.find(needle).unwrap();

        assert!(is_within_tab_content(TABBED_DOC, offset_of("def hello")));
        assert!(is_within_tab_content(TABBED_DOC, offset_of("function hello")));
        assert!(!is_within_tab_content(TABBED_DOC, offset_of("Regular text")));
    }

    #[test]
    fn test_tab_group_range() {
        let lines: Vec<&str> =
            "=== \"Tab 1\"\n    Content 1\n=== \"Tab 2\"\n    Content 2\n\nOutside"
                .lines()
                .collect();

        // The range spans both tabs and their content, but not what follows.
        assert_eq!(get_tab_group_range(&lines, 0), Some((0, 3)));
    }
}
226}