rumdl_lib/utils/
mkdocs_tabs.rs

1use super::mkdocs_common::{BytePositionTracker, ContextStateMachine, MKDOCS_CONTENT_INDENT, get_line_indent};
/// MkDocs Content Tabs detection utilities
///
/// The Tabbed extension provides support for grouped content tabs,
/// using `===` markers for tab labels and indentation for tab content.
///
/// Common patterns:
/// - `=== "Tab 1"` - Tab with a quoted label
/// - `=== Tab` - Tab with an unquoted label
/// - Content indented with 4 spaces under each tab
11use lazy_static::lazy_static;
12use regex::Regex;
13
14lazy_static! {
15    /// Pattern to match tab markers
16    /// Matches: === "Label" or === Label
17    /// Lenient: accepts unclosed quotes, escaped quotes within quotes
18    static ref TAB_MARKER: Regex = Regex::new(
19        r"^(\s*)===\s+.*$"  // Just need content after ===
20    ).unwrap();
21
22    /// Simple pattern to check for any tab marker
23    static ref TAB_START: Regex = Regex::new(
24        r"^(\s*)===\s+"
25    ).unwrap();
26}
27
28/// Check if a line is a tab marker
29pub fn is_tab_marker(line: &str) -> bool {
30    // First check if it starts like a tab marker
31    let trimmed_start = line.trim_start();
32    if !trimmed_start.starts_with("===") {
33        return false;
34    }
35
36    // Reject double === (like "=== ===")
37    // Check what comes after the first ===
38    let after_marker = &trimmed_start[3..];
39    if after_marker.trim_start().starts_with("===") {
40        return false; // Double === is invalid
41    }
42
43    let trimmed = line.trim();
44
45    // Must have content after ===
46    if trimmed.len() <= 3 || !trimmed.chars().nth(3).is_some_and(|c| c.is_whitespace()) {
47        return false;
48    }
49
50    // Be lenient with quote matching to handle real-world markdown
51    // A future rule can warn about unclosed quotes
52    // For now, just ensure there's some content after ===
53
54    // Use the original regex as a final check
55    TAB_MARKER.is_match(line)
56}
57
58/// Check if a line starts a tab section
59pub fn is_tab_start(line: &str) -> bool {
60    TAB_START.is_match(line)
61}
62
63/// Get the indentation level of a tab marker
64pub fn get_tab_indent(line: &str) -> Option<usize> {
65    if TAB_MARKER.is_match(line) {
66        // Use consistent indentation calculation (tabs = 4 spaces)
67        return Some(get_line_indent(line));
68    }
69    None
70}
71
72/// Check if a line is part of tab content (based on indentation)
73pub fn is_tab_content(line: &str, base_indent: usize) -> bool {
74    // Empty lines are not considered content on their own
75    // They're handled separately in context
76    if line.trim().is_empty() {
77        return false;
78    }
79
80    // Content must be indented at least MKDOCS_CONTENT_INDENT spaces from the tab marker
81    get_line_indent(line) >= base_indent + MKDOCS_CONTENT_INDENT
82}
83
84/// Check if content at a byte position is within a tab content area
85pub fn is_within_tab_content(content: &str, position: usize) -> bool {
86    let tracker = BytePositionTracker::new(content);
87    let mut state = ContextStateMachine::new();
88    let mut in_tab_group = false;
89
90    for (_idx, line, start, end) in tracker.iter_with_positions() {
91        // Check if we're starting a new tab
92        if is_tab_marker(line) {
93            // If this is the first tab, we're starting a tab group
94            if !in_tab_group {
95                in_tab_group = true;
96            }
97            let indent = get_tab_indent(line).unwrap_or(0);
98            state.enter_context(indent, "tab".to_string());
99        } else if state.is_in_context() {
100            // Check if we're still in tab content
101            if !line.trim().is_empty() && !is_tab_content(line, state.context_indent()) {
102                // Check if this is another tab at the same level (continues the group)
103                if is_tab_marker(line) && get_tab_indent(line).unwrap_or(0) == state.context_indent() {
104                    // Continue with new tab
105                    let indent = get_tab_indent(line).unwrap_or(0);
106                    state.enter_context(indent, "tab".to_string());
107                } else {
108                    // Non-tab content that's not properly indented ends the tab group
109                    state.exit_context();
110                    in_tab_group = false;
111                }
112            }
113        }
114
115        // Check if the position is within this line and we're in a tab
116        if start <= position && position <= end && state.is_in_context() {
117            return true;
118        }
119    }
120
121    false
122}
123
124/// Check if multiple consecutive lines form a tab group
125pub fn get_tab_group_range(lines: &[&str], start_line_idx: usize) -> Option<(usize, usize)> {
126    if start_line_idx >= lines.len() {
127        return None;
128    }
129
130    let start_line = lines[start_line_idx];
131    if !is_tab_marker(start_line) {
132        return None;
133    }
134
135    let base_indent = get_tab_indent(start_line).unwrap_or(0);
136    let mut end_line_idx = start_line_idx;
137
138    // Find where the tab group ends
139    for (idx, line) in lines.iter().enumerate().skip(start_line_idx + 1) {
140        if is_tab_marker(line) && get_tab_indent(line).unwrap_or(0) == base_indent {
141            // Another tab at the same level continues the group
142            end_line_idx = idx;
143        } else if is_tab_content(line, base_indent) {
144            // Indented content within the tab
145            end_line_idx = idx;
146        } else {
147            // Non-tab, non-content line ends the group
148            // Don't include trailing empty lines
149            break;
150        }
151    }
152
153    Some((start_line_idx, end_line_idx))
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Three `=` plus a label is a marker; fewer, more, or no fence is not.
    #[test]
    fn test_tab_marker_detection() {
        let accepted = [
            "=== \"Tab 1\"",
            "=== \"Complex Tab Label\"",
            "=== SimpleTab",
            "  === \"Indented Tab\"",
        ];
        for line in accepted {
            assert!(is_tab_marker(line), "should be a tab marker: {:?}", line);
        }

        let rejected = [
            "== \"Not a tab\"",
            "==== \"Too many equals\"",
            "Regular text",
        ];
        for line in rejected {
            assert!(!is_tab_marker(line), "should not be a tab marker: {:?}", line);
        }
    }

    /// Marker indentation is reported in columns; non-markers give `None`.
    #[test]
    fn test_tab_indent() {
        let cases: [(&str, Option<usize>); 4] = [
            ("=== \"Tab\"", Some(0)),
            ("  === \"Tab\"", Some(2)),
            ("    === \"Tab\"", Some(4)),
            ("Not a tab", None),
        ];
        for (line, expected) in cases {
            assert_eq!(get_tab_indent(line), expected, "indent of {:?}", line);
        }
    }

    /// With the marker at column 0, content needs at least 4 columns of indent.
    #[test]
    fn test_tab_content() {
        let cases: [(&str, bool); 5] = [
            ("    Content", true),
            ("        More indented", true),
            // Empty lines are not considered content on their own
            ("", false),
            ("Not indented", false),
            ("  Only 2 spaces", false),
        ];
        for (line, expected) in cases {
            assert_eq!(is_tab_content(line, 0), expected, "content check for {:?}", line);
        }
    }

    /// Positions inside either tab body are inside; trailing prose is outside.
    #[test]
    fn test_within_tab_content() {
        let content = r#"# Document

=== "Python"

    ```python
    def hello():
        print("Hello")
    ```

=== "JavaScript"

    ```javascript
    function hello() {
        console.log("Hello");
    }
    ```

Regular text outside tabs."#;

        // Byte offset of the first occurrence of `needle` in the fixture.
        let offset_of = |needle: &str| content.find(needle).unwrap();

        assert!(is_within_tab_content(content, offset_of("def hello")));
        assert!(is_within_tab_content(content, offset_of("function hello")));
        assert!(!is_within_tab_content(content, offset_of("Regular text")));
    }

    /// Two adjacent tabs with their content form one group.
    #[test]
    fn test_tab_group_range() {
        let content = "=== \"Tab 1\"\n    Content 1\n=== \"Tab 2\"\n    Content 2\n\nOutside";
        let lines = content.lines().collect::<Vec<&str>>();

        // Includes both tabs and their content
        assert_eq!(get_tab_group_range(&lines, 0), Some((0, 3)));
    }
}