rumdl_lib/utils/
mkdocs_common.rs

//! Common utilities and constants for MkDocs pattern detection.
//!
//! This module provides shared functionality used across all MkDocs feature
//! detection modules to reduce code duplication and improve maintainability.
5use crate::config::MarkdownFlavor;
6
/// Standard indentation size for MkDocs content blocks.
///
/// Most MkDocs features require content to be indented by 4 spaces.
pub const MKDOCS_CONTENT_INDENT: usize = 4;

/// Maximum reasonable length for references and identifiers.
///
/// NOTE(review): whether this is measured in bytes or characters is decided
/// by the callers, which are not visible in this module — confirm there.
pub const MAX_REFERENCE_LENGTH: usize = 200;

/// Maximum reasonable length for individual path components.
///
/// NOTE(review): same unit caveat as for reference lengths — confirm at the
/// call sites.
pub const MAX_COMPONENT_LENGTH: usize = 50;
16
/// Trait for MkDocs pattern detection implementations.
///
/// All MkDocs features should implement this trait for consistency.
/// The `Send + Sync` supertrait bounds mean every implementor must be safe to
/// move to and share between threads.
pub trait MkDocsPattern: Send + Sync {
    /// Check if a line matches the pattern's start marker.
    fn is_marker(&self, line: &str) -> bool;

    /// Get the base indentation level of a marker line.
    ///
    /// Presumably returns `None` when `line` is not a marker for this
    /// pattern; the exact contract is defined by each implementor (TODO confirm).
    fn get_indent(&self, line: &str) -> Option<usize>;

    /// Check if a line is part of the pattern's content area.
    ///
    /// `base_indent` is the indentation the content is measured against,
    /// presumably the value reported for the opening marker line (TODO confirm).
    fn is_content(&self, line: &str, base_indent: usize) -> bool;

    /// Check if a byte position is within this pattern's context.
    ///
    /// `position` is a byte offset into `content`.
    fn is_within_context(&self, content: &str, position: usize) -> bool;

    /// Get a descriptive name for this pattern (for debugging).
    fn name(&self) -> &'static str;
}
35
/// Utility for tracking byte positions through document lines.
///
/// Reduces duplication of line-by-line byte position tracking logic.
/// `new` fills `lines` with the result of [`str::lines`] on `content`, so the
/// stored lines never include their terminator (`\n` or `\r\n`).
pub struct BytePositionTracker<'a> {
    /// The full document text that all byte positions refer to.
    pub content: &'a str,
    /// The lines of `content`, without line terminators.
    pub lines: Vec<&'a str>,
}

impl<'a> BytePositionTracker<'a> {
    /// Create a new byte position tracker for the given content.
    pub fn new(content: &'a str) -> Self {
        Self {
            content,
            lines: content.lines().collect(),
        }
    }

    /// Width in bytes of the terminator following a line whose text ends at
    /// byte offset `line_end`.
    ///
    /// Returns 2 when `content` has `\r\n` at that offset and 1 otherwise.
    /// The fallback of 1 covers a plain `\n` and also the end of the content,
    /// where no further line depends on the value.
    fn terminator_len(&self, line_end: usize) -> usize {
        match self.content.as_bytes().get(line_end..) {
            Some(rest) if rest.starts_with(b"\r\n") => 2,
            _ => 1,
        }
    }

    /// Iterate through lines with byte position tracking.
    ///
    /// Returns an iterator of `(line_index, line_content, byte_start, byte_end)`,
    /// where `byte_start..byte_end` is the range of the line's text inside
    /// `content` (the terminator is excluded). Both `\n` and `\r\n` line
    /// endings are accounted for, so offsets stay exact on CRLF documents.
    pub fn iter_with_positions(&self) -> impl Iterator<Item = (usize, &'a str, usize, usize)> + '_ {
        let mut byte_pos = 0;
        self.lines.iter().enumerate().map(move |(idx, line)| {
            let start = byte_pos;
            let end = start + line.len();
            // `str::lines` strips either "\n" or "\r\n"; skip exactly what was stripped.
            byte_pos = end + self.terminator_len(end);
            (idx, *line, start, end)
        })
    }

    /// Check if a position falls within any line matching the given predicate.
    ///
    /// A line covers every byte from its start up to and including its
    /// terminator, so a position that points at the line ending still belongs
    /// to the line it terminates. `predicate` receives the line index and the
    /// line text, and is only invoked for the line that contains `position`.
    pub fn is_position_in_matching_lines<F>(&self, position: usize, predicate: F) -> bool
    where
        F: Fn(usize, &str) -> bool,
    {
        self.iter_with_positions()
            // Line starts only grow, so nothing past this point can contain `position`.
            .take_while(|&(_, _, start, _)| start <= position)
            .any(|(idx, line, _, end)| {
                // Last byte owned by this line: the final byte of its terminator.
                let last = end + self.terminator_len(end) - 1;
                position <= last && predicate(idx, line)
            })
    }
}
77
/// Check if we should process MkDocs patterns for the given flavor.
///
/// Returns `true` only when `flavor` is `MarkdownFlavor::MkDocs`; every other
/// flavor yields `false`.
#[inline]
pub fn should_check_mkdocs(flavor: MarkdownFlavor) -> bool {
    matches!(flavor, MarkdownFlavor::MkDocs)
}
83
/// Measure the leading indentation of `line`.
///
/// Only the leading run of spaces and tabs is counted: each space adds 1 and
/// each tab adds 4, regardless of the column the tab appears in. Counting
/// stops at the first character that is neither a space nor a tab.
pub fn get_line_indent(line: &str) -> usize {
    let mut width = 0;
    // Space and tab are single-byte ASCII, so scanning bytes is equivalent to
    // scanning chars here.
    for byte in line.bytes() {
        match byte {
            b' ' => width += 1,
            b'\t' => width += 4, // Treat tabs as 4 spaces
            _ => break,
        }
    }
    width
}
91
92/// Check if a line is indented enough to be content
93pub fn is_indented_content(line: &str, base_indent: usize, required_indent: usize) -> bool {
94    // Empty lines are handled separately by callers
95    if line.trim().is_empty() {
96        return false;
97    }
98
99    get_line_indent(line) >= base_indent + required_indent
100}
101
/// State machine for tracking nested context boundaries.
///
/// It records a single active context at a time: whether one is open, the
/// indentation it was opened with, and a caller-supplied type label.
/// The default value is "not in a context", indent `0`, no type.
#[derive(Default)]
pub struct ContextStateMachine {
    in_context: bool,
    context_indent: usize,
    context_type: Option<String>,
}

impl ContextStateMachine {
    /// Create a state machine that is not inside any context.
    pub fn new() -> Self {
        Self::default()
    }

    /// Enter a new context with the given indentation and type,
    /// replacing whatever context was recorded before.
    pub fn enter_context(&mut self, indent: usize, context_type: String) {
        *self = Self {
            in_context: true,
            context_indent: indent,
            context_type: Some(context_type),
        };
    }

    /// Exit the current context, resetting every field to its initial value.
    pub fn exit_context(&mut self) {
        *self = Self::default();
    }

    /// Check if currently in a context.
    pub fn is_in_context(&self) -> bool {
        self.in_context
    }

    /// Get the current context indentation (`0` when no context is active).
    pub fn context_indent(&self) -> usize {
        self.context_indent
    }

    /// Get the current context type, or `None` when no context is active.
    pub fn context_type(&self) -> Option<&str> {
        self.context_type.as_deref()
    }
}
153
#[cfg(test)]
mod tests {
    use super::*;

    /// Spaces count as one column each and tabs as four.
    #[test]
    fn test_get_line_indent() {
        let cases: &[(&str, usize)] = &[
            ("no indent", 0),
            ("  two spaces", 2),
            ("    four spaces", 4),
            ("\tone tab", 4),
            ("\t\ttwo tabs", 8),
            ("  \tmixed", 6), // 2 spaces + 1 tab
        ];

        for &(line, expected) in cases {
            assert_eq!(get_line_indent(line), expected, "line: {line:?}");
        }
    }

    /// Content must reach `base_indent + required_indent`; blank lines never count.
    #[test]
    fn test_is_indented_content() {
        let accepted: &[(&str, usize, usize)] = &[("    content", 0, 4), ("      content", 2, 4)];
        let rejected: &[(&str, usize, usize)] = &[
            ("  content", 0, 4),
            ("", 0, 4),    // Empty line
            ("   ", 0, 4), // Only whitespace
        ];

        for &(line, base, required) in accepted {
            assert!(is_indented_content(line, base, required), "line: {line:?}");
        }
        for &(line, base, required) in rejected {
            assert!(!is_indented_content(line, base, required), "line: {line:?}");
        }
    }

    /// Each line is reported with its index and byte range within the content.
    #[test]
    fn test_byte_position_tracker() {
        let tracker = BytePositionTracker::new("line1\nline2\nline3");

        let expected = vec![(0, "line1", 0, 5), (1, "line2", 6, 11), (2, "line3", 12, 17)];
        let actual: Vec<_> = tracker.iter_with_positions().collect();

        assert_eq!(actual, expected);
    }

    /// The predicate is only consulted for the line containing the position.
    #[test]
    fn test_position_in_matching_lines() {
        fn is_special(_: usize, line: &str) -> bool {
            line == "special"
        }

        let tracker = BytePositionTracker::new("normal\nspecial\nnormal");

        // Position 8 is in "special"
        assert!(tracker.is_position_in_matching_lines(8, is_special));
        // Position 2 is in "normal"
        assert!(!tracker.is_position_in_matching_lines(2, is_special));
    }

    /// Entering records indent and type; exiting clears them again.
    #[test]
    fn test_context_state_machine() {
        let mut machine = ContextStateMachine::new();
        assert!(!machine.is_in_context());

        machine.enter_context(4, String::from("admonition"));
        assert!(machine.is_in_context());
        assert_eq!(machine.context_indent(), 4);
        assert_eq!(machine.context_type(), Some("admonition"));

        machine.exit_context();
        assert!(!machine.is_in_context());
        assert_eq!(machine.context_type(), None);
    }
}