Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8pub mod emphasis_utils;
9pub mod fix_utils;
10pub mod header_id_utils;
11pub mod jinja_utils;
12pub mod kramdown_utils;
13pub mod line_ending;
14pub mod mkdocs_admonitions;
15pub mod mkdocs_attr_list;
16pub mod mkdocs_common;
17pub mod mkdocs_config;
18pub mod mkdocs_critic;
19pub mod mkdocs_definition_lists;
20pub mod mkdocs_extensions;
21pub mod mkdocs_footnotes;
22pub mod mkdocs_html_markdown;
23pub mod mkdocs_icons;
24pub mod mkdocs_patterns;
25pub mod mkdocs_snippets;
26pub mod mkdocs_tabs;
27pub mod mkdocstrings_refs;
28pub mod obsidian_config;
29pub mod parser_options;
30pub mod project_root;
31pub mod pymdown_blocks;
32pub mod quarto_divs;
33pub mod range_utils;
34pub mod regex_cache;
35pub mod sentence_utils;
36pub mod skip_context;
37pub mod string_interner;
38pub mod table_utils;
39pub mod text_reflow;
40pub mod thematic_break;
41pub mod utf8_offsets;
42
43pub use code_block_utils::CodeBlockUtils;
44pub use line_ending::{
45    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
46    normalize_line_ending,
47};
48pub use parser_options::rumdl_parser_options;
49pub use range_utils::LineIndex;
50
/// Calculate the visual indentation width of a string, expanding tabs to spaces.
///
/// Only the leading run of spaces and tabs is measured; scanning stops at the
/// first character that is neither. A space advances the width by one column,
/// while a tab advances it to the next multiple of `tab_width` (with the
/// CommonMark tab width of 4, that is columns 4, 8, 12, ...).
///
/// # Arguments
///
/// * `indent_str` - Text whose leading indentation is measured
/// * `tab_width` - Distance between tab stops, in columns
///
/// # Returns
///
/// The column reached after the leading spaces and tabs. A `tab_width` of 0
/// makes tabs contribute no columns instead of dividing by zero.
pub fn calculate_indentation_width(indent_str: &str, tab_width: usize) -> usize {
    let mut width = 0;
    for ch in indent_str.chars() {
        match ch {
            // A zero tab width defines no tab stops, so a tab cannot advance the column
            // (and the tab-stop arithmetic below would divide by zero).
            '\t' if tab_width == 0 => {}
            '\t' => width = (width / tab_width + 1) * tab_width,
            ' ' => width += 1,
            _ => break,
        }
    }
    width
}
67
68/// Calculate the visual indentation width using default tab width of 4
69pub fn calculate_indentation_width_default(indent_str: &str) -> usize {
70    calculate_indentation_width(indent_str, 4)
71}
72
/// Check whether a line is a definition list item (Extended Markdown).
///
/// Definition lists look like this:
/// ```text
/// Term
/// : Definition
/// ```
///
/// After leading whitespace is skipped, the line must begin with a colon that
/// is immediately followed by a whitespace character. A lone `:` or a colon
/// followed directly by text does not count.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    // Marker = ':' first, then any whitespace character second.
    matches!(chars.next(), Some(':')) && chars.next().is_some_and(char::is_whitespace)
}
87
/// Check if a line consists only of a template directive with no surrounding text.
///
/// Detects template syntax used in static site generators:
/// - Handlebars/mdBook/Mustache: `{{...}}`
/// - Jinja2/Liquid/Jekyll: `{%...%}`
/// - Hugo shortcodes: `{{<...>}}` or `{{%...%}}`
///
/// Template directives are preprocessor instructions that should not be merged
/// into surrounding paragraphs during reflow.
///
/// Surrounding whitespace is ignored. Only the start and end of the trimmed
/// line are inspected: it must open with `{{` and close with `}}`, or open
/// with `{%` and close with `%}`. The opening and closing delimiters must not
/// share characters, so a malformed `{%}` is rejected. Empty and
/// whitespace-only lines return `false`.
pub fn is_template_directive_only(line: &str) -> bool {
    let trimmed = line.trim();
    // Strip the opener first and look for the closer in what remains; this keeps
    // the two delimiters from overlapping (e.g. the `%` in `{%}`).
    let is_wrapped = |open: &str, close: &str| {
        trimmed
            .strip_prefix(open)
            .is_some_and(|rest| rest.ends_with(close))
    };
    is_wrapped("{{", "}}") || is_wrapped("{%", "%}")
}
104
/// Extension methods on `str` for inspecting and rewriting trailing spaces.
///
/// Every method ignores a single final `'\n'` when locating the end of the
/// text, and treats only the space character (`' '`) as trailing: a tab or
/// any other character ends the run.
pub trait StrExt {
    /// Swap the run of trailing spaces for `replacement`, keeping a final `'\n'` if one was present
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Report whether the text ends in at least one space (before an optional final `'\n'`)
    fn has_trailing_spaces(&self) -> bool;

    /// Count the spaces at the end of the text (before an optional final `'\n'`)
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set a single final newline aside so it can be re-attached after the replacement.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(without_newline) => (without_newline, "\n"),
            None => (self, ""),
        };

        // Only ' ' is trimmed; a tab or any other character ends the trailing run.
        let kept = body.trim_end_matches(' ');

        let mut out = String::with_capacity(kept.len() + replacement.len() + newline.len());
        out.push_str(kept);
        out.push_str(replacement);
        out.push_str(newline);
        out
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() != 0
    }

    fn trailing_spaces(&self) -> usize {
        // Drop one final newline, then walk backwards while the characters are spaces.
        let body = self.strip_suffix('\n').unwrap_or(self);
        body.chars().rev().take_while(|&c| c == ' ').count()
    }
}
172
173use std::collections::hash_map::DefaultHasher;
174use std::hash::{Hash, Hasher};
175
/// Hash string content into a 64-bit value.
///
/// Feeds `content` through a freshly constructed `DefaultHasher` and returns
/// the finished digest. Intended as a quick key for caching mechanisms.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
193
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line ends in LF.
        let detected = detect_line_ending("First line\nSecond line\nThird line\n");
        assert_eq!(detected, "\n");
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line ends in CRLF.
        let detected = detect_line_ending("First line\r\nSecond line\r\nThird line\r\n");
        assert_eq!(detected, "\r\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed endings: three LF against one CRLF.
        let detected = detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n");
        assert_eq!(detected, "\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed endings: three CRLF against one LF.
        let detected = detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n");
        assert_eq!(detected, "\r\n");
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // Nothing to count, so the expected answer is the LF default.
        let detected = detect_line_ending("");
        assert_eq!(detected, "\n");
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single unterminated line also yields the LF default.
        let detected = detect_line_ending("This is a single line with no line ending");
        assert_eq!(detected, "\n");
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Two CRLF and two bare LF: a tie is expected to resolve to LF.
        let detected = detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n");
        assert_eq!(detected, "\n");
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // One line terminated by a lone LF.
        let detected = detect_line_ending("Line 1\n");
        assert_eq!(detected, "\n");
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // One line terminated by a lone CRLF.
        let detected = detect_line_ending("Line 1\r\n");
        assert_eq!(detected, "\r\n");
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A bare CR that is not part of a CRLF pair must not be counted.
        // The input holds one CRLF and two bare LF, so LF is expected.
        let detected = detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n");
        assert_eq!(detected, "\n");
    }
}