Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8pub mod emphasis_utils;
9pub mod fix_utils;
10pub mod header_id_utils;
11pub mod jinja_utils;
12pub mod kramdown_utils;
13pub mod line_ending;
14pub mod mkdocs_admonitions;
15pub mod mkdocs_attr_list;
16pub mod mkdocs_common;
17pub mod mkdocs_config;
18pub mod mkdocs_critic;
19pub mod mkdocs_definition_lists;
20pub mod mkdocs_extensions;
21pub mod mkdocs_footnotes;
22pub mod mkdocs_html_markdown;
23pub mod mkdocs_icons;
24pub mod mkdocs_patterns;
25pub mod mkdocs_snippets;
26pub mod mkdocs_tabs;
27pub mod mkdocstrings_refs;
28pub mod obsidian_config;
29pub mod parser_options;
30pub mod pymdown_blocks;
31pub mod quarto_divs;
32pub mod range_utils;
33pub mod regex_cache;
34pub mod sentence_utils;
35pub mod skip_context;
36pub mod string_interner;
37pub mod table_utils;
38pub mod text_reflow;
39pub mod thematic_break;
40pub mod utf8_offsets;
41
42pub use code_block_utils::CodeBlockUtils;
43pub use line_ending::{
44    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
45    normalize_line_ending,
46};
47pub use parser_options::rumdl_parser_options;
48pub use range_utils::LineIndex;
49
/// Calculate the visual indentation width of a string, expanding tabs to spaces.
///
/// Only the leading run of spaces and tabs is measured; scanning stops at the
/// first character that is neither. Each space advances one column and, per
/// CommonMark, each tab advances to the next tab stop (a multiple of
/// `tab_width`: columns 4, 8, 12, ... for a width of 4).
///
/// # Arguments
///
/// * `indent_str` - Text whose leading indentation is measured
/// * `tab_width` - Distance between tab stops, in columns
///
/// # Returns
///
/// The column reached after the leading indentation. A `tab_width` of 0 has
/// no tab stop to advance to, so tabs then contribute no width instead of
/// triggering a division-by-zero panic.
pub fn calculate_indentation_width(indent_str: &str, tab_width: usize) -> usize {
    let mut width = 0;
    for ch in indent_str.chars() {
        match ch {
            // Guard the division below: a zero tab width has no next tab stop.
            '\t' if tab_width == 0 => {}
            // Round up to the next multiple of `tab_width`.
            '\t' => width = (width / tab_width + 1) * tab_width,
            ' ' => width += 1,
            // First non-indentation character ends the measurement.
            _ => break,
        }
    }
    width
}
66
67/// Calculate the visual indentation width using default tab width of 4
68pub fn calculate_indentation_width_default(indent_str: &str) -> usize {
69    calculate_indentation_width(indent_str, 4)
70}
71
/// Check if a line is a definition list item (Extended Markdown)
///
/// Definition lists use the pattern:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace is ignored. The line matches when its first remaining
/// character is `:` and the character right after it is whitespace, so a bare
/// `:` or `:text` does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    let starts_with_colon = chars.next() == Some(':');
    // The marker must be separated from the definition text by whitespace.
    starts_with_colon && chars.next().is_some_and(char::is_whitespace)
}
86
/// Check if a line consists only of a template directive with no surrounding text.
///
/// Detects template syntax used in static site generators:
/// - Handlebars/mdBook/Mustache: `{{...}}`
/// - Jinja2/Liquid/Jekyll: `{%...%}`
/// - Hugo shortcodes: `{{<...>}}` or `{{%...%}}`
///
/// Template directives are preprocessor instructions that should not be merged
/// into surrounding paragraphs during reflow.
///
/// The check is purely positional: after trimming surrounding whitespace the
/// line must begin with an opening delimiter and end with the matching closing
/// delimiter. The text in between is not inspected, so a line holding several
/// directives (`{{ a }} text {{ b }}`) also matches. Empty and
/// whitespace-only lines never match.
pub fn is_template_directive_only(line: &str) -> bool {
    let trimmed = line.trim();
    // Strip the opener before testing for the closer so both delimiters must be
    // present in full: a bare `{%}` would otherwise match because its `%` is
    // shared by the `{%` prefix and the `%}` suffix.
    let wrapped_by =
        |open: &str, close: &str| trimmed.strip_prefix(open).is_some_and(|rest| rest.ends_with(close));
    wrapped_by("{{", "}}") || wrapped_by("{%", "%}")
}
103
/// Trait for string-related extensions
///
/// All methods look at literal space characters (`' '`) only. Tabs and other
/// whitespace are never treated as trailing spaces, and a single final `'\n'`
/// is ignored when locating the end of the text.
pub trait StrExt {
    /// Replace the run of trailing spaces with `replacement`.
    ///
    /// `replacement` is appended even when there are no trailing spaces. A
    /// single final `'\n'` is preserved after the replacement.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Check if the string has at least one trailing space
    fn has_trailing_spaces(&self) -> bool;

    /// Count the number of trailing spaces in the string
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set a single final newline aside so it can be re-attached at the end.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(without_newline) => (without_newline, "\n"),
            None => (self, ""),
        };

        // Everything before the trailing run of spaces; tabs are left in place.
        let kept = body.trim_end_matches(' ');

        let mut result = String::with_capacity(kept.len() + replacement.len() + newline.len());
        result.push_str(kept);
        result.push_str(replacement);
        result.push_str(newline);
        result
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() != 0
    }

    fn trailing_spaces(&self) -> usize {
        // Measure the text in front of a single final newline, if there is one.
        let body = self.strip_suffix('\n').unwrap_or(self);

        // Walk backwards while the characters are spaces; a tab ends the run.
        body.chars().rev().take_while(|&c| c == ' ').count()
    }
}
171
172use std::collections::hash_map::DefaultHasher;
173use std::hash::{Hash, Hasher};
174
/// Fast hash function for string content
///
/// Produces a 64-bit digest of `content` for use as a key in caching
/// mechanisms, using a freshly constructed standard-library `DefaultHasher`.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `detect_line_ending` reports `expected` for `content`.
    #[track_caller]
    fn assert_detected(content: &str, expected: &str) {
        assert_eq!(detect_line_ending(content), expected);
    }

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line ends in a bare LF.
        assert_detected("First line\nSecond line\nThird line\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line ends in CRLF.
        assert_detected("First line\r\nSecond line\r\nThird line\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed endings: three bare LF against one CRLF.
        assert_detected("First line\nSecond line\r\nThird line\nFourth line\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed endings: three CRLF against one bare LF.
        assert_detected("First line\r\nSecond line\r\nThird line\nFourth line\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // No content at all is expected to fall back to LF.
        assert_detected("", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single unterminated line is expected to fall back to LF.
        assert_detected("This is a single line with no line ending", "\n");
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Two CRLF and two bare LF: a tie is expected to resolve to LF.
        assert_detected("Line 1\r\nLine 2\nLine 3\r\nLine 4\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // One line terminated by a bare LF.
        assert_detected("Line 1\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // One line terminated by CRLF.
        assert_detected("Line 1\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A lone CR that is not part of a CRLF pair should not be counted.
        // The input holds one CRLF and two bare LF, so LF is expected.
        assert_detected("Line 1\rLine 2\nLine 3\r\nLine 4\n", "\n");
    }
}