Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8pub mod early_returns;
9pub mod emphasis_utils;
10pub mod fix_utils;
11pub mod header_id_utils;
12pub mod jinja_utils;
13pub mod kramdown_utils;
14pub mod line_ending;
15pub mod markdown_elements;
16pub mod mkdocs_abbreviations;
17pub mod mkdocs_admonitions;
18pub mod mkdocs_attr_list;
19pub mod mkdocs_common;
20pub mod mkdocs_config;
21pub mod mkdocs_critic;
22pub mod mkdocs_definition_lists;
23pub mod mkdocs_extensions;
24pub mod mkdocs_footnotes;
25pub mod mkdocs_html_markdown;
26pub mod mkdocs_icons;
27pub mod mkdocs_patterns;
28pub mod mkdocs_snippets;
29pub mod mkdocs_tabs;
30pub mod mkdocs_test_utils;
31pub mod mkdocstrings_refs;
32pub mod pymdown_blocks;
33pub mod quarto_divs;
34pub mod range_utils;
35pub mod regex_cache;
36pub mod sentence_utils;
37pub mod skip_context;
38pub mod string_interner;
39pub mod table_utils;
40pub mod text_reflow;
41pub mod thematic_break;
42pub mod utf8_offsets;
43
44pub use code_block_utils::CodeBlockUtils;
45pub use line_ending::{
46    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
47    normalize_line_ending,
48};
49pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
50pub use range_utils::LineIndex;
51
/// Calculate the visual indentation width of a string, expanding tabs to spaces.
///
/// Only the leading run of spaces and tabs is measured; scanning stops at the
/// first character that is neither. A space advances the width by one column.
/// Per CommonMark, a tab advances to the next tab stop, i.e. the next multiple
/// of `tab_width` (columns 4, 8, 12, ... when `tab_width` is 4).
///
/// A `tab_width` of 0 defines no tab stops, so tabs then contribute no width
/// (instead of triggering a division-by-zero panic).
pub fn calculate_indentation_width(indent_str: &str, tab_width: usize) -> usize {
    let mut width: usize = 0;
    for ch in indent_str.chars() {
        match ch {
            '\t' => {
                // `checked_div` yields `None` only for a zero tab width; in that
                // case the tab is skipped without changing the running width.
                if let Some(stops_passed) = width.checked_div(tab_width) {
                    width = (stops_passed + 1) * tab_width;
                }
            }
            ' ' => width += 1,
            // Anything else ends the indentation prefix.
            _ => break,
        }
    }
    width
}
68
69/// Calculate the visual indentation width using default tab width of 4
70pub fn calculate_indentation_width_default(indent_str: &str) -> usize {
71    calculate_indentation_width(indent_str, 4)
72}
73
/// Report whether a line is a definition list item (Extended Markdown).
///
/// Definition lists pair a term with a definition introduced by a colon:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace is ignored. The line matches when its first remaining
/// character is `:` and the character right after it is whitespace; a lone `:`
/// or a colon followed directly by text does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    line.trim_start()
        .strip_prefix(':')
        .and_then(|after_colon| after_colon.chars().next())
        .is_some_and(char::is_whitespace)
}
88
/// Report whether a line is nothing but a template directive.
///
/// Recognizes the template syntax used by static site generators:
/// - Handlebars/mdBook/Mustache: `{{...}}`
/// - Jinja2/Liquid/Jekyll: `{%...%}`
/// - Hugo shortcodes: `{{<...>}}` or `{{%...%}}` (covered by the `{{`/`}}` pair)
///
/// The check is purely positional: after trimming surrounding whitespace, the
/// line must begin with an opening delimiter and end with the matching closing
/// delimiter. Blank lines never match.
///
/// Template directives are preprocessor instructions that should not be merged
/// into surrounding paragraphs during reflow.
pub fn is_template_directive_only(line: &str) -> bool {
    // (opening, closing) delimiter pairs that mark a directive.
    const DELIMITERS: [(&str, &str); 2] = [("{{", "}}"), ("{%", "%}")];

    let candidate = line.trim();
    !candidate.is_empty()
        && DELIMITERS
            .iter()
            .any(|&(open, close)| candidate.starts_with(open) && candidate.ends_with(close))
}
105
/// Trait for string-related extensions
pub trait StrExt {
    /// Replace the run of trailing spaces with `replacement`
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Check if the string has at least one trailing space
    fn has_trailing_spaces(&self) -> bool;

    /// Count the number of trailing spaces in the string
    fn trailing_spaces(&self) -> usize;
}

/// `str` implementation.
///
/// Only the space character `' '` counts as a trailing space; tabs and other
/// whitespace end the run. A single final `'\n'` is set aside before the
/// trailing run is examined (a preceding `'\r'` is not).
impl StrExt for str {
    /// Returns the text with its trailing spaces removed, followed by
    /// `replacement`, followed by the final `'\n'` if the input had one.
    /// `replacement` is appended even when there were no trailing spaces.
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Separate an optional final newline so it can be re-attached last.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(without_newline) => (without_newline, "\n"),
            None => (self, ""),
        };

        // Drop spaces only; a trailing tab stops the trim.
        let kept = body.trim_end_matches(' ');

        let mut rebuilt = String::with_capacity(kept.len() + replacement.len() + newline.len());
        rebuilt.push_str(kept);
        rebuilt.push_str(replacement);
        rebuilt.push_str(newline);
        rebuilt
    }

    /// True when [`StrExt::trailing_spaces`] reports a non-zero count.
    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() != 0
    }

    /// Number of consecutive `' '` characters at the end of the text, ignoring
    /// one final `'\n'`.
    fn trailing_spaces(&self) -> usize {
        let body = self.strip_suffix('\n').unwrap_or(self);
        body.chars().rev().take_while(|&c| c == ' ').count()
    }
}
174
175use std::collections::hash_map::DefaultHasher;
176use std::hash::{Hash, Hasher};
177
/// Fast hash function for string content
///
/// Produces a quick 64-bit digest of a string for use in caching mechanisms.
/// The value comes from feeding the string into a freshly created standard
/// library `DefaultHasher`.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
195
#[cfg(test)]
mod tests {
    use super::*;

    // Every test below pins the string returned by `detect_line_ending`
    // for one input shape.

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // All three lines end in LF.
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), "\n");
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // All three lines end in CRLF.
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            "\r\n"
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Three bare LF endings against one CRLF: LF is expected.
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            "\n"
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Three CRLF endings against one bare LF: CRLF is expected.
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            "\r\n"
        );
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // Nothing to count: LF is the expected fallback.
        assert_eq!(detect_line_ending(""), "\n");
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // One line without a terminator: LF is the expected fallback.
        assert_eq!(detect_line_ending("This is a single line with no line ending"), "\n");
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Two CRLF endings and two bare LF endings: the tie is expected to
        // resolve to LF.
        assert_eq!(detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"), "\n");
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // A lone LF terminator.
        assert_eq!(detect_line_ending("Line 1\n"), "\n");
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // A lone CRLF terminator.
        assert_eq!(detect_line_ending("Line 1\r\n"), "\r\n");
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A bare CR (not followed by LF) sits next to one CRLF and two bare
        // LF endings; LF is still the expected result.
        assert_eq!(detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"), "\n");
    }
}