Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8pub mod early_returns;
9pub mod emphasis_utils;
10pub mod fix_utils;
11pub mod header_id_utils;
12pub mod jinja_utils;
13pub mod kramdown_utils;
14pub mod line_ending;
15pub mod markdown_elements;
16pub mod mkdocs_abbreviations;
17pub mod mkdocs_admonitions;
18pub mod mkdocs_attr_list;
19pub mod mkdocs_common;
20pub mod mkdocs_config;
21pub mod mkdocs_critic;
22pub mod mkdocs_definition_lists;
23pub mod mkdocs_extensions;
24pub mod mkdocs_footnotes;
25pub mod mkdocs_html_markdown;
26pub mod mkdocs_icons;
27pub mod mkdocs_patterns;
28pub mod mkdocs_snippets;
29pub mod mkdocs_tabs;
30pub mod mkdocs_test_utils;
31pub mod mkdocstrings_refs;
32pub mod obsidian_config;
33pub mod parser_options;
34pub mod pymdown_blocks;
35pub mod quarto_divs;
36pub mod range_utils;
37pub mod regex_cache;
38pub mod sentence_utils;
39pub mod skip_context;
40pub mod string_interner;
41pub mod table_utils;
42pub mod text_reflow;
43pub mod thematic_break;
44pub mod utf8_offsets;
45
46pub use code_block_utils::CodeBlockUtils;
47pub use line_ending::{
48    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
49    normalize_line_ending,
50};
51pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
52pub use parser_options::rumdl_parser_options;
53pub use range_utils::LineIndex;
54
/// Calculate the visual indentation width of a string, expanding tabs to spaces.
///
/// Only the leading run of spaces and tabs is measured; scanning stops at the
/// first character that is neither. A space advances the column by one, and a
/// tab advances it to the next multiple of `tab_width` (with a width of 4 the
/// tab stops are columns 4, 8, 12, ..., as in CommonMark).
///
/// A `tab_width` of 0 has no tab stop to advance to, so tabs are treated as
/// zero-width rather than triggering a division-by-zero panic.
pub fn calculate_indentation_width(indent_str: &str, tab_width: usize) -> usize {
    let mut width = 0;
    for ch in indent_str.chars() {
        match ch {
            // Guard first: the tab-stop formula below divides by `tab_width`.
            '\t' if tab_width == 0 => {}
            '\t' => width = (width / tab_width + 1) * tab_width,
            ' ' => width += 1,
            _ => break,
        }
    }
    width
}
71
72/// Calculate the visual indentation width using default tab width of 4
73pub fn calculate_indentation_width_default(indent_str: &str) -> usize {
74    calculate_indentation_width(indent_str, 4)
75}
76
/// Check if a line is a definition list item (Extended Markdown).
///
/// Definition lists use the pattern:
/// ```text
/// Term
/// : Definition
/// ```
///
/// After leading whitespace is skipped, the line must start with a colon that
/// is immediately followed by a whitespace character.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    chars.next() == Some(':') && chars.next().is_some_and(char::is_whitespace)
}
91
/// Check if a line is wrapped in template directive delimiters.
///
/// Detects template syntax used in static site generators:
/// - Handlebars/mdBook/Mustache: `{{...}}`
/// - Jinja2/Liquid/Jekyll: `{%...%}`
/// - Hugo shortcodes: `{{<...>}}` or `{{%...%}}` (covered by the `{{`/`}}` pair)
///
/// Template directives are preprocessor instructions that should not be merged
/// into surrounding paragraphs during reflow.
///
/// The check is purely positional: after trimming, the line must begin with an
/// opening delimiter and end with the matching closing delimiter. Text sitting
/// between two directives on the same line is not detected. Empty and
/// whitespace-only lines return `false`.
pub fn is_template_directive_only(line: &str) -> bool {
    const DELIMITERS: [(&str, &str); 2] = [("{{", "}}"), ("{%", "%}")];

    let trimmed = line.trim();
    // The closer is looked for only in what remains after the opener has been
    // removed, so the two delimiters can never share a character (`{%}` is
    // three characters and is not a complete `{% ... %}` directive).
    DELIMITERS.iter().any(|&(open, close)| {
        trimmed
            .strip_prefix(open)
            .is_some_and(|rest| rest.ends_with(close))
    })
}
108
/// Extension methods for inspecting and rewriting trailing spaces on strings.
///
/// In the `str` implementation below, every method ignores a single final
/// `'\n'` and looks only at the space character (`' '`): a tab or any other
/// character ends the trailing run.
pub trait StrExt {
    /// Return a copy of the string in which the run of trailing spaces is
    /// replaced by `replacement`.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Return `true` if the string ends with at least one trailing space.
    fn has_trailing_spaces(&self) -> bool;

    /// Count the number of trailing spaces in the string.
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set a single final newline aside so it can be re-attached after the
        // replacement text.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(rest) => (rest, "\n"),
            None => (self, ""),
        };

        // Only ' ' is trimmed; a trailing tab stops the run and is kept.
        let kept = body.trim_end_matches(' ');

        let mut out = String::with_capacity(kept.len() + replacement.len() + newline.len());
        out.push_str(kept);
        // The replacement is appended even when there were no trailing spaces.
        out.push_str(replacement);
        out.push_str(newline);
        out
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() != 0
    }

    fn trailing_spaces(&self) -> usize {
        // Measure the line without its final newline, if it has one.
        let body = self.strip_suffix('\n').unwrap_or(self);

        // A space is one byte in UTF-8, so the byte-length difference equals
        // the number of spaces removed.
        body.len() - body.trim_end_matches(' ').len()
    }
}
177
178use std::collections::hash_map::DefaultHasher;
179use std::hash::{Hash, Hasher};
180
/// Hash string content into a 64-bit value.
///
/// Intended as a quick way to derive a key from string content for caching
/// mechanisms. The hash is produced by the standard library's `DefaultHasher`
/// created with `DefaultHasher::new()`.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    state.finish()
}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected result when LF is detected (also the fallback).
    const LF: &str = "\n";
    /// Expected result when CRLF is detected.
    const CRLF: &str = "\r\n";

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line ends with a bare LF.
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line ends with CRLF.
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            CRLF
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed endings: three LF against one CRLF.
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            LF
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed endings: three CRLF against one LF.
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            CRLF
        );
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // Nothing to count: expect the LF default.
        assert_eq!(detect_line_ending(""), LF);
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single unterminated line: expect the LF default.
        assert_eq!(
            detect_line_ending("This is a single line with no line ending"),
            LF
        );
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Two CRLF and two bare LF: a tie is expected to resolve to LF.
        assert_eq!(detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // Exactly one LF terminator.
        assert_eq!(detect_line_ending("Line 1\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // Exactly one CRLF terminator.
        assert_eq!(detect_line_ending("Line 1\r\n"), CRLF);
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A lone CR (not followed by LF) should not influence the result.
        // The input holds one CRLF and two bare LF, so LF is expected.
        assert_eq!(detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"), LF);
    }
}