Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8pub mod early_returns;
9pub mod element_cache;
10pub mod emphasis_utils;
11pub mod fix_utils;
12pub mod header_id_utils;
13pub mod jinja_utils;
14pub mod kramdown_utils;
15pub mod line_ending;
16pub mod markdown_elements;
17pub mod mkdocs_abbreviations;
18pub mod mkdocs_admonitions;
19pub mod mkdocs_attr_list;
20pub mod mkdocs_common;
21pub mod mkdocs_config;
22pub mod mkdocs_critic;
23pub mod mkdocs_definition_lists;
24pub mod mkdocs_extensions;
25pub mod mkdocs_footnotes;
26pub mod mkdocs_html_markdown;
27pub mod mkdocs_icons;
28pub mod mkdocs_patterns;
29pub mod mkdocs_snippets;
30pub mod mkdocs_tabs;
31pub mod mkdocs_test_utils;
32pub mod mkdocstrings_refs;
33pub mod pymdown_blocks;
34pub mod quarto_divs;
35pub mod range_utils;
36pub mod regex_cache;
37pub mod sentence_utils;
38pub mod skip_context;
39pub mod string_interner;
40pub mod table_utils;
41pub mod text_reflow;
42pub mod utf8_offsets;
43
44pub use code_block_utils::CodeBlockUtils;
45pub use line_ending::{
46    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
47    normalize_line_ending,
48};
49pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
50pub use range_utils::LineIndex;
51
/// Returns `true` when `line` is a definition line of a definition list (Extended Markdown).
///
/// Definition lists pair a term with one or more definitions, each introduced by a colon:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace is ignored. The first remaining character must be `:` and the
/// character right after it must be whitespace (a space, a tab, or the line's own
/// trailing newline). A lone `:` with nothing after it does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    // The colon is the marker; it only counts when whitespace follows it.
    chars.next() == Some(':') && chars.next().is_some_and(char::is_whitespace)
}
66
/// Check if a line consists only of a template directive with no surrounding text.
///
/// Detects template syntax used in static site generators:
/// - Handlebars/mdBook/Mustache: `{{...}}`
/// - Jinja2/Liquid/Jekyll: `{%...%}`
/// - Hugo shortcodes: `{{<...>}}` or `{{%...%}}`
///
/// Template directives are preprocessor instructions that should not be merged
/// into surrounding paragraphs during reflow.
///
/// Surrounding whitespace is ignored. Empty and whitespace-only lines return `false`.
/// The opening and closing delimiters must be distinct characters, so the malformed
/// line `{%}` is not treated as a directive, while the empty directives `{{}}` and
/// `{%%}` are.
///
/// Only the outermost delimiters are inspected: a line such as `{{ a }} text {{ b }}`
/// starts and ends with a delimiter pair and is therefore still reported as
/// directive-only.
pub fn is_template_directive_only(line: &str) -> bool {
    let trimmed = line.trim();
    // Strip the opener before looking for the closer so the two delimiters can
    // never share a character. An empty line fails the prefix test for both pairs.
    [("{{", "}}"), ("{%", "%}")]
        .iter()
        .any(|&(open, close)| trimmed.strip_prefix(open).is_some_and(|rest| rest.ends_with(close)))
}
83
/// Extension methods for inspecting and rewriting the trailing spaces of a string.
///
/// Only the ASCII space character (`' '`) counts as a trailing space; tabs and any
/// other whitespace end the run. A single final `'\n'` is set aside before the
/// spaces are examined, so the spaces in `"text  \n"` are still trailing. A `'\r'`
/// before that newline is not set aside, so `"text  \r\n"` has no trailing spaces.
pub trait StrExt {
    /// Replace the run of trailing spaces with `replacement`, keeping a final newline.
    ///
    /// `replacement` is appended even when the string has no trailing spaces.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Check if the string has at least one trailing space
    fn has_trailing_spaces(&self) -> bool;

    /// Count the number of trailing spaces in the string
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set one final newline aside so it can be re-attached after the replacement.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(rest) => (rest, "\n"),
            None => (self, ""),
        };

        // Everything up to (but excluding) the run of trailing spaces.
        let kept = body.trim_end_matches(' ');

        let mut out = String::with_capacity(kept.len() + replacement.len() + newline.len());
        out.push_str(kept);
        out.push_str(replacement);
        out.push_str(newline);
        out
    }

    fn has_trailing_spaces(&self) -> bool {
        // A non-zero count means exactly that the newline-stripped text ends in a space.
        self.strip_suffix('\n').unwrap_or(self).ends_with(' ')
    }

    fn trailing_spaces(&self) -> usize {
        let body = self.strip_suffix('\n').unwrap_or(self);

        // A space is a single byte, so the byte-length difference is the space count.
        body.len() - body.trim_end_matches(' ').len()
    }
}
152
153use std::collections::hash_map::DefaultHasher;
154use std::hash::{Hash, Hasher};
155
/// Hash string content into a 64-bit value
///
/// Convenience helper for deriving a cache key from text. The content is fed
/// through a freshly created standard-library `DefaultHasher`, so equal strings
/// produce equal hashes within one build of the program. The standard library
/// does not specify the algorithm behind `DefaultHasher`, so the value should not
/// be relied upon across Rust releases.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
173
#[cfg(test)]
mod tests {
    use super::*;

    // These tests exercise the re-exported `detect_line_ending` helper and pin the
    // line-ending string it reports for a range of inputs.

    /// Content terminated exclusively by LF reports LF.
    #[test]
    fn test_detect_line_ending_pure_lf() {
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), "\n");
    }

    /// Content terminated exclusively by CRLF reports CRLF.
    #[test]
    fn test_detect_line_ending_pure_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            "\r\n"
        );
    }

    /// Mixed endings with LF in the majority (three LF, one CRLF) report LF.
    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            "\n"
        );
    }

    /// Mixed endings with CRLF in the majority (three CRLF, one LF) report CRLF.
    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            "\r\n"
        );
    }

    /// An empty string has no endings to count; the expected fallback is LF.
    #[test]
    fn test_detect_line_ending_empty_string() {
        assert_eq!(detect_line_ending(""), "\n");
    }

    /// A single unterminated line also falls back to LF.
    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        assert_eq!(detect_line_ending("This is a single line with no line ending"), "\n");
    }

    /// Two CRLF and two bare LF endings: the tie is expected to resolve to LF.
    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"), "\n");
    }

    /// One LF ending is enough to report LF.
    #[test]
    fn test_detect_line_ending_single_lf() {
        assert_eq!(detect_line_ending("Line 1\n"), "\n");
    }

    /// One CRLF ending is enough to report CRLF.
    #[test]
    fn test_detect_line_ending_single_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\n"), "\r\n");
    }

    /// A lone CR that is not part of a CRLF pair must not tip the result.
    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // The input holds one bare CR, one CRLF and two bare LF endings, so LF wins.
        assert_eq!(detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"), "\n");
    }
}