Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8// DocumentStructure has been merged into LintContext
9// pub mod document_structure;
10pub mod early_returns;
11pub mod element_cache;
12pub mod emphasis_utils;
13pub mod fix_utils;
14pub mod header_id_utils;
15pub mod jinja_utils;
16pub mod kramdown_utils;
17pub mod line_ending;
18pub mod markdown_elements;
19pub mod mkdocs_abbreviations;
20pub mod mkdocs_admonitions;
21pub mod mkdocs_attr_list;
22pub mod mkdocs_common;
23pub mod mkdocs_config;
24pub mod mkdocs_critic;
25pub mod mkdocs_definition_lists;
26pub mod mkdocs_extensions;
27pub mod mkdocs_footnotes;
28pub mod mkdocs_html_markdown;
29pub mod mkdocs_icons;
30pub mod mkdocs_patterns;
31pub mod mkdocs_snippets;
32pub mod mkdocs_tabs;
33pub mod mkdocs_test_utils;
34pub mod mkdocstrings_refs;
35pub mod pymdown_blocks;
36pub mod quarto_divs;
37pub mod range_utils;
38pub mod regex_cache;
39pub mod sentence_utils;
40pub mod skip_context;
41pub mod string_interner;
42pub mod table_utils;
43pub mod text_reflow;
44pub mod utf8_offsets;
45
46pub use code_block_utils::CodeBlockUtils;
47// pub use document_structure::DocumentStructure;
48pub use line_ending::{
49    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
50    normalize_line_ending,
51};
52pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
53pub use range_utils::LineIndex;
54
/// Returns `true` when `line` is the definition part of a definition list
/// (an Extended Markdown feature).
///
/// A definition list pairs a term with a definition introduced by a colon:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace is ignored. The line matches when the first remaining
/// character is `:` and the character immediately after it is whitespace
/// (a space, a tab, or any other Unicode whitespace). A lone `:` does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut rest = line.trim_start().chars();
    // Marker colon first, then at least one whitespace character after it.
    rest.next() == Some(':') && rest.next().is_some_and(char::is_whitespace)
}
69
/// Check if a line consists only of a template directive with no surrounding text.
///
/// Detects template syntax used in static site generators:
/// - Handlebars/mdBook/Mustache: `{{...}}`
/// - Jinja2/Liquid/Jekyll: `{%...%}`
/// - Hugo shortcodes: `{{<...>}}` or `{{%...%}}`
///
/// Template directives are preprocessor instructions that should not be merged
/// into surrounding paragraphs during reflow.
///
/// Surrounding whitespace is ignored. The opening and closing delimiters must
/// not overlap, so `{%}` is rejected while `{%%}` and `{{}}` are accepted.
/// Empty and whitespace-only lines return `false`.
///
/// Only the two ends of the line are inspected: a line that starts with one
/// directive and ends with another (e.g. `{{ a }} text {{ b }}`) is still
/// reported as directive-only.
pub fn is_template_directive_only(line: &str) -> bool {
    let trimmed = line.trim();

    // Remove the opener before looking for the closer, so a single character
    // cannot serve as part of both delimiters (the `%` in `{%}`).
    let wrapped_in = |open: &str, close: &str| {
        trimmed
            .strip_prefix(open)
            .is_some_and(|rest| rest.ends_with(close))
    };

    wrapped_in("{{", "}}") || wrapped_in("{%", "%}")
}
86
/// String extension methods for inspecting and rewriting trailing spaces
pub trait StrExt {
    /// Return a copy of the string in which the run of trailing spaces is
    /// swapped for `replacement`
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Report whether the string ends with at least one trailing space
    fn has_trailing_spaces(&self) -> bool;

    /// Return how many trailing spaces the string ends with
    fn trailing_spaces(&self) -> usize;
}
98
99impl StrExt for str {
100    fn replace_trailing_spaces(&self, replacement: &str) -> String {
101        // Custom implementation to handle both newlines and tabs specially
102
103        // Check if string ends with newline
104        let (content, ends_with_newline) = if let Some(stripped) = self.strip_suffix('\n') {
105            (stripped, true)
106        } else {
107            (self, false)
108        };
109
110        // Find where the trailing spaces begin
111        let mut non_space_len = content.len();
112        for c in content.chars().rev() {
113            if c == ' ' {
114                non_space_len -= 1;
115            } else {
116                break;
117            }
118        }
119
120        // Build the final string
121        let mut result =
122            String::with_capacity(non_space_len + replacement.len() + if ends_with_newline { 1 } else { 0 });
123        result.push_str(&content[..non_space_len]);
124        result.push_str(replacement);
125        if ends_with_newline {
126            result.push('\n');
127        }
128
129        result
130    }
131
132    fn has_trailing_spaces(&self) -> bool {
133        self.trailing_spaces() > 0
134    }
135
136    fn trailing_spaces(&self) -> usize {
137        // Custom implementation to handle both newlines and tabs specially
138
139        // Prepare the string without newline if it ends with one
140        let content = self.strip_suffix('\n').unwrap_or(self);
141
142        // Count only trailing spaces at the end, not tabs
143        let mut space_count = 0;
144        for c in content.chars().rev() {
145            if c == ' ' {
146                space_count += 1;
147            } else {
148                break;
149            }
150        }
151
152        space_count
153    }
154}
155
156use std::collections::hash_map::DefaultHasher;
157use std::hash::{Hash, Hasher};
158
/// Hash a string's content into a 64-bit value
///
/// A small convenience wrapper for cache keys and similar lookups: the content
/// is fed through the standard library's `DefaultHasher` and the resulting
/// digest is returned.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s
/// algorithm, so these values are presumably only meaningful within one build
/// and should not be persisted - confirm against how callers use them.
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::default();
    content.hash(&mut state);
    state.finish()
}
176
#[cfg(test)]
mod tests {
    use super::*;

    // The two values `detect_line_ending` is expected to return in these tests.
    const LF: &str = "\n";
    const CRLF: &str = "\r\n";

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line is terminated by a bare LF
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line is terminated by CRLF
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            CRLF
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed endings: three LF against one CRLF
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            LF
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed endings: three CRLF against one LF
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            CRLF
        );
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // Nothing to inspect, so the LF default is expected
        assert_eq!(detect_line_ending(""), LF);
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // No terminator anywhere in the input, so the LF default is expected
        assert_eq!(
            detect_line_ending("This is a single line with no line ending"),
            LF
        );
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Two CRLF and two bare LF: a tie is expected to resolve to LF
        assert_eq!(
            detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"),
            LF
        );
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // A lone LF terminator
        assert_eq!(detect_line_ending("Line 1\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // A lone CRLF terminator
        assert_eq!(detect_line_ending("Line 1\r\n"), CRLF);
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A CR that is not followed by LF must not count as a line ending:
        // the input holds one stray CR, one CRLF and two bare LF
        assert_eq!(
            detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"),
            LF
        );
    }
}