rumdl_lib/utils/
mod.rs

//!
//! Shared utilities for rumdl, including code block handling, line-ending helpers, regex helpers, and string extensions.
//! Document structure analysis, formerly provided here, has been merged into `LintContext`.
//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod code_block_utils;
7// DocumentStructure has been merged into LintContext
8// pub mod document_structure;
9pub mod early_returns;
10pub mod element_cache;
11pub mod emphasis_utils;
12pub mod fix_utils;
13pub mod header_id_utils;
14pub mod jinja_utils;
15pub mod kramdown_utils;
16pub mod line_ending;
17pub mod markdown_elements;
18pub mod mkdocs_abbreviations;
19pub mod mkdocs_admonitions;
20pub mod mkdocs_attr_list;
21pub mod mkdocs_common;
22pub mod mkdocs_critic;
23pub mod mkdocs_definition_lists;
24pub mod mkdocs_extensions;
25pub mod mkdocs_footnotes;
26pub mod mkdocs_icons;
27pub mod mkdocs_patterns;
28pub mod mkdocs_snippets;
29pub mod mkdocs_tabs;
30pub mod mkdocs_test_utils;
31pub mod mkdocstrings_refs;
32pub mod quarto_divs;
33pub mod range_utils;
34pub mod regex_cache;
35pub mod sentence_utils;
36pub mod skip_context;
37pub mod string_interner;
38pub mod table_utils;
39pub mod text_reflow;
40pub mod utf8_offsets;
41
42pub use code_block_utils::CodeBlockUtils;
43// pub use document_structure::DocumentStructure;
44pub use line_ending::{
45    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
46    normalize_line_ending,
47};
48pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
49pub use range_utils::LineIndex;
50
/// Report whether `line` is the definition line of a definition list (Extended Markdown).
///
/// A definition list pairs a term with its definition:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace is skipped first. The remainder matches when it starts
/// with `:` and the very next character is whitespace (space, tab, newline, ...).
/// A lone `:` or a colon directly followed by text (`:text`) does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut rest = line.trim_start().chars();
    // First character must be the marker, second must separate it from the text.
    rest.next() == Some(':') && rest.next().is_some_and(char::is_whitespace)
}
65
/// String extensions for inspecting and rewriting trailing spaces.
///
/// "Space" here means exactly the ASCII space character (U+0020). Tabs and
/// other whitespace are never counted or replaced.
pub trait StrExt {
    /// Return a copy of the string with its run of trailing spaces replaced by
    /// `replacement`.
    ///
    /// If the string ends with `\n`, that single newline is kept at the very end,
    /// after the replacement. The replacement is appended even when there are no
    /// trailing spaces to remove.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Check if the string has at least one trailing space
    /// (i.e. `trailing_spaces() > 0`).
    fn has_trailing_spaces(&self) -> bool;

    /// Count the trailing spaces in the string, ignoring one final `\n` if present.
    ///
    /// Only `' '` is counted; the run stops at the first other character. With a
    /// `\r\n` ending only the `\n` is ignored, so the `\r` ends the run and the
    /// result is `0`.
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set aside a single trailing newline so it can be re-attached at the end.
        let (content, newline) = match self.strip_suffix('\n') {
            Some(stripped) => (stripped, "\n"),
            None => (self, ""),
        };

        // Only U+0020 is trimmed; tabs and other whitespace stay in place.
        let kept = content.trim_end_matches(' ');

        let mut result = String::with_capacity(kept.len() + replacement.len() + newline.len());
        result.push_str(kept);
        result.push_str(replacement);
        result.push_str(newline);
        result
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() > 0
    }

    fn trailing_spaces(&self) -> usize {
        // Ignore one final newline, then measure the run of spaces before it.
        let content = self.strip_suffix('\n').unwrap_or(self);

        // A space is a single byte in UTF-8, so the byte-length difference is
        // exactly the number of spaces trimmed.
        content.len() - content.trim_end_matches(' ').len()
    }
}
134
135use std::collections::hash_map::DefaultHasher;
136use std::hash::{Hash, Hasher};
137
/// Hash string content into a `u64`.
///
/// Convenience wrapper that feeds `content` through the standard library's
/// `DefaultHasher`; the doc on the original describes it as intended for
/// caching mechanisms. The same input yields the same value within a build,
/// but the standard library does not specify the algorithm, so values should
/// not be relied upon across Rust releases.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
155
#[cfg(test)]
mod tests {
    use super::*;

    // Expected results, named so each assertion reads as "input -> ending".
    const LF: &str = "\n";
    const CRLF: &str = "\r\n";

    /// Content that only uses LF is reported as LF.
    #[test]
    fn test_detect_line_ending_pure_lf() {
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), LF);
    }

    /// Content that only uses CRLF is reported as CRLF.
    #[test]
    fn test_detect_line_ending_pure_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            CRLF
        );
    }

    /// Mixed endings, LF in the majority (3 LF vs 1 CRLF).
    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            LF
        );
    }

    /// Mixed endings, CRLF in the majority (3 CRLF vs 1 LF).
    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            CRLF
        );
    }

    /// An empty string has no endings at all; LF is the expected default.
    #[test]
    fn test_detect_line_ending_empty_string() {
        assert_eq!(detect_line_ending(""), LF);
    }

    /// A single unterminated line also falls back to LF.
    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        assert_eq!(
            detect_line_ending("This is a single line with no line ending"),
            LF
        );
    }

    /// Tie between the two styles (2 CRLF, 2 bare LF): LF is expected to win.
    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        assert_eq!(
            detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"),
            LF
        );
    }

    /// Exactly one LF.
    #[test]
    fn test_detect_line_ending_single_lf() {
        assert_eq!(detect_line_ending("Line 1\n"), LF);
    }

    /// Exactly one CRLF.
    #[test]
    fn test_detect_line_ending_single_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\n"), CRLF);
    }

    /// A bare CR that is not part of a CRLF pair must not be counted:
    /// the input has 1 CRLF and 2 bare LF, so LF is expected.
    #[test]
    fn test_detect_line_ending_embedded_cr() {
        assert_eq!(
            detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"),
            LF
        );
    }
}