Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8// DocumentStructure has been merged into LintContext
9// pub mod document_structure;
10pub mod early_returns;
11pub mod element_cache;
12pub mod emphasis_utils;
13pub mod fix_utils;
14pub mod header_id_utils;
15pub mod jinja_utils;
16pub mod kramdown_utils;
17pub mod line_ending;
18pub mod markdown_elements;
19pub mod mkdocs_abbreviations;
20pub mod mkdocs_admonitions;
21pub mod mkdocs_attr_list;
22pub mod mkdocs_common;
23pub mod mkdocs_critic;
24pub mod mkdocs_definition_lists;
25pub mod mkdocs_extensions;
26pub mod mkdocs_footnotes;
27pub mod mkdocs_html_markdown;
28pub mod mkdocs_icons;
29pub mod mkdocs_patterns;
30pub mod mkdocs_snippets;
31pub mod mkdocs_tabs;
32pub mod mkdocs_test_utils;
33pub mod mkdocstrings_refs;
34pub mod pymdown_blocks;
35pub mod quarto_divs;
36pub mod range_utils;
37pub mod regex_cache;
38pub mod sentence_utils;
39pub mod skip_context;
40pub mod string_interner;
41pub mod table_utils;
42pub mod text_reflow;
43pub mod utf8_offsets;
44
45pub use code_block_utils::CodeBlockUtils;
46// pub use document_structure::DocumentStructure;
47pub use line_ending::{
48    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
49    normalize_line_ending,
50};
51pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
52pub use range_utils::LineIndex;
53
/// Report whether `line` is a definition-list definition (Extended Markdown).
///
/// A definition list pairs a term with one or more definitions, each of which
/// is introduced by a colon followed by whitespace:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace on `line` is skipped first. The line then matches when
/// its first character is `:` and the character immediately after the colon
/// is whitespace; a lone `:` with nothing after it does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut rest = line.trim_start().chars();
    // `&&` short-circuits, so the second `next()` only runs after a colon was consumed.
    rest.next() == Some(':') && rest.next().is_some_and(char::is_whitespace)
}
68
/// Extension methods for inspecting and rewriting the trailing spaces of a string.
///
/// In the `str` implementation below, "trailing spaces" means the run of
/// ASCII space characters (`' '`) at the end of the text, measured after a
/// single final `'\n'` (if present) has been set aside. Tabs and other
/// whitespace are not counted and end the run.
pub trait StrExt {
    /// Return a copy of the string with its trailing spaces replaced by `replacement`.
    ///
    /// `replacement` is inserted even when there are no trailing spaces, and a
    /// final `'\n'` is kept after it.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Return `true` when the string has at least one trailing space.
    fn has_trailing_spaces(&self) -> bool;

    /// Return how many trailing spaces the string has.
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set a single final newline aside so it can be re-attached after the replacement.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(without_newline) => (without_newline, "\n"),
            None => (self, ""),
        };

        // Only ' ' is trimmed; a tab or '\r' stops the trim.
        let kept = body.trim_end_matches(' ');

        let mut rebuilt = String::with_capacity(kept.len() + replacement.len() + newline.len());
        rebuilt.push_str(kept);
        rebuilt.push_str(replacement);
        rebuilt.push_str(newline);
        rebuilt
    }

    fn has_trailing_spaces(&self) -> bool {
        // Equivalent to `trailing_spaces() > 0`: the run is non-empty exactly
        // when the text before the final newline ends in a space.
        self.strip_suffix('\n').unwrap_or(self).ends_with(' ')
    }

    fn trailing_spaces(&self) -> usize {
        // Ignore a single final newline, then count spaces from the end
        // until any other character (tabs included) is reached.
        self.strip_suffix('\n')
            .unwrap_or(self)
            .chars()
            .rev()
            .take_while(|&c| c == ' ')
            .count()
    }
}
137
138use std::collections::hash_map::DefaultHasher;
139use std::hash::{Hash, Hasher};
140
/// Hash string content into a 64-bit value, e.g. for use as a cache key.
///
/// The content is fed through a freshly constructed [`DefaultHasher`] from the
/// standard library.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s
/// algorithm, so presumably these values should not be persisted or compared
/// across Rust releases — confirm against the callers.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// The 64-bit hash of `content`
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Content that uses only LF is reported as LF.
    #[test]
    fn test_detect_line_ending_pure_lf() {
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), "\n");
    }

    /// Content that uses only CRLF is reported as CRLF.
    #[test]
    fn test_detect_line_ending_pure_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            "\r\n"
        );
    }

    /// Mixed content with three LF endings and one CRLF is reported as LF.
    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            "\n"
        );
    }

    /// Mixed content with three CRLF endings and one LF is reported as CRLF.
    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            "\r\n"
        );
    }

    /// An empty string is expected to fall back to LF.
    #[test]
    fn test_detect_line_ending_empty_string() {
        assert_eq!(detect_line_ending(""), "\n");
    }

    /// A single line with no terminator at all is expected to fall back to LF.
    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        assert_eq!(
            detect_line_ending("This is a single line with no line ending"),
            "\n"
        );
    }

    /// A tie (two CRLF endings, two bare LF endings) is expected to resolve to LF.
    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        assert_eq!(
            detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"),
            "\n"
        );
    }

    /// One line terminated by a single LF is reported as LF.
    #[test]
    fn test_detect_line_ending_single_lf() {
        assert_eq!(detect_line_ending("Line 1\n"), "\n");
    }

    /// One line terminated by a single CRLF is reported as CRLF.
    #[test]
    fn test_detect_line_ending_single_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\n"), "\r\n");
    }

    /// A bare CR that is not part of a CRLF pair must not be counted:
    /// the input has one CRLF and two bare LF endings, so LF is expected.
    #[test]
    fn test_detect_line_ending_embedded_cr() {
        assert_eq!(
            detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"),
            "\n"
        );
    }
}