rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod code_block_utils;
7// DocumentStructure has been merged into LintContext
8// pub mod document_structure;
9pub mod early_returns;
10pub mod element_cache;
11pub mod emphasis_utils;
12pub mod fix_utils;
13pub mod header_id_utils;
14pub mod jinja_utils;
15pub mod kramdown_utils;
16pub mod line_ending;
17pub mod markdown_elements;
18pub mod mkdocs_abbreviations;
19pub mod mkdocs_admonitions;
20pub mod mkdocs_attr_list;
21pub mod mkdocs_common;
22pub mod mkdocs_critic;
23pub mod mkdocs_definition_lists;
24pub mod mkdocs_extensions;
25pub mod mkdocs_footnotes;
26pub mod mkdocs_icons;
27pub mod mkdocs_patterns;
28pub mod mkdocs_snippets;
29pub mod mkdocs_tabs;
30pub mod mkdocs_test_utils;
31pub mod mkdocstrings_refs;
32pub mod quarto_divs;
33pub mod range_utils;
34pub mod regex_cache;
35pub mod skip_context;
36pub mod string_interner;
37pub mod table_utils;
38pub mod text_reflow;
39pub mod utf8_offsets;
40
41pub use code_block_utils::CodeBlockUtils;
42// pub use document_structure::DocumentStructure;
43pub use line_ending::{
44    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
45    normalize_line_ending,
46};
47pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
48pub use range_utils::LineIndex;
49
/// Report whether `line` looks like a definition-list entry (Extended Markdown).
///
/// A definition list pairs a term with one or more definitions, each
/// introduced by a colon:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace is ignored. The line matches when its first remaining
/// character is `:` and that colon is immediately followed by a whitespace
/// character (space, tab, newline, ...). A lone `:` does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    match line.trim_start().strip_prefix(':') {
        // The marker only counts when whitespace directly follows the colon.
        Some(after_colon) => after_colon.chars().next().is_some_and(char::is_whitespace),
        None => false,
    }
}
64
/// String helpers for inspecting and rewriting trailing spaces.
///
/// All three methods look only at ASCII space characters (`' '`) sitting at
/// the end of the string, ignoring at most one final `'\n'`. Tabs and any
/// other whitespace are never treated as trailing spaces.
pub trait StrExt {
    /// Return a copy of the string in which the run of trailing spaces is
    /// swapped for `replacement`. A single final `'\n'` is kept after the
    /// replacement. `replacement` is inserted even when there are no
    /// trailing spaces.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Return `true` when the string ends with at least one trailing space
    /// (ignoring a single final `'\n'`).
    fn has_trailing_spaces(&self) -> bool;

    /// Return how many spaces end the string (ignoring a single final `'\n'`).
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set aside at most one final newline so it can be re-attached last.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(without_newline) => (without_newline, "\n"),
            None => (self, ""),
        };

        // Only plain spaces are trimmed; a tab (or anything else) ends the run.
        let kept = body.trim_end_matches(' ');

        let mut rebuilt = String::with_capacity(kept.len() + replacement.len() + newline.len());
        rebuilt.push_str(kept);
        rebuilt.push_str(replacement);
        rebuilt.push_str(newline);
        rebuilt
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() != 0
    }

    fn trailing_spaces(&self) -> usize {
        // Ignore at most one final newline before counting.
        let body = self.strip_suffix('\n').unwrap_or(self);

        // A space is the single byte 0x20 in UTF-8 and never occurs inside a
        // multi-byte sequence, so counting bytes equals counting characters.
        body.bytes().rev().take_while(|&byte| byte == b' ').count()
    }
}
133
134use std::collections::hash_map::DefaultHasher;
135use std::hash::{Hash, Hasher};
136
/// Compute a quick 64-bit hash of a string.
///
/// Intended as a cheap cache key for string content. The value is produced by
/// feeding `content` through the standard library's `DefaultHasher`; the
/// standard library does not guarantee that algorithm stays the same across
/// Rust releases, so the result should not be persisted.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
154
#[cfg(test)]
mod tests {
    use super::*;

    // Expected return values of `detect_line_ending`.
    const LF: &str = "\n";
    const CRLF: &str = "\r\n";

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line is terminated by a bare LF.
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line is terminated by CRLF.
        assert_eq!(detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"), CRLF);
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed endings: three bare LF against one CRLF, so LF is expected.
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            LF
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed endings: three CRLF against one bare LF, so CRLF is expected.
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            CRLF
        );
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // With no content at all the expected answer is LF.
        assert_eq!(detect_line_ending(""), LF);
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single line without any terminator: LF is expected.
        assert_eq!(detect_line_ending("This is a single line with no line ending"), LF);
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Edge case: two CRLF and two bare LF. The tie is expected to
        // resolve to LF.
        assert_eq!(detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // Exactly one line ending, a bare LF.
        assert_eq!(detect_line_ending("Line 1\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // Exactly one line ending, a CRLF.
        assert_eq!(detect_line_ending("Line 1\r\n"), CRLF);
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A lone CR that is not part of a CRLF pair must not influence the
        // result. The input holds one CRLF and two bare LF, so LF is expected.
        assert_eq!(detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"), LF);
    }
}