rumdl_lib/utils/
mkdocs_icons.rs

//! MkDocs emoji and icon extension support
//!
//! This module provides support for the MkDocs Material emoji/icons extension,
//! which allows using shortcodes for various icon sets:
//! - Material Design Icons: `:material-check:`
//! - GitHub Octicons: `:octicons-mark-github-16:`
//! - FontAwesome: `:fontawesome-brands-github:`
//! - Simple Icons: `:simple-github:`
//! - Custom icons: `:custom-icon-name:`
//!
//! ## Syntax
//!
//! ```markdown
//! :material-check:            # Material Design icon
//! :octicons-mark-github-16:   # GitHub Octicon with size
//! :fontawesome-brands-github: # FontAwesome brand icon
//! :fontawesome-solid-star:    # FontAwesome solid icon
//! :simple-github:             # Simple Icons
//! ```
//!
//! ## References
//!
//! - [MkDocs Material Icons](https://squidfunk.github.io/mkdocs-material/reference/icons-emojis/)
//! - [PyMdown Extensions Emoji](https://facelessuser.github.io/pymdown-extensions/extensions/emoji/)
25use regex::Regex;
26use std::sync::LazyLock;
27
28/// Pattern to match MkDocs icon shortcodes
29/// Format: `:prefix-name:` or `:prefix-name-modifier:`
30/// Examples: :material-check:, :octicons-mark-github-16:, :fontawesome-brands-github:
31///
32/// Pattern breakdown:
33/// - Starts and ends with `:`
34/// - First part is the icon set prefix (material, octicons, fontawesome, simple, custom, etc.)
35/// - Followed by hyphen-separated parts (name, modifiers, sizes)
36/// - Each part is lowercase alphanumeric with optional underscores
37static ICON_SHORTCODE_PATTERN: LazyLock<Regex> =
38    LazyLock::new(|| Regex::new(r":([a-z][a-z0-9_]*(?:-[a-z0-9_]+)+):").unwrap());
39
40/// Pattern to match standard emoji shortcodes (GitHub style)
41/// Format: `:emoji_name:` or `:emoji-name:`
42/// Examples: :smile:, :thumbsup:, :+1:, :heart:
43static EMOJI_SHORTCODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r":([a-zA-Z0-9_+-]+):").unwrap());
44
/// Known MkDocs icon set prefixes.
///
/// Each entry is the complete first hyphen-separated segment of a shortcode
/// (e.g. `material` in `:material-check:`).
pub const ICON_SET_PREFIXES: &[&str] = &["material", "octicons", "fontawesome", "simple", "custom"];

/// Parsed icon shortcode information.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IconShortcode {
    /// The full shortcode text including colons (e.g., `:material-check:`)
    pub full_text: String,
    /// The icon set prefix (e.g., `material`, `octicons`)
    pub prefix: String,
    /// The icon name parts after the prefix (e.g., `["check"]` or `["mark", "github", "16"]`)
    pub name_parts: Vec<String>,
    /// Start position in the line (0-indexed byte offset)
    pub start: usize,
    /// End position in the line (0-indexed byte offset, exclusive)
    pub end: usize,
}

impl IconShortcode {
    /// Get the full icon name: the prefix followed by the name parts, joined with hyphens.
    ///
    /// Returns just the prefix when there are no name parts.
    pub fn full_name(&self) -> String {
        if self.name_parts.is_empty() {
            self.prefix.clone()
        } else {
            format!("{}-{}", self.prefix, self.name_parts.join("-"))
        }
    }

    /// Check if this shortcode belongs to a known MkDocs icon set.
    ///
    /// The prefix must be exactly one of [`ICON_SET_PREFIXES`]. A prefix that merely
    /// begins with a known name (e.g. `materialize` or `simpler`) is a different
    /// segment and is not treated as known.
    pub fn is_known_icon_set(&self) -> bool {
        ICON_SET_PREFIXES.contains(&self.prefix.as_str())
    }
}
78
79/// Check if a line contains icon shortcodes
80#[inline]
81pub fn contains_icon_shortcode(line: &str) -> bool {
82    // Fast path: check for colon first
83    if !line.contains(':') {
84        return false;
85    }
86    ICON_SHORTCODE_PATTERN.is_match(line)
87}
88
89/// Check if a line contains any emoji/icon shortcode (both MkDocs icons and standard emoji)
90#[inline]
91pub fn contains_any_shortcode(line: &str) -> bool {
92    if !line.contains(':') {
93        return false;
94    }
95    ICON_SHORTCODE_PATTERN.is_match(line) || EMOJI_SHORTCODE_PATTERN.is_match(line)
96}
97
98/// Find all icon shortcodes in a line
99pub fn find_icon_shortcodes(line: &str) -> Vec<IconShortcode> {
100    if !line.contains(':') {
101        return Vec::new();
102    }
103
104    let mut results = Vec::new();
105
106    for m in ICON_SHORTCODE_PATTERN.find_iter(line) {
107        let full_text = m.as_str().to_string();
108        // Remove the surrounding colons and split by hyphen
109        let inner = &full_text[1..full_text.len() - 1];
110        let parts: Vec<&str> = inner.split('-').collect();
111
112        if parts.is_empty() {
113            continue;
114        }
115
116        let prefix = parts[0].to_string();
117        let name_parts: Vec<String> = parts[1..].iter().map(|&s| s.to_string()).collect();
118
119        results.push(IconShortcode {
120            full_text,
121            prefix,
122            name_parts,
123            start: m.start(),
124            end: m.end(),
125        });
126    }
127
128    results
129}
130
131/// Check if a position in a line is within an icon shortcode
132pub fn is_in_icon_shortcode(line: &str, position: usize) -> bool {
133    for shortcode in find_icon_shortcodes(line) {
134        if shortcode.start <= position && position < shortcode.end {
135            return true;
136        }
137    }
138    false
139}
140
141/// Check if a position in a line is within any emoji/icon shortcode
142pub fn is_in_any_shortcode(line: &str, position: usize) -> bool {
143    if !line.contains(':') {
144        return false;
145    }
146
147    // Check MkDocs icon shortcodes
148    for m in ICON_SHORTCODE_PATTERN.find_iter(line) {
149        if m.start() <= position && position < m.end() {
150            return true;
151        }
152    }
153
154    // Check standard emoji shortcodes
155    for m in EMOJI_SHORTCODE_PATTERN.find_iter(line) {
156        if m.start() <= position && position < m.end() {
157            return true;
158        }
159    }
160
161    false
162}
163
164/// Replace icon shortcodes with placeholder text to avoid false positives in other rules
165///
166/// This is useful for rules like MD037 that might incorrectly flag
167/// characters inside icon shortcodes.
168pub fn mask_icon_shortcodes(line: &str) -> String {
169    if !line.contains(':') {
170        return line.to_string();
171    }
172
173    let mut result = line.to_string();
174    let shortcodes = find_icon_shortcodes(line);
175
176    // Process in reverse order to maintain correct positions
177    for shortcode in shortcodes.into_iter().rev() {
178        let replacement = " ".repeat(shortcode.end - shortcode.start);
179        result.replace_range(shortcode.start..shortcode.end, &replacement);
180    }
181
182    result
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_contains_icon_shortcode() {
        // Valid MkDocs icon shortcodes
        assert!(contains_icon_shortcode("Check :material-check: this"));
        assert!(contains_icon_shortcode(":octicons-mark-github-16:"));
        assert!(contains_icon_shortcode(":fontawesome-brands-github:"));
        assert!(contains_icon_shortcode(":fontawesome-solid-star:"));
        assert!(contains_icon_shortcode(":simple-github:"));

        // Not icon shortcodes (no hyphen in name)
        assert!(!contains_icon_shortcode(":smile:"));
        assert!(!contains_icon_shortcode(":thumbsup:"));
        assert!(!contains_icon_shortcode("No icons here"));
        assert!(!contains_icon_shortcode("Just text"));
    }

    #[test]
    fn test_contains_icon_shortcode_rejects_malformed() {
        // Prefix must start with a lowercase letter
        assert!(!contains_icon_shortcode(":Material-check:"));
        assert!(!contains_icon_shortcode(":-check:"));
        // A hyphen must be followed by at least one name character
        assert!(!contains_icon_shortcode(":material-:"));
        // Closing colon is required
        assert!(!contains_icon_shortcode(":material-check"));
        // Empty input
        assert!(!contains_icon_shortcode(""));
    }

    #[test]
    fn test_find_icon_shortcodes_material() {
        let shortcodes = find_icon_shortcodes("Click :material-check: to confirm");
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].full_text, ":material-check:");
        assert_eq!(shortcodes[0].prefix, "material");
        assert_eq!(shortcodes[0].name_parts, vec!["check"]);
        assert!(shortcodes[0].is_known_icon_set());
    }

    #[test]
    fn test_find_icon_shortcodes_octicons() {
        let shortcodes = find_icon_shortcodes(":octicons-mark-github-16:");
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].prefix, "octicons");
        assert_eq!(shortcodes[0].name_parts, vec!["mark", "github", "16"]);
        assert!(shortcodes[0].is_known_icon_set());
    }

    #[test]
    fn test_find_icon_shortcodes_fontawesome() {
        let shortcodes = find_icon_shortcodes(":fontawesome-brands-github:");
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].prefix, "fontawesome");
        assert_eq!(shortcodes[0].name_parts, vec!["brands", "github"]);

        let shortcodes = find_icon_shortcodes(":fontawesome-solid-star:");
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].name_parts, vec!["solid", "star"]);
    }

    #[test]
    fn test_find_icon_shortcodes_multiple() {
        let shortcodes = find_icon_shortcodes(":material-check: and :material-close:");
        assert_eq!(shortcodes.len(), 2);
        assert_eq!(shortcodes[0].full_text, ":material-check:");
        assert_eq!(shortcodes[1].full_text, ":material-close:");
    }

    #[test]
    fn test_find_icon_shortcodes_none() {
        assert!(find_icon_shortcodes("").is_empty());
        assert!(find_icon_shortcodes("plain text without colons").is_empty());
        assert!(find_icon_shortcodes("only :smile: emoji").is_empty());
    }

    #[test]
    fn test_icon_shortcode_full_name() {
        let shortcodes = find_icon_shortcodes(":octicons-mark-github-16:");
        assert_eq!(shortcodes[0].full_name(), "octicons-mark-github-16");
    }

    #[test]
    fn test_icon_shortcode_full_name_without_name_parts() {
        // Not producible by the parser, but the struct is public and can be built by hand.
        let shortcode = IconShortcode {
            full_text: ":material:".to_string(),
            prefix: "material".to_string(),
            name_parts: Vec::new(),
            start: 0,
            end: 10,
        };
        assert_eq!(shortcode.full_name(), "material");
    }

    #[test]
    fn test_is_in_icon_shortcode() {
        let line = "Text :material-check: more text";
        assert!(!is_in_icon_shortcode(line, 0)); // "T"
        assert!(!is_in_icon_shortcode(line, 4)); // " "
        assert!(is_in_icon_shortcode(line, 5)); // opening ":"
        assert!(is_in_icon_shortcode(line, 10)); // "r"
        assert!(is_in_icon_shortcode(line, 20)); // closing ":"
        assert!(!is_in_icon_shortcode(line, 21)); // " "
    }

    #[test]
    fn test_mask_icon_shortcodes() {
        let line = "Text :material-check: more";
        let masked = mask_icon_shortcodes(line);
        assert_eq!(masked, "Text                  more");
        assert_eq!(masked.len(), line.len());

        let line2 = ":material-a: and :material-b:";
        let masked2 = mask_icon_shortcodes(line2);
        assert!(!masked2.contains(":material"));
        assert_eq!(masked2.len(), line2.len());
    }

    #[test]
    fn test_mask_icon_shortcodes_leaves_other_text_alone() {
        // No colon at all: returned unchanged
        assert_eq!(mask_icon_shortcodes("plain text"), "plain text");
        assert_eq!(mask_icon_shortcodes(""), "");

        // Plain emoji shortcodes are not icon shortcodes and stay visible
        let masked = mask_icon_shortcodes(":smile: :material-check:");
        assert_eq!(masked, format!(":smile: {}", " ".repeat(16)));
    }

    #[test]
    fn test_shortcode_positions() {
        let line = "Pre :material-check: post";
        let shortcodes = find_icon_shortcodes(line);
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].start, 4);
        assert_eq!(shortcodes[0].end, 20);
        assert_eq!(&line[shortcodes[0].start..shortcodes[0].end], ":material-check:");
    }

    #[test]
    fn test_shortcode_positions_are_byte_offsets() {
        // "é" occupies two bytes, so the shortcode starts at byte 3, not character 2.
        let line = "é :material-check:";
        let shortcodes = find_icon_shortcodes(line);
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].start, 3);
        assert_eq!(shortcodes[0].end, 19);
        assert_eq!(&line[shortcodes[0].start..shortcodes[0].end], ":material-check:");

        let masked = mask_icon_shortcodes(line);
        assert_eq!(masked.len(), line.len());
        assert!(masked.starts_with("é "));
    }

    #[test]
    fn test_custom_icon_set_is_known() {
        let shortcodes = find_icon_shortcodes(":custom-my-icon:");
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].prefix, "custom");
        assert!(shortcodes[0].is_known_icon_set());
    }

    #[test]
    fn test_unknown_icon_set() {
        let shortcodes = find_icon_shortcodes(":unknown-prefix-icon:");
        assert_eq!(shortcodes.len(), 1);
        assert!(!shortcodes[0].is_known_icon_set());
    }

    #[test]
    fn test_emoji_vs_icon() {
        // Standard emoji (single word) - not matched by icon pattern
        assert!(!contains_icon_shortcode(":smile:"));
        assert!(!contains_icon_shortcode(":+1:"));

        // MkDocs icons (hyphenated) - matched
        assert!(contains_icon_shortcode(":material-check:"));

        // But both are "any shortcode"
        assert!(contains_any_shortcode(":smile:"));
        assert!(contains_any_shortcode(":material-check:"));

        // Text without a shortcode is neither
        assert!(!contains_any_shortcode("no shortcode"));
        assert!(!contains_any_shortcode(""));
    }

    #[test]
    fn test_is_in_any_shortcode() {
        let line = ":smile: and :material-check:";

        // In emoji
        assert!(is_in_any_shortcode(line, 0)); // opening ":"
        assert!(is_in_any_shortcode(line, 3)); // "i"
        assert!(is_in_any_shortcode(line, 6)); // closing ":"

        // Between shortcodes
        assert!(!is_in_any_shortcode(line, 7)); // " "
        assert!(!is_in_any_shortcode(line, 10)); // "d"

        // In icon
        assert!(is_in_any_shortcode(line, 12)); // opening ":"
        assert!(is_in_any_shortcode(line, 20)); // "l"

        // No colon in the line at all
        assert!(!is_in_any_shortcode("plain", 0));
    }

    #[test]
    fn test_underscore_in_icon_names() {
        let shortcodes = find_icon_shortcodes(":material-file_download:");
        assert_eq!(shortcodes.len(), 1);
        assert_eq!(shortcodes[0].name_parts, vec!["file_download"]);
    }
}