pulldown_html_ext/
utils.rs

1//! Utility functions for HTML rendering and string manipulation
2
3use pulldown_cmark_escape::StrWrite;
/// Append `text` to `output`, replacing HTML-significant characters with entities.
///
/// The characters `<`, `>`, `"`, `&` and `'` are written as `&lt;`, `&gt;`,
/// `&quot;`, `&amp;` and `&#x27;` respectively; everything else is copied
/// through unchanged. Existing contents of `output` are preserved.
///
/// # Arguments
///
/// * `output` - The string buffer to append to
/// * `text` - The text to escape
///
/// # Example
///
/// ```
/// let mut output = String::new();
/// pulldown_html_ext::utils::escape_html(&mut output, "<div>test</div>");
/// assert_eq!(output, "&lt;div&gt;test&lt;/div&gt;");
/// ```
pub fn escape_html(output: &mut String, text: &str) {
    // TODO - Opt for using the `pulldown-cmark-escape` crate here
    //
    // Every character that needs escaping is a single ASCII byte, so scanning
    // bytes is safe: the slice boundaries below always land on char boundaries.
    let mut copied_up_to = 0;
    for (index, byte) in text.bytes().enumerate() {
        let entity = match byte {
            b'<' => "&lt;",
            b'>' => "&gt;",
            b'"' => "&quot;",
            b'&' => "&amp;",
            b'\'' => "&#x27;",
            _ => continue,
        };
        // Flush the run of ordinary text preceding this special character.
        output.push_str(&text[copied_up_to..index]);
        output.push_str(entity);
        copied_up_to = index + 1;
    }
    // Flush whatever ordinary text follows the last special character.
    output.push_str(&text[copied_up_to..]);
}
31
/// Append `href` to `output`, percent-encoding characters that are unsafe in a URL attribute.
///
/// The characters `<`, `>`, `"`, `'`, space, newline, carriage return and tab
/// are written as `%XX` (two upper-case hex digits). All other characters,
/// including `&` and `%`, are copied through unchanged.
///
/// # Arguments
///
/// * `output` - The string buffer to append to
/// * `href` - The URL to escape
///
/// # Example
///
/// ```
/// let mut output = String::new();
/// pulldown_html_ext::utils::escape_href(&mut output, "https://example.com/path with spaces");
/// assert!(output.contains("%20"));
/// ```
pub fn escape_href(output: &mut String, href: &str) {
    for ch in href.chars() {
        let needs_encoding = matches!(ch, '<' | '>' | '"' | '\'' | ' ' | '\n' | '\r' | '\t');
        if needs_encoding {
            // Every character matched above is ASCII, so its code point fits in one `%XX` escape.
            write!(output, "%{:02X}", u32::from(ch)).unwrap();
        } else {
            output.push(ch);
        }
    }
}
56
/// Sanitize a string for use as an HTML ID
///
/// Alphanumeric characters (as defined by Unicode) are kept and lowercased
/// using Unicode case mapping, so non-ASCII letters such as `É` become `é`.
/// Every run of other characters (spaces, punctuation, existing hyphens)
/// collapses into a single hyphen, and the result never starts or ends with
/// a hyphen. An input with no alphanumeric characters yields an empty string.
///
/// # Arguments
///
/// * `text` - The text to sanitize
///
/// # Example
///
/// ```
/// let id = pulldown_html_ext::utils::sanitize_id("Hello World! 123");
/// assert_eq!(id, "hello-world-123");
/// ```
pub fn sanitize_id(text: &str) -> String {
    let mut id = String::with_capacity(text.len());
    // Set when one or more separator characters have been seen since the last
    // character written to `id`; the hyphen is only emitted once another
    // alphanumeric character follows, which trims trailing separators.
    let mut separator_pending = false;

    for c in text.chars() {
        if !c.is_alphanumeric() {
            separator_pending = true;
            continue;
        }
        // Lowercasing may expand to several characters, some of which (e.g.
        // combining marks) are not alphanumeric; keep only the ones that are.
        for lower in c.to_lowercase().filter(|l| l.is_alphanumeric()) {
            // Skipping the hyphen while `id` is empty trims leading separators.
            if separator_pending && !id.is_empty() {
                id.push('-');
            }
            separator_pending = false;
            id.push(lower);
        }
    }

    id
}
87
/// Count the number of Unicode scalar values (`char`s) in a string
///
/// This differs from `str::len`, which reports the UTF-8 byte length.
/// Note that scalar values are not grapheme clusters: a base letter
/// followed by a combining mark counts as two.
///
/// # Arguments
///
/// * `text` - The text to measure
///
/// # Example
///
/// ```
/// let len = pulldown_html_ext::utils::unicode_length("Hello 👋");
/// assert_eq!(len, 7); // 6 ASCII chars + 1 emoji
/// ```
pub fn unicode_length(text: &str) -> usize {
    // In UTF-8 every scalar value has exactly one leading byte; continuation
    // bytes all have the bit pattern 10xxxxxx, so counting the bytes that are
    // not continuations counts the scalar values.
    text.bytes().filter(|&byte| (byte & 0xC0) != 0x80).count()
}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `escape_html` on `input` with a fresh buffer and returns what was written.
    fn html_escaped(input: &str) -> String {
        let mut buffer = String::new();
        escape_html(&mut buffer, input);
        buffer
    }

    /// Runs `escape_href` on `input` with a fresh buffer and returns what was written.
    fn href_escaped(input: &str) -> String {
        let mut buffer = String::new();
        escape_href(&mut buffer, input);
        buffer
    }

    #[test]
    fn test_escape_html() {
        assert_eq!(
            html_escaped("<div class=\"test\">&"),
            "&lt;div class=&quot;test&quot;&gt;&amp;"
        );
    }

    #[test]
    fn test_escape_href() {
        let escaped = href_escaped("https://example.com/path with spaces?q=test&x=1");
        assert!(escaped.contains("%20"));
        assert!(!escaped.contains(' '));
        // Query-string separators must pass through untouched.
        assert!(escaped.contains('&'));
    }

    #[test]
    fn test_sanitize_id() {
        let cases = [
            ("Hello World!", "hello-world"),
            ("Test 123", "test-123"),
            ("Multiple   Spaces", "multiple-spaces"),
            ("special@#chars", "special-chars"),
            ("--multiple---dashes--", "multiple-dashes"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(sanitize_id(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_unicode_length() {
        let cases = [("Hello", 5), ("👋 Hello", 7), ("汉字", 2), ("", 0)];
        for &(input, expected) in cases.iter() {
            assert_eq!(unicode_length(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_complex_escaping() {
        assert_eq!(
            html_escaped("<script>alert('xss')</script>"),
            "&lt;script&gt;alert(&#x27;xss&#x27;)&lt;/script&gt;"
        );
    }

    #[test]
    fn test_href_special_chars() {
        let escaped = href_escaped("/path/with\"quotes'and<brackets>");
        // In order: double quote, single quote, `<`, `>`.
        for code in ["%22", "%27", "%3C", "%3E"].iter() {
            assert!(escaped.contains(code), "missing {} in {:?}", code, escaped);
        }
    }
}
164        assert!(output.contains("%3E")); // escaped >
165    }
166}