pulldown_html_ext/
utils.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
//! Utility functions for HTML rendering and string manipulation

use pulldown_cmark::escape::StrWrite;
/// Append `text` to `output`, replacing HTML-significant characters with
/// entity references.
///
/// The characters `<`, `>`, `"`, `&` and `'` become `&lt;`, `&gt;`, `&quot;`,
/// `&amp;` and `&#x27;` respectively. Every other character is copied through
/// unchanged. Existing contents of `output` are kept; the escaped text is
/// appended after them.
///
/// # Arguments
///
/// * `output` - The string buffer the escaped text is appended to
/// * `text` - The raw text to escape
///
/// # Example
///
/// ```
/// let mut output = String::new();
/// pulldown_html_ext::utils::escape_html(&mut output, "<div>test</div>");
/// assert_eq!(output, "&lt;div&gt;test&lt;/div&gt;");
/// ```
pub fn escape_html(output: &mut String, text: &str) {
    // TODO - Opt for using the `pulldown-cmark-escape` crate here
    // Byte offset of the first character not yet copied into `output`.
    let mut pending_from = 0;
    for (offset, ch) in text.char_indices() {
        let entity = match ch {
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '&' => "&amp;",
            '\'' => "&#x27;",
            _ => continue,
        };
        // Flush the untouched run before this character, then the entity.
        // Offsets come from `char_indices`, so both ends are char boundaries.
        output.push_str(&text[pending_from..offset]);
        output.push_str(entity);
        pending_from = offset + ch.len_utf8();
    }
    // Whatever follows the last special character is copied verbatim.
    output.push_str(&text[pending_from..]);
}

/// Append `href` to `output`, percent-encoding characters that are unsafe
/// inside an attribute value.
///
/// Only `<`, `>`, `"`, `'`, space, newline, carriage return and tab are
/// encoded, each as `%` followed by two uppercase hexadecimal digits. All
/// other characters (including `&`, `%` and non-ASCII text) are copied
/// through unchanged, so this is not a general-purpose URL encoder.
///
/// # Arguments
///
/// * `output` - The string buffer the escaped URL is appended to
/// * `href` - The URL to escape
///
/// # Example
///
/// ```
/// let mut output = String::new();
/// pulldown_html_ext::utils::escape_href(&mut output, "https://example.com/path with spaces");
/// assert!(output.contains("%20"));
/// ```
pub fn escape_href(output: &mut String, href: &str) {
    for ch in href.chars() {
        let needs_encoding = matches!(ch, '<' | '>' | '"' | '\'' | ' ' | '\n' | '\r' | '\t');
        if !needs_encoding {
            output.push(ch);
            continue;
        }
        // Every encoded character is ASCII, so its code point is also its
        // single byte value and two hex digits always suffice.
        write!(output, "%{:02X}", u32::from(ch)).unwrap();
    }
}

/// Sanitize a string for use as an HTML ID
///
/// Alphanumeric characters (as defined by [`char::is_alphanumeric`], so
/// non-ASCII letters and digits are kept) are lowercased using the Unicode
/// lowercase mapping. Every run of other characters acts as a single
/// separator and is emitted as one hyphen; separators at the start or end of
/// the input produce nothing. Input without any alphanumeric character yields
/// an empty string.
///
/// # Arguments
///
/// * `text` - The text to sanitize
///
/// # Example
///
/// ```
/// let id = pulldown_html_ext::utils::sanitize_id("Hello World! 123");
/// assert_eq!(id, "hello-world-123");
///
/// let id = pulldown_html_ext::utils::sanitize_id("Über Café");
/// assert_eq!(id, "über-café");
/// ```
pub fn sanitize_id(text: &str) -> String {
    let mut id = String::with_capacity(text.len());
    // Set when at least one separator has been seen since the last kept
    // character; the hyphen is only written once another kept character
    // follows, which drops leading/trailing hyphens and collapses runs.
    let mut separator_pending = false;

    for c in text.chars() {
        if c.is_alphanumeric() {
            if separator_pending && !id.is_empty() {
                id.push('-');
            }
            separator_pending = false;
            // `to_lowercase` handles non-ASCII letters and may expand to more
            // than one char, so extend rather than push.
            id.extend(c.to_lowercase());
        } else {
            separator_pending = true;
        }
    }

    id
}

/// Return the number of Unicode scalar values (`char`s) in `text`
///
/// This differs from `str::len`, which reports the UTF-8 byte length: a
/// multi-byte character such as an emoji or a CJK ideograph counts as one.
///
/// # Arguments
///
/// * `text` - The text to measure
///
/// # Example
///
/// ```
/// let len = pulldown_html_ext::utils::unicode_length("Hello 👋");
/// assert_eq!(len, 7); // 6 ASCII chars + 1 emoji
/// ```
pub fn unicode_length(text: &str) -> usize {
    let scalars = text.chars();
    scalars.count()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Run `escape_html` on `input` with a fresh buffer and return the result.
    fn html(input: &str) -> String {
        let mut output = String::new();
        escape_html(&mut output, input);
        output
    }

    /// Run `escape_href` on `input` with a fresh buffer and return the result.
    fn href(input: &str) -> String {
        let mut output = String::new();
        escape_href(&mut output, input);
        output
    }

    #[test]
    fn test_escape_html() {
        assert_eq!(
            html("<div class=\"test\">&"),
            "&lt;div class=&quot;test&quot;&gt;&amp;"
        );
        // Nothing to escape: input is copied through untouched.
        assert_eq!(html("plain text"), "plain text");
        assert_eq!(html(""), "");
    }

    #[test]
    fn test_escape_href() {
        // Spaces are percent-encoded; `&` between query parameters is not.
        assert_eq!(
            href("https://example.com/path with spaces?q=test&x=1"),
            "https://example.com/path%20with%20spaces?q=test&x=1"
        );
        assert_eq!(href(""), "");
    }

    #[test]
    fn test_sanitize_id() {
        assert_eq!(sanitize_id("Hello World!"), "hello-world");
        assert_eq!(sanitize_id("Test 123"), "test-123");
        assert_eq!(sanitize_id("Multiple   Spaces"), "multiple-spaces");
        assert_eq!(sanitize_id("special@#chars"), "special-chars");
        assert_eq!(sanitize_id("--multiple---dashes--"), "multiple-dashes");
        assert_eq!(sanitize_id("MiXeD Case"), "mixed-case");
        // Inputs with no alphanumeric characters collapse to an empty id.
        assert_eq!(sanitize_id(""), "");
        assert_eq!(sanitize_id("!!!"), "");
    }

    #[test]
    fn test_unicode_length() {
        assert_eq!(unicode_length("Hello"), 5);
        assert_eq!(unicode_length("👋 Hello"), 7);
        assert_eq!(unicode_length("汉字"), 2);
        assert_eq!(unicode_length(""), 0);
    }

    #[test]
    fn test_complex_escaping() {
        assert_eq!(
            html("<script>alert('xss')</script>"),
            "&lt;script&gt;alert(&#x27;xss&#x27;)&lt;/script&gt;"
        );
    }

    #[test]
    fn test_href_special_chars() {
        // %22 = `"`, %27 = `'`, %3C = `<`, %3E = `>`
        assert_eq!(
            href("/path/with\"quotes'and<brackets>"),
            "/path/with%22quotes%27and%3Cbrackets%3E"
        );
    }

    #[test]
    fn test_href_whitespace_controls() {
        // Newline, carriage return and tab are encoded with zero-padded hex.
        assert_eq!(href("a\nb\rc\td"), "a%0Ab%0Dc%09d");
    }

    #[test]
    fn test_escape_appends_to_existing_output() {
        // Both escapers append to the buffer rather than replacing it.
        let mut output = String::from("a=");
        escape_html(&mut output, "<");
        escape_href(&mut output, " ");
        assert_eq!(output, "a=&lt;%20");
    }
}