// pulldown_html_ext/utils.rs
//! Utility functions for HTML rendering and string manipulation
use pulldown_cmark::escape::StrWrite;
/// Escape special HTML characters in a string
///
/// Appends `text` to `output`, replacing the five characters that are
/// significant in HTML text and attribute values with entity references:
/// `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `&` → `&amp;` and
/// `'` → `&#x27;`. Every other character is copied unchanged. Existing
/// content of `output` is preserved.
///
/// # Arguments
///
/// * `output` - The string buffer to write to
/// * `text` - The text to escape
///
/// # Example
///
/// ```
/// let mut output = String::new();
/// pulldown_html_ext::utils::escape_html(&mut output, "<div>test</div>");
/// assert_eq!(output, "&lt;div&gt;test&lt;/div&gt;");
/// ```
pub fn escape_html(output: &mut String, text: &str) {
    // TODO - Opt for using the `pulldown-cmark-escape` crate here
    // The escaped form is never shorter than the input, so reserve up front.
    output.reserve(text.len());
    for c in text.chars() {
        match c {
            '<' => output.push_str("&lt;"),
            '>' => output.push_str("&gt;"),
            '"' => output.push_str("&quot;"),
            '&' => output.push_str("&amp;"),
            '\'' => output.push_str("&#x27;"),
            _ => output.push(c),
        }
    }
}
/// Escape special characters in URLs
///
/// Appends `href` to `output`, percent-encoding only the characters
/// `<`, `>`, `"`, `'`, space, `\n`, `\r` and `\t` as `%XX` with uppercase
/// hexadecimal digits. All other characters (including `&`, `%` and
/// non-ASCII text) are copied through unchanged.
///
/// # Arguments
///
/// * `output` - The string buffer to write to
/// * `href` - The URL to escape
///
/// # Example
///
/// ```
/// let mut output = String::new();
/// pulldown_html_ext::utils::escape_href(&mut output, "https://example.com/path with spaces");
/// assert!(output.contains("%20"));
/// ```
pub fn escape_href(output: &mut String, href: &str) {
    for ch in href.chars() {
        let needs_encoding = matches!(ch, '<' | '>' | '"' | '\'' | ' ' | '\n' | '\r' | '\t');
        if needs_encoding {
            // Every character in the set above is ASCII, so its scalar value
            // always fits in two hexadecimal digits.
            write!(output, "%{:02X}", u32::from(ch)).unwrap();
        } else {
            output.push(ch);
        }
    }
}
/// Sanitize a string for use as an HTML ID
///
/// Splits the input on every non-alphanumeric character, drops the empty
/// pieces, lowercases ASCII letters and joins what is left with single
/// hyphens. As a result, runs of separators collapse to one hyphen and the
/// ID never starts or ends with a hyphen. Non-ASCII alphanumeric characters
/// are kept exactly as they appear (they are not lowercased).
///
/// # Arguments
///
/// * `text` - The text to sanitize
///
/// # Example
///
/// ```
/// let id = pulldown_html_ext::utils::sanitize_id("Hello World! 123");
/// assert_eq!(id, "hello-world-123");
/// ```
pub fn sanitize_id(text: &str) -> String {
    let words: Vec<String> = text
        .split(|ch: char| !ch.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();
    words.join("-")
}
/// Count the length of a string in Unicode scalars
///
/// Returns the number of `char`s in `text`, which differs from the UTF-8
/// byte length (`str::len`) as soon as the text contains non-ASCII
/// characters. This is useful for generating heading IDs and other cases
/// where the byte length would be misleading.
///
/// # Arguments
///
/// * `text` - The text to measure
///
/// # Example
///
/// ```
/// let len = pulldown_html_ext::utils::unicode_length("Hello 👋");
/// assert_eq!(len, 7); // 6 ASCII chars + 1 emoji
/// ```
pub fn unicode_length(text: &str) -> usize {
    // `chars()` yields one item per Unicode scalar value.
    let scalars = text.chars();
    scalars.count()
}
#[cfg(test)]
mod tests {
    //! Unit tests for the escaping, ID sanitizing and length helpers.
    use super::*;

    /// The HTML-significant characters are replaced by entity references.
    #[test]
    fn test_escape_html() {
        let mut output = String::new();
        escape_html(&mut output, "<div class=\"test\">&");
        assert_eq!(output, "&lt;div class=&quot;test&quot;&gt;&amp;");
    }

    /// Spaces are percent-encoded while query separators are left alone.
    #[test]
    fn test_escape_href() {
        let mut output = String::new();
        escape_href(
            &mut output,
            "https://example.com/path with spaces?q=test&x=1",
        );
        assert!(output.contains("%20"));
        assert!(!output.contains(' '));
        assert!(output.contains('&')); // URL parameters shouldn't be escaped
    }

    /// Separator runs collapse to one hyphen and edges are trimmed.
    #[test]
    fn test_sanitize_id() {
        assert_eq!(sanitize_id("Hello World!"), "hello-world");
        assert_eq!(sanitize_id("Test 123"), "test-123");
        assert_eq!(sanitize_id("Multiple   Spaces"), "multiple-spaces");
        assert_eq!(sanitize_id("special@#chars"), "special-chars");
        assert_eq!(sanitize_id("--multiple---dashes--"), "multiple-dashes");
    }

    /// Length is measured in Unicode scalar values, not bytes.
    #[test]
    fn test_unicode_length() {
        assert_eq!(unicode_length("Hello"), 5);
        assert_eq!(unicode_length("👋 Hello"), 7);
        assert_eq!(unicode_length("汉字"), 2);
        assert_eq!(unicode_length(""), 0);
    }

    /// A script-injection payload comes out fully neutralised.
    #[test]
    fn test_complex_escaping() {
        let mut output = String::new();
        escape_html(&mut output, "<script>alert('xss')</script>");
        assert_eq!(
            output,
            "&lt;script&gt;alert(&#x27;xss&#x27;)&lt;/script&gt;"
        );
    }

    /// Quotes and angle brackets in hrefs are percent-encoded.
    #[test]
    fn test_href_special_chars() {
        let mut output = String::new();
        escape_href(&mut output, "/path/with\"quotes'and<brackets>");
        assert!(output.contains("%22")); // escaped quote
        assert!(output.contains("%27")); // escaped single quote
        assert!(output.contains("%3C")); // escaped <
        assert!(output.contains("%3E")); // escaped >
    }
}