use std::borrow::Cow;
/// Lower-case element names that are kept; any tag whose name is not listed here is removed.
const ALLOWED_TAGS: &[&str] = &[
    "strong", "b", "em", "i", "u", "br", "a", "span", "sub", "sup",
];

/// Lower-case CSS property names that may be kept inside a `style` attribute.
const ALLOWED_STYLE_PROPS: &[&str] = &[
    "font-weight",
    "font-style",
    "text-decoration",
    "color",
    "background-color",
];

/// Lower-case prefixes; an `href` value is kept only when it starts with one of them.
const ALLOWED_URL_SCHEMES: &[&str] = &["http://", "https://", "mailto:", "tel:"];
/// Sanitises an HTML fragment so that only allow-listed tags and attributes remain.
///
/// The input is scanned for `<…>` spans. A span starts at a `<` and ends at the first `>`
/// that follows it; quoting inside the span is not taken into account. Every span is passed
/// to `clean_tag` and replaced by whatever that returns (possibly nothing). Text outside the
/// spans is copied to the output unchanged, without any escaping.
///
/// A `<` that has no `>` anywhere after it is dropped, while the text that follows it is kept.
///
/// The scan is linear in the input length: once a search for `>` fails, no later `<` can be
/// terminated either, so the remainder is handled in a single pass instead of being searched
/// again for every further `<`.
pub fn email_safe_html(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(lt) = rest.find('<') {
        // Plain text in front of the tag is passed through verbatim.
        out.push_str(&rest[..lt]);
        let from_lt = &rest[lt..];

        match from_lt.find('>') {
            Some(gt) => {
                // `>` is a single byte, so `..=gt` ends on a char boundary.
                out.push_str(&clean_tag(&from_lt[..=gt]));
                rest = &from_lt[gt + 1..];
            }
            None => {
                // No `>` is left in the input: every remaining `<` is unterminated.
                // Drop them all and keep the other characters.
                out.extend(from_lt.chars().filter(|&c| c != '<'));
                return out;
            }
        }
    }

    out.push_str(rest);
    out
}
/// Rewrites one raw tag (`<` … `>`, both delimiters included) into its sanitised form.
///
/// Returns an empty string when the tag is empty (`<>`) or its name is not in
/// `ALLOWED_TAGS`. Closing tags are re-emitted as `</name>` and anything else inside them is
/// discarded. Opening tags are rebuilt from the lower-cased name plus whatever `clean_attrs`
/// keeps; `br` is always written in self-closing form (`<br />`).
///
/// `raw` must be at least two bytes long and start and end with one-byte delimiters,
/// otherwise the slice below panics.
fn clean_tag(raw: &str) -> Cow<'_, str> {
    // Strip the surrounding `<` and `>`.
    let inner = &raw[1..raw.len() - 1];
    if inner.is_empty() {
        return Cow::Borrowed("");
    }

    let (closing, body) = match inner.strip_prefix('/') {
        Some(after_slash) => (true, after_slash),
        None => (false, inner),
    };
    let body = body.trim();

    // The tag name runs up to the first whitespace character; the remainder holds attributes.
    let (name_part, attr_part) = match body.split_once(char::is_whitespace) {
        Some(parts) => parts,
        None => (body, ""),
    };
    // `<br/>` carries its self-closing slash glued to the name.
    let tag_name = name_part.trim_end_matches('/').to_ascii_lowercase();

    if !ALLOWED_TAGS.iter().any(|&allowed| allowed == tag_name) {
        return Cow::Borrowed("");
    }
    if closing {
        return Cow::Owned(format!("</{tag_name}>"));
    }

    let attrs = if attr_part.is_empty() {
        String::new()
    } else {
        clean_attrs(&tag_name, attr_part)
    };

    let mut rebuilt = String::from("<");
    rebuilt.push_str(&tag_name);
    if !attrs.is_empty() {
        rebuilt.push(' ');
        rebuilt.push_str(&attrs);
    }
    if tag_name == "br" {
        rebuilt.push_str(" /");
    }
    rebuilt.push('>');
    Cow::Owned(rebuilt)
}
/// Parses the attribute section of an opening tag and returns the attributes that survive
/// sanitising, joined by single spaces.
///
/// Only `name="value"` and `name='value'` pairs are considered:
/// * attributes without `=` are skipped;
/// * unquoted values are skipped up to the next ASCII whitespace;
/// * a name that is empty (the text starts with `=`) stops parsing, as does an `=` that is
///   the last character;
/// * a quoted value that is never closed runs to the end of the text;
/// * names starting with `on` are dropped before `clean_one_attr` is consulted.
///
/// Names are lower-cased (ASCII) before they are handed to `clean_one_attr`.
fn clean_attrs(tag: &str, attrs_raw: &str) -> String {
    let is_space = |c: char| c.is_ascii_whitespace();

    let mut kept: Vec<String> = Vec::new();
    let mut remaining = attrs_raw;

    loop {
        let current = remaining.trim_start_matches(is_space);
        if current.is_empty() {
            break;
        }

        // The name ends at `=`, at ASCII whitespace, or at the end of the text.
        let name_len = current
            .find(|c: char| c == '=' || is_space(c))
            .unwrap_or(current.len());
        let name = current[..name_len].to_ascii_lowercase();
        if name.is_empty() {
            break;
        }

        let after_name = &current[name_len..];
        let Some(after_eq) = after_name.strip_prefix('=') else {
            // Bare attribute without a value: ignore it and keep scanning.
            remaining = after_name;
            continue;
        };

        let quote = match after_eq.chars().next() {
            None => break,
            Some(q @ ('"' | '\'')) => q,
            Some(_) => {
                // Unquoted value: skip it entirely.
                let skip = after_eq.find(is_space).unwrap_or(after_eq.len());
                remaining = &after_eq[skip..];
                continue;
            }
        };

        // Both quote characters are one byte wide.
        let quoted = &after_eq[1..];
        let (value, tail) = match quoted.find(quote) {
            Some(end) => (&quoted[..end], &quoted[end + 1..]),
            None => (quoted, ""),
        };
        remaining = tail;

        if name.starts_with("on") {
            continue;
        }
        if let Some(cleaned) = clean_one_attr(tag, &name, value) {
            kept.push(cleaned);
        }
    }

    kept.join(" ")
}
/// Decides whether a single attribute may be kept and, if so, returns it re-serialised as
/// `name="value"` with the value escaped by `escape_attr`.
///
/// * `href`: only on `a`, and only when the value starts with one of `ALLOWED_URL_SCHEMES`
///   (compared ASCII case-insensitively, without trimming the value first).
/// * `target`: only on `a` and only for `_blank`; `rel="noopener"` is appended.
/// * `style`: on any tag, reduced by `clean_inline_style`; dropped when nothing is left.
/// * `title`: only on `a`.
///
/// Every other attribute yields `None`. `name` is matched as given, so it must already be
/// lower-case.
fn clean_one_attr(tag: &str, name: &str, value: &str) -> Option<String> {
    match name {
        "href" if tag == "a" => {
            // Compare just the leading bytes, without allocating a lowered copy of the
            // whole URL for every scheme. `get` returns `None` when the value is shorter
            // than the scheme or the cut would split a multi-byte character; neither case
            // can match an all-ASCII scheme.
            let scheme_ok = ALLOWED_URL_SCHEMES.iter().any(|scheme| {
                matches!(
                    value.get(..scheme.len()),
                    Some(prefix) if prefix.eq_ignore_ascii_case(scheme)
                )
            });
            scheme_ok.then(|| format!("href=\"{}\"", escape_attr(value)))
        }
        "target" if tag == "a" && value == "_blank" => {
            Some("target=\"_blank\" rel=\"noopener\"".to_string())
        }
        "style" => {
            let cleaned = clean_inline_style(value);
            if cleaned.is_empty() {
                None
            } else {
                Some(format!("style=\"{}\"", escape_attr(&cleaned)))
            }
        }
        "title" if tag == "a" => Some(format!("title=\"{}\"", escape_attr(value))),
        _ => None,
    }
}
/// Filters the contents of a `style` attribute down to harmless declarations.
///
/// The value is split on `;` and each `property: value` declaration is kept only when
/// * the property (trimmed, ASCII lower-cased) is listed in `ALLOWED_STYLE_PROPS`,
/// * the trimmed value is non-empty and at most 80 bytes long,
/// * the value contains neither a backslash nor `/*`, and
/// * the lower-cased value contains none of `expression(`, `url(`, `javascript:`, `@import`.
///
/// Surviving declarations are returned as `property: value` joined by `"; "`; the result is
/// empty when nothing survives.
fn clean_inline_style(value: &str) -> String {
    const BLOCKED_FRAGMENTS: [&str; 4] = ["expression(", "url(", "javascript:", "@import"];
    const MAX_VALUE_LEN: usize = 80;

    let mut keep = Vec::new();
    for decl in value.split(';') {
        // Declarations without a colon (including empty ones) are ignored.
        let Some((prop, val)) = decl.split_once(':') else {
            continue;
        };
        let prop = prop.trim().to_ascii_lowercase();
        if !ALLOWED_STYLE_PROPS.contains(&prop.as_str()) {
            continue;
        }

        let val = val.trim();
        if val.is_empty() || val.len() > MAX_VALUE_LEN {
            continue;
        }
        // CSS backslash escapes (`\75rl(`) and comments (`ex/**/pression(`) can spell a
        // blocked token without containing it literally, which would slip past the
        // substring check below. Neither has a legitimate use in the allowed properties.
        if val.contains('\\') || val.contains("/*") {
            continue;
        }
        let val_lower = val.to_ascii_lowercase();
        if BLOCKED_FRAGMENTS
            .iter()
            .any(|&fragment| val_lower.contains(fragment))
        {
            continue;
        }

        keep.push(format!("{prop}: {val}"));
    }
    keep.join("; ")
}
/// Escapes `s` for use inside a double-quoted HTML attribute value.
///
/// `&`, `"`, `<` and `>` are replaced by `&amp;`, `&quot;`, `&lt;` and `&gt;`; every other
/// character is copied unchanged. The input is treated as literal text: character references
/// that are already present are not recognised, so `&amp;` becomes `&amp;amp;`.
fn escape_attr(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '"' => escaped.push_str("&quot;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- tags -----------------------------------------------------------------------------

    #[test]
    fn allows_strong_and_em() {
        assert_eq!(
            email_safe_html("<strong>bold</strong> and <em>italic</em>"),
            "<strong>bold</strong> and <em>italic</em>"
        );
    }

    /// Only the tags are removed; the text between them stays.
    #[test]
    fn strips_script_tag() {
        assert_eq!(
            email_safe_html("hi <script>alert(1)</script> bye"),
            "hi alert(1) bye"
        );
    }

    // --- attributes on links --------------------------------------------------------------

    #[test]
    fn strips_event_handlers() {
        let sanitized = email_safe_html(r#"<a href="https://x.com" onclick="evil()">link</a>"#);
        assert!(sanitized.contains("href=\"https://x.com\""));
        assert!(!sanitized.contains("onclick"));
    }

    /// The anchor itself survives, only the unsafe `href` is removed.
    #[test]
    fn rejects_javascript_href() {
        let sanitized = email_safe_html(r#"<a href="javascript:alert(1)">link</a>"#);
        assert!(sanitized.contains("<a"));
        assert!(!sanitized.contains("javascript"));
        assert!(!sanitized.contains("href"));
    }

    #[test]
    fn allows_mailto_href() {
        let sanitized = email_safe_html(r#"<a href="mailto:a@b.com">email</a>"#);
        assert!(sanitized.contains("mailto:a@b.com"));
    }

    // --- inline styles --------------------------------------------------------------------

    #[test]
    fn keeps_inline_style_whitelist() {
        let sanitized =
            email_safe_html(r#"<span style="color: #ff0000; font-weight: bold">red</span>"#);
        assert!(sanitized.contains("color: #ff0000"));
        assert!(sanitized.contains("font-weight: bold"));
    }

    #[test]
    fn strips_unknown_style_props() {
        let sanitized = email_safe_html(r#"<span style="position: fixed; color: red">x</span>"#);
        assert!(!sanitized.contains("position"));
        assert!(sanitized.contains("color: red"));
    }

    #[test]
    fn strips_css_url_in_style() {
        let sanitized =
            email_safe_html(r#"<span style="background-color: url(http://evil.com)">x</span>"#);
        assert!(!sanitized.contains("url("));
    }

    // --- other attributes and elements ----------------------------------------------------

    #[test]
    fn strips_class_attribute() {
        let sanitized = email_safe_html(r#"<strong class="evil">hi</strong>"#);
        assert!(!sanitized.contains("class"));
        assert!(sanitized.contains("<strong>"));
    }

    #[test]
    fn br_self_closes() {
        let sanitized = email_safe_html("line1<br>line2");
        assert!(sanitized.contains("<br />") || sanitized.contains("<br/>"));
    }

    #[test]
    fn drops_iframe() {
        let sanitized = email_safe_html(r#"<iframe src="https://x.com">x</iframe>"#);
        assert!(!sanitized.contains("iframe"));
        assert!(sanitized.contains("x"));
    }

    /// A `<` that is never closed disappears, the text after it does not.
    #[test]
    fn unterminated_tag_drops_lt_keeps_text() {
        assert_eq!(email_safe_html("hi <unterminated"), "hi unterminated");
    }

    #[test]
    fn target_blank_adds_noopener() {
        let sanitized = email_safe_html(r#"<a href="https://x.com" target="_blank">x</a>"#);
        assert!(sanitized.contains("target=\"_blank\""));
        assert!(sanitized.contains("rel=\"noopener\""));
    }

    // --- text content ---------------------------------------------------------------------

    /// Template placeholders in the text must reach the output untouched.
    #[test]
    fn variable_placeholders_pass_through() {
        assert_eq!(
            email_safe_html("<strong>Hola {{name}}</strong>"),
            "<strong>Hola {{name}}</strong>"
        );
    }

    #[test]
    fn nested_allowed_tags_preserved() {
        assert_eq!(
            email_safe_html("<strong><em>bold-italic</em></strong>"),
            "<strong><em>bold-italic</em></strong>"
        );
    }
}