#![deny(unsafe_code)]
#![deny(missing_docs)]
#[cfg(ammonia_unstable)]
pub mod rcdom;
#[cfg(not(ammonia_unstable))]
mod rcdom;
mod style;
use html5ever::interface::Attribute;
use html5ever::serialize::{serialize, SerializeOpts};
use html5ever::tree_builder::{NodeOrText, TreeSink};
use html5ever::{driver as html, local_name, namespace_url, ns, QualName};
use maplit::{hashmap, hashset};
use std::sync::LazyLock;
use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
use std::borrow::{Borrow, Cow};
use std::cell::Cell;
use std::cmp::max;
use std::collections::{HashMap, HashSet};
use std::fmt::{self, Display};
use std::io;
use std::iter::IntoIterator as IntoIter;
use std::mem;
use std::rc::Rc;
use std::str::FromStr;
use tendril::stream::TendrilSink;
use tendril::StrTendril;
use tendril::{format_tendril, ByteTendril};
pub use url::Url;
use html5ever::buffer_queue::BufferQueue;
use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
pub use url;
static AMMONIA: LazyLock<Builder<'static>> = LazyLock::new(Builder::default);
/// Sanitizes an HTML fragment with the default [`Builder`] settings and
/// returns the result as a `String`.
pub fn clean(src: &str) -> String {
    let document = AMMONIA.clean(src);
    document.to_string()
}
/// Escapes arbitrary text so it can be embedded in HTML as inert content.
///
/// Every character that can start a tag, start an entity, or terminate an
/// attribute value (quoted or unquoted) is replaced by a character reference;
/// all other characters are copied through unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
pub fn clean_text(src: &str) -> String {
    // Escaping only ever grows the text, so the input length is a lower bound.
    let mut ret_val = String::with_capacity(max(4, src.len()));
    for c in src.chars() {
        let replacement = match c {
            // Starts a tag.
            '<' => "&lt;",
            // Ends an unquoted attribute value / closes a tag.
            '>' => "&gt;",
            // Ends a double-quoted attribute value.
            '\"' => "&quot;",
            // Ends a single-quoted attribute value.
            '\'' => "&apos;",
            // Treated as a quoting character by some legacy parsers.
            '`' => "&grave;",
            // Can end an unquoted attribute.
            '/' => "&#47;",
            // Starts a character reference.
            '&' => "&amp;",
            // Ambiguous at the start of an unquoted attribute value.
            '=' => "&#61;",
            // Whitespace ends an unquoted attribute value.
            ' ' => "&#32;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\x0c' => "&#12;",
            '\r' => "&#13;",
            // NUL is emitted as the reference for U+FFFD REPLACEMENT CHARACTER.
            '\0' => "&#65533;",
            // Everything else is passed through verbatim.
            _ => {
                ret_val.push(c);
                continue;
            }
        };
        ret_val.push_str(replacement);
    }
    ret_val
}
/// Reports whether `input` contains anything other than plain character data.
///
/// The text is run through the HTML tokenizer; the result is `true` as soon as
/// any token other than character data, end-of-file or a parse error is seen.
pub fn is_html(input: &str) -> bool {
    let mut bytes = ByteTendril::new();
    bytes.push_slice(input.as_bytes());

    let mut queue = BufferQueue::default();
    queue.push_back(bytes.try_reinterpret().unwrap());

    let tokenizer = Tokenizer::new(SanitizationTokenizer::new(), Default::default());
    // The feed result is deliberately ignored; only the sink's flag matters.
    let _ = tokenizer.feed(&mut queue);
    tokenizer.end();
    tokenizer.sink.was_sanitized.get()
}
/// Token sink used by `is_html` to detect whether the tokenizer produced
/// anything besides plain text.
#[derive(Clone)]
struct SanitizationTokenizer {
// Set to `true` once a token other than character data, EOF or a parse error
// is received; a `Cell` because `process_token` only gets `&self`.
was_sanitized: Cell<bool>,
}
impl SanitizationTokenizer {
pub fn new() -> SanitizationTokenizer {
SanitizationTokenizer {
was_sanitized: false.into(),
}
}
}
impl TokenSink for SanitizationTokenizer {
    type Handle = ();

    /// Records that markup was seen whenever the token is not character data,
    /// end-of-file or a parse error. Tokenization always continues.
    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
        let is_plain = matches!(
            token,
            Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_)
        );
        if !is_plain {
            self.was_sanitized.set(true);
        }
        TokenSinkResult::Continue
    }

    /// Nothing to flush at end of input.
    fn end(&self) {}
}
/// Configurable HTML sanitizer.
///
/// All string configuration is borrowed for `'a`; the builder never copies the
/// names it is given.
#[derive(Debug)]
pub struct Builder<'a> {
// Element names that are kept in the output.
tags: HashSet<&'a str>,
// Element names that are removed together with everything inside them.
clean_content_tags: HashSet<&'a str>,
// Per-element allowed attribute names.
tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
// Per-element, per-attribute allowed values (compared case-insensitively).
tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
// Per-element attributes that are forced to a fixed value on kept elements.
set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
// Attribute names allowed on every kept element.
generic_attributes: HashSet<&'a str>,
// URL schemes accepted in URL-bearing attributes.
url_schemes: HashSet<&'a str>,
// Policy for URLs that parse as relative.
url_relative: UrlRelative<'a>,
// Optional user callback that may rewrite or drop each surviving attribute.
attribute_filter: Option<Box<dyn AttributeFilter>>,
// Value of the `rel` attribute appended to every kept `<a>`, if any.
link_rel: Option<&'a str>,
// Per-element allowed class names.
allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
// Whether comment nodes are removed.
strip_comments: bool,
// Prefix prepended to `id` values that do not already start with it.
id_prefix: Option<&'a str>,
// Attribute-name prefixes allowed on every kept element.
generic_attribute_prefixes: Option<HashSet<&'a str>>,
// When set, `style` attribute values are passed through
// `style::filter_style_attribute` with this set.
style_properties: Option<HashSet<&'a str>>,
}
impl<'a> Default for Builder<'a> {
    /// Builds the default configuration: a fixed list of allowed elements,
    /// attributes and URL schemes, `script`/`style` removed with their
    /// content, relative URLs passed through, comments stripped and
    /// `rel="noopener noreferrer"` added to links.
    fn default() -> Self {
        #[rustfmt::skip]
        let tags = hashset![
            "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
            "bdo", "blockquote", "br", "caption", "center", "cite", "code",
            "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
            "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
            "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
            "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
            "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
            "strike", "strong", "sub", "summary", "sup", "table", "tbody",
            "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
        ];

        // Attributes that are only meaningful on specific elements.
        let tag_attributes = hashmap![
            "a" => hashset!["href", "hreflang"],
            "bdo" => hashset!["dir"],
            "blockquote" => hashset!["cite"],
            "col" => hashset!["align", "char", "charoff", "span"],
            "colgroup" => hashset!["align", "char", "charoff", "span"],
            "del" => hashset!["cite", "datetime"],
            "hr" => hashset!["align", "size", "width"],
            "img" => hashset!["align", "alt", "height", "src", "width"],
            "ins" => hashset!["cite", "datetime"],
            "ol" => hashset!["start"],
            "q" => hashset!["cite"],
            "table" => hashset!["align", "char", "charoff", "summary"],
            "tbody" => hashset!["align", "char", "charoff"],
            "td" => hashset!["align", "char", "charoff", "colspan", "headers", "rowspan"],
            "tfoot" => hashset!["align", "char", "charoff"],
            "th" => hashset!["align", "char", "charoff", "colspan", "headers", "rowspan", "scope"],
            "thead" => hashset!["align", "char", "charoff"],
            "tr" => hashset!["align", "char", "charoff"],
        ];

        let url_schemes = hashset![
            "bitcoin", "ftp", "ftps", "geo", "http", "https", "im", "irc", "ircs",
            "magnet", "mailto", "mms", "mx", "news", "nntp", "openpgp4fpr", "sip",
            "sms", "smsto", "ssh", "tel", "url", "webcal", "wtai", "xmpp"
        ];

        Builder {
            tags,
            clean_content_tags: hashset!["script", "style"],
            tag_attributes,
            tag_attribute_values: hashmap![],
            set_tag_attribute_values: hashmap![],
            generic_attributes: hashset!["lang", "title"],
            url_schemes,
            url_relative: UrlRelative::PassThrough,
            attribute_filter: None,
            link_rel: Some("noopener noreferrer"),
            allowed_classes: hashmap![],
            strip_comments: true,
            id_prefix: None,
            generic_attribute_prefixes: None,
            style_properties: None,
        }
    }
}
impl<'a> Builder<'a> {
pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
self.tags = value;
self
}
/// Adds element names to the set of kept elements.
///
/// Returns `self` so calls can be chained.
pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
    &mut self,
    it: I,
) -> &mut Self {
    for tag in it {
        self.tags.insert(<T as Borrow<str>>::borrow(tag));
    }
    self
}
/// Removes element names from the set of kept elements; names that are not
/// present are ignored.
///
/// Returns `self` so calls can be chained.
pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
    &mut self,
    it: I,
) -> &mut Self {
    it.into_iter().for_each(|tag| {
        self.tags.remove(<T as Borrow<str>>::borrow(tag));
    });
    self
}
/// Returns a copy of the set of kept element names.
pub fn clone_tags(&self) -> HashSet<&'a str> {
    Clone::clone(&self.tags)
}
pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
self.clean_content_tags = value;
self
}
/// Adds element names to the set that is removed together with its content.
///
/// Returns `self` so calls can be chained.
pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
    &mut self,
    it: I,
) -> &mut Self {
    for tag in it {
        self.clean_content_tags
            .insert(<T as Borrow<str>>::borrow(tag));
    }
    self
}
/// Removes element names from the set that is removed together with its
/// content; unknown names are ignored.
///
/// Returns `self` so calls can be chained.
pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
    &mut self,
    it: I,
) -> &mut Self {
    it.into_iter().for_each(|tag| {
        self.clean_content_tags
            .remove(<T as Borrow<str>>::borrow(tag));
    });
    self
}
/// Returns a copy of the set of element names removed with their content.
pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
    Clone::clone(&self.clean_content_tags)
}
pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
self.tag_attributes = value;
self
}
/// Allows additional attribute names on `tag`, creating the entry for `tag`
/// if it does not exist yet.
///
/// Returns `self` so calls can be chained.
pub fn add_tag_attributes<
    T: 'a + ?Sized + Borrow<str>,
    U: 'a + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'a T>,
>(
    &mut self,
    tag: &'a U,
    it: I,
) -> &mut Self {
    let tag_name: &'a str = <U as Borrow<str>>::borrow(tag);
    let allowed = self.tag_attributes.entry(tag_name).or_default();
    for attribute in it {
        allowed.insert(<T as Borrow<str>>::borrow(attribute));
    }
    self
}
/// Disallows attribute names on `tag`. Does nothing when `tag` has no entry;
/// the entry itself is kept even if it becomes empty.
///
/// Returns `self` so calls can be chained.
pub fn rm_tag_attributes<
    'b,
    'c,
    T: 'b + ?Sized + Borrow<str>,
    U: 'c + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'b T>,
>(
    &mut self,
    tag: &'c U,
    it: I,
) -> &mut Self {
    let tag_name = <U as Borrow<str>>::borrow(tag);
    if let Some(allowed) = self.tag_attributes.get_mut(tag_name) {
        it.into_iter().for_each(|attribute| {
            allowed.remove(<T as Borrow<str>>::borrow(attribute));
        });
    }
    self
}
/// Returns a copy of the per-element map of allowed attribute names.
pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
    Clone::clone(&self.tag_attributes)
}
pub fn tag_attribute_values(
&mut self,
value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
) -> &mut Self {
self.tag_attribute_values = value;
self
}
/// Allows additional values for `attribute` on `tag`, creating the nested
/// entries as needed.
///
/// Returns `self` so calls can be chained.
pub fn add_tag_attribute_values<
    T: 'a + ?Sized + Borrow<str>,
    U: 'a + ?Sized + Borrow<str>,
    V: 'a + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'a T>,
>(
    &mut self,
    tag: &'a U,
    attribute: &'a V,
    it: I,
) -> &mut Self {
    let tag_name: &'a str = <U as Borrow<str>>::borrow(tag);
    let attribute_name: &'a str = <V as Borrow<str>>::borrow(attribute);
    let by_attribute = self.tag_attribute_values.entry(tag_name).or_default();
    let allowed_values = by_attribute.entry(attribute_name).or_default();
    for value in it {
        allowed_values.insert(<T as Borrow<str>>::borrow(value));
    }
    self
}
/// Removes allowed values for `attribute` on `tag`. Does nothing when either
/// the tag or the attribute has no entry.
///
/// Returns `self` so calls can be chained.
pub fn rm_tag_attribute_values<
    'b,
    'c,
    T: 'b + ?Sized + Borrow<str>,
    U: 'c + ?Sized + Borrow<str>,
    V: 'c + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'b T>,
>(
    &mut self,
    tag: &'c U,
    attribute: &'c V,
    it: I,
) -> &mut Self {
    let tag_name = <U as Borrow<str>>::borrow(tag);
    let attribute_name = <V as Borrow<str>>::borrow(attribute);
    if let Some(by_attribute) = self.tag_attribute_values.get_mut(tag_name) {
        if let Some(allowed_values) = by_attribute.get_mut(attribute_name) {
            it.into_iter().for_each(|value| {
                allowed_values.remove(<T as Borrow<str>>::borrow(value));
            });
        }
    }
    self
}
/// Returns a copy of the element → attribute → allowed-values map.
pub fn clone_tag_attribute_values(
    &self,
) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
    Clone::clone(&self.tag_attribute_values)
}
pub fn set_tag_attribute_values(
&mut self,
value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
) -> &mut Self {
self.set_tag_attribute_values = value;
self
}
/// Forces `attribute` to `value` on every kept `tag` element, overwriting any
/// value previously registered for that pair.
///
/// Returns `self` so calls can be chained.
pub fn set_tag_attribute_value<
    T: 'a + ?Sized + Borrow<str>,
    A: 'a + ?Sized + Borrow<str>,
    V: 'a + ?Sized + Borrow<str>,
>(
    &mut self,
    tag: &'a T,
    attribute: &'a A,
    value: &'a V,
) -> &mut Self {
    let forced = self
        .set_tag_attribute_values
        .entry(<T as Borrow<str>>::borrow(tag))
        .or_default();
    forced.insert(
        <A as Borrow<str>>::borrow(attribute),
        <V as Borrow<str>>::borrow(value),
    );
    self
}
/// Stops forcing `attribute` on `tag`. Does nothing when `tag` has no entry.
///
/// Returns `self` so calls can be chained.
pub fn rm_set_tag_attribute_value<
    T: 'a + ?Sized + Borrow<str>,
    A: 'a + ?Sized + Borrow<str>,
>(
    &mut self,
    tag: &'a T,
    attribute: &'a A,
) -> &mut Self {
    let tag_name = <T as Borrow<str>>::borrow(tag);
    if let Some(forced) = self.set_tag_attribute_values.get_mut(tag_name) {
        forced.remove(<A as Borrow<str>>::borrow(attribute));
    }
    self
}
/// Looks up the value that is forced for `attribute` on `tag`.
///
/// Returns `None` when no value is registered for that pair.
pub fn get_set_tag_attribute_value<
    T: 'a + ?Sized + Borrow<str>,
    A: 'a + ?Sized + Borrow<str>,
>(
    &self,
    tag: &'a T,
    attribute: &'a A,
) -> Option<&'a str> {
    let forced = self
        .set_tag_attribute_values
        .get(<T as Borrow<str>>::borrow(tag))?;
    forced.get(<A as Borrow<str>>::borrow(attribute)).copied()
}
/// Returns a copy of the element → attribute → forced-value map.
pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
    Clone::clone(&self.set_tag_attribute_values)
}
/// Replaces the set of attribute-name prefixes allowed on every kept element.
///
/// Returns `self` so calls can be chained.
pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
    drop(self.generic_attribute_prefixes.replace(value));
    self
}
/// Adds attribute-name prefixes allowed on every kept element, creating the
/// set if none was configured.
///
/// Returns `self` so calls can be chained.
pub fn add_generic_attribute_prefixes<
    T: 'a + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'a T>,
>(
    &mut self,
    it: I,
) -> &mut Self {
    let prefixes = self
        .generic_attribute_prefixes
        .get_or_insert_with(HashSet::new);
    for prefix in it {
        prefixes.insert(<T as Borrow<str>>::borrow(prefix));
    }
    self
}
/// Removes attribute-name prefixes. When the set ends up empty it is reset to
/// `None`; when no set is configured the iterator is not consumed.
///
/// Returns `self` so calls can be chained.
pub fn rm_generic_attribute_prefixes<
    'b,
    T: 'b + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'b T>,
>(
    &mut self,
    it: I,
) -> &mut Self {
    if let Some(prefixes) = self.generic_attribute_prefixes.as_mut() {
        for prefix in it {
            let _ = prefixes.remove(<T as Borrow<str>>::borrow(prefix));
        }
        if prefixes.is_empty() {
            self.generic_attribute_prefixes = None;
        }
    }
    self
}
/// Returns a copy of the configured attribute-name prefixes, if any.
pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
    Clone::clone(&self.generic_attribute_prefixes)
}
pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
self.generic_attributes = value;
self
}
/// Adds attribute names allowed on every kept element.
///
/// Returns `self` so calls can be chained.
pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
    &mut self,
    it: I,
) -> &mut Self {
    for attribute in it {
        self.generic_attributes
            .insert(<T as Borrow<str>>::borrow(attribute));
    }
    self
}
/// Removes attribute names from the set allowed on every kept element;
/// unknown names are ignored.
///
/// Returns `self` so calls can be chained.
pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
    &mut self,
    it: I,
) -> &mut Self {
    it.into_iter().for_each(|attribute| {
        self.generic_attributes
            .remove(<T as Borrow<str>>::borrow(attribute));
    });
    self
}
/// Returns a copy of the attribute names allowed on every kept element.
pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
    Clone::clone(&self.generic_attributes)
}
pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
self.url_schemes = value;
self
}
/// Adds URL schemes accepted in URL-bearing attributes.
///
/// Returns `self` so calls can be chained.
pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
    &mut self,
    it: I,
) -> &mut Self {
    for scheme in it {
        self.url_schemes.insert(<T as Borrow<str>>::borrow(scheme));
    }
    self
}
/// Removes URL schemes from the accepted set; unknown schemes are ignored.
///
/// Returns `self` so calls can be chained.
pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
    &mut self,
    it: I,
) -> &mut Self {
    it.into_iter().for_each(|scheme| {
        self.url_schemes.remove(<T as Borrow<str>>::borrow(scheme));
    });
    self
}
/// Returns a copy of the accepted URL schemes.
pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
    Clone::clone(&self.url_schemes)
}
pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self {
self.url_relative = value;
self
}
/// Installs a callback that is invoked as `(element, attribute, value)` for
/// every attribute that survived the allow-list checks. Returning `None`
/// drops the attribute; returning `Some` keeps it with the returned value.
///
/// # Panics
///
/// Panics if a filter has already been installed on this builder.
pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
where
    CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
{
    if self.attribute_filter.is_some() {
        panic!("attribute_filter can be set only once");
    }
    self.attribute_filter = Some(Box::new(callback));
    self
}
/// Returns `true` when the relative-URL policy is [`UrlRelative::Deny`].
pub fn is_url_relative_deny(&self) -> bool {
    match &self.url_relative {
        UrlRelative::Deny => true,
        _ => false,
    }
}
/// Returns `true` when the relative-URL policy is
/// [`UrlRelative::PassThrough`].
pub fn is_url_relative_pass_through(&self) -> bool {
    match &self.url_relative {
        UrlRelative::PassThrough => true,
        _ => false,
    }
}
/// Returns `true` when the relative-URL policy is a [`UrlRelative::Custom`]
/// callback.
pub fn is_url_relative_custom(&self) -> bool {
    match &self.url_relative {
        UrlRelative::Custom(_) => true,
        _ => false,
    }
}
/// Sets the `rel` value appended to every kept `<a>` element, or disables it
/// with `None`.
///
/// While this is `Some`, cleaning panics if `rel` is also allowed through
/// `generic_attributes` or through `tag_attributes["a"]`.
/// Returns `self` so calls can be chained.
pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
    let slot = &mut self.link_rel;
    *slot = value;
    self
}
pub fn get_link_rel(&self) -> Option<&str> {
self.link_rel
}
pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
self.allowed_classes = value;
self
}
/// Allows additional class names on `tag`, creating the entry for `tag` if it
/// does not exist yet.
///
/// Returns `self` so calls can be chained.
pub fn add_allowed_classes<
    T: 'a + ?Sized + Borrow<str>,
    U: 'a + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'a T>,
>(
    &mut self,
    tag: &'a U,
    it: I,
) -> &mut Self {
    let tag_name: &'a str = <U as Borrow<str>>::borrow(tag);
    let classes = self.allowed_classes.entry(tag_name).or_default();
    for class in it {
        classes.insert(<T as Borrow<str>>::borrow(class));
    }
    self
}
/// Disallows class names on `tag`. Does nothing when `tag` has no entry; the
/// entry itself is kept even if it becomes empty.
///
/// Returns `self` so calls can be chained.
pub fn rm_allowed_classes<
    'b,
    'c,
    T: 'b + ?Sized + Borrow<str>,
    U: 'c + ?Sized + Borrow<str>,
    I: IntoIter<Item = &'b T>,
>(
    &mut self,
    tag: &'c U,
    it: I,
) -> &mut Self {
    let tag_name = <U as Borrow<str>>::borrow(tag);
    if let Some(classes) = self.allowed_classes.get_mut(tag_name) {
        it.into_iter().for_each(|class| {
            classes.remove(<T as Borrow<str>>::borrow(class));
        });
    }
    self
}
/// Returns a copy of the per-element map of allowed class names.
pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
    Clone::clone(&self.allowed_classes)
}
/// Chooses whether comment nodes are removed (`true`) or kept (`false`).
///
/// Returns `self` so calls can be chained.
pub fn strip_comments(&mut self, value: bool) -> &mut Self {
    let flag = &mut self.strip_comments;
    *flag = value;
    self
}
pub fn will_strip_comments(&self) -> bool {
self.strip_comments
}
/// Sets the prefix prepended to every `id` attribute value that does not
/// already start with it, or disables prefixing with `None`.
///
/// Returns `self` so calls can be chained.
pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
    let slot = &mut self.id_prefix;
    *slot = value;
    self
}
/// Enables filtering of `style` attribute values: each one is rewritten by
/// `style::filter_style_attribute` using `value` as the allowed set.
///
/// Returns `self` so calls can be chained.
pub fn filter_style_properties(&mut self, value: HashSet<&'a str>) -> &mut Self {
    drop(self.style_properties.replace(value));
    self
}
pub fn new() -> Self {
Self::default()
}
/// Creates a builder that keeps no elements at all.
///
/// Only the set of kept tags is emptied; every other setting keeps its
/// default value.
pub fn empty() -> Self {
    let mut builder = Self::default();
    builder.tags = hashset![];
    builder
}
/// Parses `src` as an HTML fragment and sanitizes it with this configuration.
///
/// # Panics
///
/// Panics if the configuration is inconsistent (see the assertions in
/// `clean_dom`).
pub fn clean(&self, src: &str) -> Document {
    self.clean_dom(Self::make_parser().one(src))
}
/// Reads UTF-8 HTML from `src`, parses it as a fragment and sanitizes it.
///
/// # Errors
///
/// Returns the I/O error reported while reading from `src`.
pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
where
    R: io::Read,
{
    Self::make_parser()
        .from_utf8()
        .read_from(&mut src)
        .map(|dom| self.clean_dom(dom))
}
/// Sanitizes a parsed fragment in place and wraps it in a `Document`.
///
/// Every node below the fragment root is detached, checked, and either
/// re-appended to its parent (kept), replaced by its children (unwrapped), or
/// discarded together with its subtree (`clean_content_tags`).
///
/// # Panics
///
/// Panics when the configuration is contradictory: `rel` allowed while
/// `link_rel` is set, `class` allowed while `allowed_classes` is used for the
/// same element, or a `clean_content_tags` entry that is also in `tags` or
/// `tag_attributes`.
fn clean_dom(&self, dom: RcDom) -> Document {
// Explicit work lists are used instead of recursion; `stack` holds nodes still
// to be examined, `removed` holds nodes that must be torn down.
let mut stack = Vec::new();
let mut removed = Vec::new();
let link_rel = self
.link_rel
.map(|link_rel| format_tendril!("{}", link_rel));
// `rel` is generated by the sanitizer, so it must not also be user-allowed.
if link_rel.is_some() {
assert!(self.generic_attributes.get("rel").is_none());
assert!(self
.tag_attributes
.get("a")
.and_then(|a| a.get("rel"))
.is_none());
}
// `class` is either fully allowed or filtered through `allowed_classes`, never both.
assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
for tag_name in self.allowed_classes.keys() {
assert!(self
.tag_attributes
.get(tag_name)
.and_then(|a| a.get("class"))
.is_none());
}
for tag_name in &self.clean_content_tags {
assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
}
// The first child of the document is the fragment root; it is never examined itself.
let body = {
let children = dom.document.children.borrow();
children[0].clone()
};
// Detach the root's children; reversed so that `pop` yields document order.
stack.extend(
mem::take(&mut *body.children.borrow_mut())
.into_iter()
.rev(),
);
while let Some(mut node) = stack.pop() {
// Clear the node's parent link and keep a strong handle to the parent.
let parent = node.parent
.replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
.upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
// Elements in `clean_content_tags` are dropped with their whole subtree.
// NOTE(review): nodes that fail `check_expected_namespace` below are only
// unwrapped (their children are hoisted) — confirm that is intended for
// foreign (SVG/MathML) content.
if self.clean_node_content(&node) {
removed.push(node);
continue;
}
let pass_clean = self.clean_child(&mut node);
let pass = pass_clean && self.check_expected_namespace(&parent, &node);
if pass {
// Kept: finalize attributes and put the node back under its parent.
self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix);
dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
} else {
// Unwrapped: the children will be attached to the grandparent instead.
for sub in node.children.borrow_mut().iter_mut() {
sub.parent.replace(Some(Rc::downgrade(&parent)));
}
}
// In both cases the children are detached and queued for examination.
stack.extend(
mem::take(&mut *node.children.borrow_mut())
.into_iter()
.rev(),
);
if !pass {
removed.push(node);
}
}
// Tear down discarded subtrees iteratively by moving children onto the list.
while let Some(node) = removed.pop() {
removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]);
}
Document(dom)
}
/// Returns `true` when `node` is an element whose name is in
/// `clean_content_tags`, i.e. it must be removed together with its content.
/// Non-element nodes always yield `false`.
fn clean_node_content(&self, node: &Handle) -> bool {
    if let NodeData::Element { ref name, .. } = node.data {
        self.clean_content_tags.contains(&*name.local)
    } else {
        false
    }
}
/// Decides whether `child` is kept and, for kept elements, strips every
/// attribute that is not allowed.
///
/// Text is always kept; comments are kept only when `strip_comments` is off;
/// doctypes, documents and processing instructions are never kept; elements
/// are kept when their name is in `tags`.
fn clean_child(&self, child: &mut Handle) -> bool {
    let (name, attrs) = match child.data {
        NodeData::Text { .. } => return true,
        NodeData::Comment { .. } => return !self.strip_comments,
        NodeData::Doctype { .. }
        | NodeData::Document
        | NodeData::ProcessingInstruction { .. } => return false,
        NodeData::Element {
            ref name,
            ref attrs,
            ..
        } => (name, attrs),
    };

    let tag: &str = &name.local;
    if !self.tags.contains(tag) {
        return false;
    }

    attrs.borrow_mut().retain(|attr: &html5ever::Attribute| {
        let attr_name: &str = &attr.name.local;

        // Allowed generically, by prefix, per tag, or per tag with a matching
        // (case-insensitive) value.
        let whitelisted = self.generic_attributes.contains(attr_name)
            || self
                .generic_attribute_prefixes
                .as_ref()
                .is_some_and(|prefixes| prefixes.iter().any(|&p| attr_name.starts_with(p)))
            || self
                .tag_attributes
                .get(tag)
                .is_some_and(|names| names.contains(attr_name))
            || self
                .tag_attribute_values
                .get(tag)
                .and_then(|by_attr| by_attr.get(attr_name))
                .is_some_and(|values| {
                    let actual = attr.value.to_lowercase();
                    values.iter().any(|v| v.to_lowercase() == actual)
                });

        if !whitelisted {
            // `class` survives here when the element has an allowed-classes
            // entry; individual classes are filtered afterwards.
            return attr_name == "class" && self.allowed_classes.contains_key(tag);
        }
        if !is_url_attr(tag, attr_name) {
            return true;
        }
        match Url::parse(&attr.value) {
            Ok(url) => self.url_schemes.contains(url.scheme()),
            Err(url::ParseError::RelativeUrlWithoutBase) => {
                !matches!(self.url_relative, UrlRelative::Deny)
            }
            Err(_) => false,
        }
    });
    true
}
/// Checks that `child` sits in a namespace it could legitimately have under
/// `parent`. Pairs where either node is not an element always pass.
fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
    let (NodeData::Element { name: outer, .. }, NodeData::Element { name: inner, .. }) =
        (&parent.data, &child.data)
    else {
        return true;
    };

    if outer.ns == ns!(html) && inner.ns == ns!(svg) {
        // HTML may only enter SVG through the `<svg>` root.
        inner.local == local_name!("svg")
    } else if outer.ns == ns!(html) && inner.ns == ns!(mathml) {
        // HTML may only enter MathML through the `<math>` root.
        inner.local == local_name!("math")
    } else if outer.ns == ns!(mathml) && inner.ns != ns!(mathml) {
        // Leaving MathML is only accepted below these elements.
        matches!(
            &*outer.local,
            "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml"
        )
    } else if outer.ns == ns!(svg) && inner.ns != ns!(svg) {
        // Leaving SVG is only accepted below `<foreignObject>`.
        matches!(&*outer.local, "foreignObject")
    } else if inner.ns == ns!(svg) {
        is_svg_tag(&inner.local)
    } else if inner.ns == ns!(mathml) {
        is_mathml_tag(&inner.local)
    } else if inner.ns == ns!(html) {
        // Names shared with SVG/MathML are accepted as HTML only for this list.
        let foreign_name = is_svg_tag(&inner.local) || is_mathml_tag(&inner.local);
        !foreign_name
            || matches!(
                &*inner.local,
                "title" | "style" | "font" | "a" | "script" | "span"
            )
    } else {
        outer.ns == inner.ns
    }
}
/// Finalizes the attributes of a kept element. Non-element nodes are left
/// untouched.
///
/// The passes run in this order: forced attribute values, `rel` on `<a>`,
/// `id` prefixing, the user attribute filter, relative-URL rewriting, `style`
/// filtering and class filtering. Passes that drop attributes use
/// `swap_remove`, so attribute order is not preserved.
fn adjust_node_attributes(
&self,
child: &mut Handle,
link_rel: &Option<StrTendril>,
id_prefix: Option<&'a str>,
) {
if let NodeData::Element {
ref name,
ref attrs,
..
} = child.data
{
// Forced values: overwrite an existing attribute or append a new one.
if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
let mut attrs = attrs.borrow_mut();
for (&set_name, &set_value) in set_attrs {
if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
{
if &*attr.value != set_value {
attr.value = set_value.into();
}
} else {
let attr = Attribute {
name: QualName::new(None, ns!(), set_name.into()),
value: set_value.into(),
};
attrs.push(attr);
}
}
}
// Links always receive the configured `rel` value as an extra attribute.
if let Some(ref link_rel) = *link_rel {
if &*name.local == "a" {
attrs.borrow_mut().push(Attribute {
name: QualName::new(None, ns!(), local_name!("rel")),
value: link_rel.clone(),
})
}
}
// Prefix `id` values unless they already carry the prefix.
if let Some(ref id_prefix) = id_prefix {
for attr in &mut *attrs.borrow_mut() {
if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) {
attr.value = format_tendril!("{}{}", id_prefix, attr.value);
}
}
}
// User filter: `None` drops the attribute, `Some` keeps it (rewritten only
// when the returned text differs).
if let Some(ref attr_filter) = self.attribute_filter {
let mut drop_attrs = Vec::new();
let mut attrs = attrs.borrow_mut();
for (i, attr) in &mut attrs.iter_mut().enumerate() {
let replace_with = if let Some(new) =
attr_filter.filter(&name.local, &attr.name.local, &attr.value)
{
if *new != *attr.value {
Some(format_tendril!("{}", new))
} else {
None }
} else {
drop_attrs.push(i);
None
};
if let Some(replace_with) = replace_with {
attr.value = replace_with;
}
}
// Highest index first, so `swap_remove` never moves an attribute that is
// still scheduled for removal.
for i in drop_attrs.into_iter().rev() {
attrs.swap_remove(i);
}
}
// Relative URLs are rewritten by the configured policy, or dropped when the
// policy yields no value.
{
let mut drop_attrs = Vec::new();
let mut attrs = attrs.borrow_mut();
for (i, attr) in attrs.iter_mut().enumerate() {
if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) {
let new_value = self.url_relative.evaluate(&attr.value);
if let Some(new_value) = new_value {
attr.value = new_value;
} else {
drop_attrs.push(i);
}
}
}
for i in drop_attrs.into_iter().rev() {
attrs.swap_remove(i);
}
}
// `style` values are rewritten by `style::filter_style_attribute`.
if let Some(allowed_values) = &self.style_properties {
for attr in &mut *attrs.borrow_mut() {
if &attr.name.local == "style" {
attr.value = style::filter_style_attribute(&attr.value, allowed_values).into();
}
}
}
// Keep only allowed class names, re-joined with single spaces.
if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
for attr in &mut *attrs.borrow_mut() {
if &attr.name.local == "class" {
let mut classes = vec![];
for class in attr.value.split_ascii_whitespace() {
if allowed_values.contains(class) {
classes.push(class.to_owned());
}
}
attr.value = format_tendril!("{}", classes.join(" "));
}
}
}
}
}
/// Creates a fragment parser that builds an `RcDom`, using an HTML `<div>`
/// as the context element and default parse options.
fn make_parser() -> html::Parser<RcDom> {
    let context = QualName::new(None, ns!(html), local_name!("div"));
    let sink = RcDom::default();
    html::parse_fragment(sink, html::ParseOpts::default(), context, Vec::new())
}
}
/// Returns `true` when `attr` on `element` holds a URL.
///
/// `href` and `src` count on any element; the remaining attributes only on
/// the specific elements listed below.
fn is_url_attr(element: &str, attr: &str) -> bool {
    matches!(
        (element, attr),
        (_, "href" | "src")
            | ("form", "action")
            | ("object", "data")
            | ("button" | "input", "formaction")
            | ("a", "ping")
            | ("video", "poster")
    )
}
/// Returns `true` when `element` is one of the SVG element names recognized
/// by the sanitizer. The comparison is exact and case-sensitive.
fn is_svg_tag(element: &str) -> bool {
    const SVG_TAGS: &[&str] = &[
        "a", "animate", "animateMotion", "animateTransform", "circle", "clipPath",
        "defs", "desc", "discard", "ellipse", "feBlend", "feColorMatrix",
        "feComponentTransfer", "feComposite", "feConvolveMatrix", "feDiffuseLighting",
        "feDisplacementMap", "feDistantLight", "feDropShadow", "feFlood", "feFuncA",
        "feFuncB", "feFuncG", "feFuncR", "feGaussianBlur", "feImage", "feMerge",
        "feMergeNode", "feMorphology", "feOffset", "fePointLight", "feSpecularLighting",
        "feSpotLight", "feTile", "feTurbulence", "filter", "foreignObject", "g",
        "image", "line", "linearGradient", "marker", "mask", "metadata", "mpath",
        "path", "pattern", "polygon", "polyline", "radialGradient", "rect", "script",
        "set", "stop", "style", "svg", "switch", "symbol", "text", "textPath",
        "title", "tspan", "use", "view",
    ];
    SVG_TAGS.contains(&element)
}
/// Returns `true` when `element` is one of the MathML element names
/// recognized by the sanitizer. The comparison is exact and case-sensitive.
fn is_mathml_tag(element: &str) -> bool {
    const MATHML_TAGS: &[&str] = &[
        "abs", "and", "annotation", "annotation-xml", "apply", "approx", "arccos",
        "arccosh", "arccot", "arccoth", "arccsc", "arccsch", "arcsec", "arcsech",
        "arcsin", "arcsinh", "arctan", "arctanh", "arg", "bind", "bvar", "card",
        "cartesianproduct", "cbytes", "ceiling", "cerror", "ci", "cn", "codomain",
        "complexes", "compose", "condition", "conjugate", "cos", "cosh", "cot",
        "coth", "cs", "csc", "csch", "csymbol", "curl", "declare", "degree",
        "determinant", "diff", "divergence", "divide", "domain",
        "domainofapplication", "emptyset", "eq", "equivalent", "eulergamma",
        "exists", "exp", "exponentiale", "factorial", "factorof", "false", "floor",
        "fn", "forall", "gcd", "geq", "grad", "gt", "ident", "image", "imaginary",
        "imaginaryi", "implies", "in", "infinity", "int", "integers", "intersect",
        "interval", "inverse", "lambda", "laplacian", "lcm", "leq", "limit", "list",
        "ln", "log", "logbase", "lowlimit", "lt", "maction", "maligngroup",
        "malignmark", "math", "matrix", "matrixrow", "max", "mean", "median",
        "menclose", "merror", "mfenced", "mfrac", "mglyph", "mi", "min", "minus",
        "mlabeledtr", "mlongdiv", "mmultiscripts", "mn", "mo", "mode", "moment",
        "momentabout", "mover", "mpadded", "mphantom", "mprescripts", "mroot",
        "mrow", "ms", "mscarries", "mscarry", "msgroup", "msline", "mspace",
        "msqrt", "msrow", "mstack", "mstyle", "msub", "msubsup", "msup", "mtable",
        "mtd", "mtext", "mtr", "munder", "munderover", "naturalnumbers", "neq",
        "none", "not", "notanumber", "notin", "notprsubset", "notsubset", "or",
        "otherwise", "outerproduct", "partialdiff", "pi", "piece", "piecewise",
        "plus", "power", "primes", "product", "prsubset", "quotient", "rationals",
        "real", "reals", "reln", "rem", "root", "scalarproduct", "sdev", "sec",
        "sech", "selector", "semantics", "sep", "set", "setdiff", "share", "sin",
        "sinh", "span", "subset", "sum", "tan", "tanh", "tendsto", "times",
        "transpose", "true", "union", "uplimit", "variance", "vector",
        "vectorproduct", "xor",
    ];
    MATHML_TAGS.contains(&element)
}
/// Returns `true` when `url` cannot be parsed on its own because it is
/// relative, i.e. parsing fails with `RelativeUrlWithoutBase`.
fn is_url_relative(url: &str) -> bool {
    Url::parse(url) == Err(url::ParseError::RelativeUrlWithoutBase)
}
/// Policy for URL attribute values that parse as relative URLs.
#[non_exhaustive]
pub enum UrlRelative<'a> {
// Drop the attribute.
Deny,
// Keep the value unchanged.
PassThrough,
// Resolve the value against this base URL.
RewriteWithBase(Url),
// Resolve values starting with `/` against `root`, and every other value
// against `path` joined onto `root`.
RewriteWithRoot {
root: Url,
path: String,
},
// Delegate to a user callback; `None` from the callback drops the attribute.
Custom(Box<dyn UrlRelativeEvaluate<'a>>),
}
impl<'a> UrlRelative<'a> {
    /// Applies the policy to a relative `url`.
    ///
    /// Returns the replacement attribute value, or `None` when the attribute
    /// must be dropped (policy `Deny`, a failed join, or a callback that
    /// returned `None`).
    fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> {
        let to_tendril = |text: &str| StrTendril::from_str(text).ok();
        match self {
            UrlRelative::Deny => None,
            UrlRelative::PassThrough => to_tendril(url),
            UrlRelative::RewriteWithBase(base) => {
                let joined = base.join(url).ok()?;
                to_tendril(joined.as_str())
            }
            UrlRelative::RewriteWithRoot { root, path } => {
                let joined = match url.as_bytes() {
                    // `//host/...`: joined onto the root as-is.
                    [b'/', b'/', ..] => root.join(url),
                    // A lone `/` resolves to the root itself.
                    b"/" => root.join("."),
                    // `/path`: resolved below the root without the leading slash.
                    [b'/', ..] => root.join(&url[1..]),
                    // Anything else: resolved below `root` + `path`.
                    _ => root.join(path).and_then(|anchor| anchor.join(url)),
                }
                .ok()?;
                to_tendril(joined.as_str())
            }
            UrlRelative::Custom(callback) => {
                let rewritten = callback.evaluate(url)?;
                to_tendril(&*rewritten)
            }
        }
    }
}
impl<'a> fmt::Debug for UrlRelative<'a> {
    /// Writes the variant name and, for the rewrite variants, the URLs they
    /// carry. The custom callback itself is not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            UrlRelative::Deny => f.write_str("UrlRelative::Deny"),
            UrlRelative::PassThrough => f.write_str("UrlRelative::PassThrough"),
            UrlRelative::RewriteWithBase(base) => {
                write!(f, "UrlRelative::RewriteWithBase({base})")
            }
            UrlRelative::RewriteWithRoot { root, path } => write!(
                f,
                "UrlRelative::RewriteWithRoot {{ root: {}, path: {} }}",
                root, path
            ),
            UrlRelative::Custom(_) => f.write_str("UrlRelative::Custom"),
        }
    }
}
/// Callback interface used by `UrlRelative::Custom` to rewrite relative URLs.
pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a {
/// Receives a relative URL; returns its replacement, or `None` to have the
/// attribute dropped.
fn evaluate<'url>(&self, _: &'url str) -> Option<Cow<'url, str>>;
}
/// Lets any suitable closure or function act as a relative-URL callback.
impl<'a, T> UrlRelativeEvaluate<'a> for T
where
    T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync + 'a,
{
    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> {
        let callback: &T = self;
        callback(url)
    }
}
/// Opaque `Debug` output so that `Builder` can derive `Debug` even though it
/// stores a boxed callback.
impl fmt::Debug for dyn AttributeFilter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Formatter::write_str(f, "AttributeFilter")
    }
}
/// Callback interface for rewriting or dropping attributes of kept elements.
pub trait AttributeFilter: Send + Sync {
/// Called as `(element, attribute, value)`; returns the value to keep, or
/// `None` to drop the attribute.
fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
}
/// Lets any suitable closure or function act as an attribute filter.
impl<T> AttributeFilter for T
where
    T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
{
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
        let callback: &T = self;
        callback(element, attribute, value)
    }
}
/// A sanitized HTML fragment, held as a parsed DOM. Serialize it with
/// `to_string()` or `write_to`.
pub struct Document(RcDom);
impl Document {
pub fn write_to<W>(&self, writer: W) -> io::Result<()>
where
W: io::Write,
{
let opts = Self::serialize_opts();
let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
serialize(writer, &inner, opts)
}
#[cfg(ammonia_unstable)]
pub fn to_dom_node(&self) -> Handle {
self.0.document.children.borrow()[0].clone()
}
fn serialize_opts() -> SerializeOpts {
SerializeOpts::default()
}
}
/// Cloning round-trips through HTML: the document is serialized and parsed
/// again, so the clone owns an independent DOM.
impl Clone for Document {
    fn clone(&self) -> Self {
        let html = self.to_string();
        Document(Builder::make_parser().one(html.as_str()))
    }
}
impl Display for Document {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let opts = Self::serialize_opts();
let mut ret_val = Vec::new();
let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
serialize(&mut ret_val, &inner, opts)
.expect("Writing to a string shouldn't fail (expect on OOM)");
String::from_utf8(ret_val)
.expect("html5ever only supports UTF8")
.fmt(f)
}
}
/// Debug output is the serialized HTML wrapped in `Document(...)`.
impl fmt::Debug for Document {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("Document({})", self))
    }
}
/// Converts a document into its serialized HTML.
impl From<Document> for String {
    fn from(document: Document) -> Self {
        ToString::to_string(&document)
    }
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn deeply_nested_whitelisted() {
clean(&"<b>".repeat(60_000));
}
#[test]
fn deeply_nested_blacklisted() {
clean(&"<b-b>".repeat(60_000));
}
// Deep nesting that alternates between a stripped element (`<b-b>`) and a
// kept element (`<b>`); must finish without overflowing the stack.
#[test]
fn deeply_nested_alternating() {
    clean(&"<b-b><b>".repeat(35_000));
}
// A bare `<` in text is not a tag; it must come back escaped in the output.
#[test]
fn included_angles() {
    let fragment = "1 < 2";
    let result = clean(fragment);
    assert_eq!(result, "1 &lt; 2");
}
// `<script>` is removed together with its content; the text on both sides,
// including both spaces, is preserved.
#[test]
fn remove_script() {
    let fragment = "an <script>evil()</script> example";
    let result = clean(fragment);
    assert_eq!(result, "an  example");
}
#[test]
fn ignore_link() {
let fragment = "a <a href=\"http://www.google.com\">good</a> example";
let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\
good</a> example";
let result = clean(fragment);
assert_eq!(result, expected);
}
#[test]
fn remove_unsafe_link() {
let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example";
let result = clean(fragment);
assert_eq!(
result,
"an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example"
);
}
#[test]
fn remove_js_link() {
let fragment = "an <a href=\"javascript:evil()\">evil</a> example";
let result = clean(fragment);
assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example");
}
#[test]
fn tag_rebalance() {
let fragment = "<b>AWESOME!";
let result = clean(fragment);
assert_eq!(result, "<b>AWESOME!</b>");
}
#[test]
fn allow_url_relative() {
let fragment = "<a href=test>Test</a>";
let result = Builder::new()
.url_relative(UrlRelative::PassThrough)
.clean(fragment)
.to_string();
assert_eq!(
result,
"<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
);
}
#[test]
fn rewrite_url_relative() {
let fragment = "<a href=test>Test</a>";
let result = Builder::new()
.url_relative(UrlRelative::RewriteWithBase(
Url::parse("http://example.com/").unwrap(),
))
.clean(fragment)
.to_string();
assert_eq!(
result,
"<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>"
);
}
#[test]
fn rewrite_url_relative_with_invalid_url() {
let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##;
let result = Builder::new()
.url_relative(UrlRelative::RewriteWithBase(
Url::parse("http://example.com/").unwrap(),
))
.clean(fragment)
.to_string();
assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##);
}
#[test]
fn attribute_filter_nop() {
let fragment = "<a href=test>Test</a>";
let result = Builder::new()
.attribute_filter(|elem, attr, value| {
assert_eq!("a", elem);
assert!(
matches!(
(attr, value),
("href", "test") | ("rel", "noopener noreferrer")
),
"{}",
value.to_string()
);
Some(value.into())
})
.clean(fragment)
.to_string();
assert_eq!(
result,
"<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
);
}
#[test]
fn attribute_filter_drop() {
let fragment = "Test<img alt=test src=imgtest>";
let result = Builder::new()
.attribute_filter(|elem, attr, value| {
assert_eq!("img", elem);
match (attr, value) {
("src", "imgtest") => None,
("alt", "test") => Some(value.into()),
_ => panic!("unexpected"),
}
})
.clean(fragment)
.to_string();
assert_eq!(result, r#"Test<img alt="test">"#);
}
/// An absolute URL produced by the attribute filter is expected to appear in the
/// output unchanged, even though a (different) rewrite base is configured.
#[test]
fn url_filter_absolute() {
    let unused_base = Url::parse("http://wrong.invalid/").unwrap();
    let cleaned = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("img", elem);
            match (attr, value) {
                ("alt", "test") => None,
                ("src", "imgtest") => {
                    let absolute = format!("https://example.com/images/{}", value);
                    Some(absolute.into())
                }
                _ => panic!("unexpected"),
            }
        })
        .url_relative(UrlRelative::RewriteWithBase(unused_base))
        .clean("Test<img alt=test src=imgtest>")
        .to_string();
    let expected = r#"Test<img src="https://example.com/images/imgtest">"#;
    assert_eq!(cleaned, expected);
}
/// A relative URL produced by the attribute filter is expected to be resolved
/// against the configured rewrite base; the rewritten `alt` text passes through.
#[test]
fn url_filter_relative() {
    let base = Url::parse("https://example.com/base/#").unwrap();
    let cleaned = Builder::new()
        .attribute_filter(|elem, attr, value| {
            assert_eq!("img", elem);
            match (attr, value) {
                ("alt", "test") => Some("altalt".into()),
                ("src", "imgtest") => Some("rewrite".into()),
                _ => panic!("unexpected"),
            }
        })
        .url_relative(UrlRelative::RewriteWithBase(base))
        .clean("Test<img alt=test src=imgtest>")
        .to_string();
    let expected = r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#;
    assert_eq!(cleaned, expected);
}
/// With `link_rel(None)`, the rewritten link is expected to carry no `rel` attribute.
#[test]
fn rewrite_url_relative_no_rel() {
    let base = Url::parse("http://example.com/").unwrap();
    let mut builder = Builder::new();
    builder
        .url_relative(UrlRelative::RewriteWithBase(base))
        .link_rel(None);
    let cleaned = builder.clean("<a href=test>Test</a>").to_string();
    assert_eq!(cleaned, r#"<a href="http://example.com/test">Test</a>"#);
}
/// With `UrlRelative::Deny`, the relative `href` is expected to be removed.
#[test]
fn deny_url_relative() {
    let mut builder = Builder::new();
    builder.url_relative(UrlRelative::Deny);
    let cleaned = builder.clean("<a href=test>Test</a>").to_string();
    assert_eq!(cleaned, r#"<a rel="noopener noreferrer">Test</a>"#);
}
/// A user-supplied `rel` value is expected to be replaced by the default one.
#[test]
fn replace_rel() {
    let input = r#"<a href=test rel="garbage">Test</a>"#;
    let cleaned = Builder::new()
        .url_relative(UrlRelative::PassThrough)
        .clean(input)
        .to_string();
    assert_eq!(
        cleaned,
        r#"<a href="test" rel="noopener noreferrer">Test</a>"#
    );
}
/// Even with `link_rel(None)`, a user-supplied `rel` is expected to be stripped.
#[test]
fn consider_rel_still_banned() {
    let input = r#"<a href=test rel="garbage">Test</a>"#;
    let mut builder = Builder::new();
    builder.url_relative(UrlRelative::PassThrough).link_rel(None);
    let cleaned = builder.clean(input).to_string();
    assert_eq!(cleaned, r#"<a href="test">Test</a>"#);
}
/// A `javascript:` value in `data` is expected to survive on `<span>` but be
/// removed from `<object>`.
#[test]
fn object_data() {
    let input = r#"<span data="javascript:evil()">Test</span><object data="javascript:evil()"></object>M"#;
    let cleaned = Builder::new()
        .tags(hashset!["span", "object"])
        .generic_attributes(hashset!["data"])
        .clean(input)
        .to_string();
    assert_eq!(
        cleaned,
        r#"<span data="javascript:evil()">Test</span><object></object>M"#
    );
}
/// The `border` attribute is expected to be removed from the table; the
/// expected output also contains a `<tbody>` wrapper around the row.
#[test]
fn remove_attributes() {
    let document = Builder::new().clean(r#"<table border="1"><tr></tr></table>"#);
    let rendered = document.to_string();
    assert_eq!(rendered, "<table><tbody><tr></tr></tbody></table>");
}
/// A double quote inside an attribute value must be serialized as the `&quot;`
/// character reference so that it cannot terminate the double-quoted attribute.
#[test]
fn quotes_in_attrs() {
    let fragment = "<b title='\"'>contents</b>";
    let result = clean(fragment);
    // The expected literal spells out `&quot;`; a bare `"` here would both be the
    // wrong serialization and terminate the Rust string literal early.
    assert_eq!(result, "<b title=\"&quot;\">contents</b>");
}
/// Expects a panic when `rel` is allowed as a generic attribute while
/// `link_rel` is also configured to set it.
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_generic() {
    let mut builder = Builder::new();
    builder
        .link_rel(Some("noopener noreferrer"))
        .generic_attributes(hashset!["rel"]);
    builder.clean("something");
}
/// Expects a panic when `rel` is allowed on `<a>` while `link_rel` is also
/// configured to set it.
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_a() {
    let mut builder = Builder::new();
    builder
        .link_rel(Some("noopener noreferrer"))
        .tag_attributes(hashmap![
            "a" => hashset!["rel"],
        ]);
    builder.clean("something");
}
/// Allowing `rel` on `<span>` together with a configured `link_rel` is expected
/// not to panic.
#[test]
fn no_panic_if_rel_is_allowed_and_replaced_span() {
    let mut builder = Builder::new();
    builder
        .link_rel(Some("noopener noreferrer"))
        .tag_attributes(hashmap![
            "span" => hashset!["rel"],
        ]);
    builder.clean(r#"<span rel="what">s</span>"#);
}
/// Allowing `rel` generically is expected not to panic when `link_rel` is `None`.
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
    let mut builder = Builder::new();
    builder.link_rel(None).generic_attributes(hashset!["rel"]);
    builder.clean(r#"<a rel="what">s</a>"#);
}
/// Allowing `rel` on `<a>` is expected not to panic when `link_rel` is `None`.
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
    let mut builder = Builder::new();
    builder.link_rel(None).tag_attributes(hashmap![
        "a" => hashset!["rel"],
    ]);
    builder.clean(r#"<a rel="what">s</a>"#);
}
/// A void element such as `<br>` is expected to be emitted without a closing tag.
#[test]
fn dont_close_void_elements() {
    let cleaned = clean("<br>");
    assert_eq!(cleaned, "<br>");
}
/// Expects a panic when `class` is allowed through `tag_attributes` for tags
/// that also have an `allowed_classes` entry.
#[test]
#[should_panic]
fn panic_on_allowed_classes_tag_attributes() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let mut builder = Builder::new();
    builder
        .link_rel(None)
        .tag_attributes(hashmap![
            "p" => hashset!["class"],
            "a" => hashset!["class"],
        ])
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
    builder.clean(input);
}
/// Expects a panic when `class` is allowed as a generic attribute while
/// `allowed_classes` is also configured.
#[test]
#[should_panic]
fn panic_on_allowed_classes_generic_attributes() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let mut builder = Builder::new();
    builder
        .link_rel(None)
        .generic_attributes(hashset!["class", "href", "some-foo"])
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
    builder.clean(input);
}
/// Only class names listed in `allowed_classes` for a tag are expected to remain
/// (`bleh` is dropped from the `<a>`).
#[test]
fn remove_non_allowed_classes() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let document = Builder::new()
        .link_rel(None)
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ])
        .clean(input);
    let expected = r#"<p class="foo bar"><a class="baz">Hey</a></p>"#;
    assert_eq!(document.to_string(), expected);
}
/// Allowing `class` on an unrelated tag (`div`) is expected not to interfere
/// with `allowed_classes` filtering on `<p>` and `<a>`.
#[test]
fn remove_non_allowed_classes_with_tag_class() {
    let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
    let document = Builder::new()
        .link_rel(None)
        .tag_attributes(hashmap![
            "div" => hashset!["class"],
        ])
        .allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ])
        .clean(input);
    let expected = r#"<p class="foo bar"><a class="baz">Hey</a></p>"#;
    assert_eq!(document.to_string(), expected);
}
/// Class names are expected to be split on tab, newline, form feed, carriage
/// return and space only; `f\x0B` and `g\u{2000}` therefore do not match the
/// allowed `f` / `g` and are dropped.
#[test]
fn allowed_classes_ascii_whitespace() {
    let input = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
    let mut builder = Builder::new();
    builder.allowed_classes(hashmap![
        "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
    ]);
    let document = builder.clean(input);
    assert_eq!(document.to_string(), r#"<p class="a b c d e"></p>"#);
}
/// An attribute whose value is not in the `tag_attribute_values` set
/// (`data-label="baz"`) is expected to be removed, while `name` is kept.
#[test]
fn remove_non_allowed_attributes_with_tag_attribute_values() {
    let input = r#"<p data-label="baz" name="foo"></p>"#;
    let document = Builder::new()
        .tag_attribute_values(hashmap![
            "p" => hashmap![
                "data-label" => hashset!["bar"],
            ],
        ])
        .tag_attributes(hashmap![
            "p" => hashset!["name"],
        ])
        .clean(input);
    assert_eq!(document.to_string(), r#"<p name="foo"></p>"#);
}
/// An attribute whose value is in the `tag_attribute_values` set
/// (`data-label="bar"`) is expected to be kept alongside `name`.
#[test]
fn keep_allowed_attributes_with_tag_attribute_values() {
    let input = r#"<p data-label="bar" name="foo"></p>"#;
    let document = Builder::new()
        .tag_attribute_values(hashmap![
            "p" => hashmap![
                "data-label" => hashset!["bar"],
            ],
        ])
        .tag_attributes(hashmap![
            "p" => hashset!["name"],
        ])
        .clean(input);
    assert_eq!(
        document.to_string(),
        r#"<p data-label="bar" name="foo"></p>"#
    );
}
/// The value `CHECKBOX` is expected to match the allowed value `checkbox`, and
/// to be emitted with its original casing.
#[test]
fn tag_attribute_values_case_insensitive() {
    let input = r#"<input type="CHECKBOX" name="foo">"#;
    let document = Builder::new()
        .tags(hashset!["input"])
        .tag_attribute_values(hashmap![
            "input" => hashmap![
                "type" => hashset!["checkbox"],
            ],
        ])
        .tag_attributes(hashmap![
            "input" => hashset!["name"],
        ])
        .clean(input);
    assert_eq!(
        document.to_string(),
        r#"<input type="CHECKBOX" name="foo">"#
    );
}
/// `set_tag_attribute_value` is expected to add `target="_blank"` to an `<a>`
/// that did not have a `target` attribute.
#[test]
fn set_tag_attribute_values() {
    let input = r#"<a href="https://example.com/">Link</a>"#;
    let document = Builder::new()
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank")
        .clean(input);
    let expected = r#"<a href="https://example.com/" target="_blank">Link</a>"#;
    assert_eq!(document.to_string(), expected);
}
/// `set_tag_attribute_value` is expected to overwrite an existing `target`
/// value in place, keeping the attribute's position.
#[test]
fn update_existing_set_tag_attribute_values() {
    let input = r#"<a target="bad" href="https://example.com/">Link</a>"#;
    let document = Builder::new()
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank")
        .clean(input);
    let expected = r#"<a target="_blank" href="https://example.com/">Link</a>"#;
    assert_eq!(document.to_string(), expected);
}
/// Setting an attribute value for a tag that is not allowed is expected not to
/// make that tag appear in the output.
#[test]
fn unwhitelisted_set_tag_attribute_values() {
    let mut builder = Builder::new();
    builder.set_tag_attribute_value("my-elem", "my-attr", "val");
    let document = builder.clean("<span>hi</span><my-elem>");
    assert_eq!(document.to_string(), "<span>hi</span>");
}
/// A `javascript:` URL spelled entirely with numeric character references must
/// still be recognised and removed from `href`.
///
/// The fragment deliberately encodes `javascript:alert('XSS')` as hexadecimal
/// character references (without terminating semicolons) so that the test
/// exercises entity decoding rather than a plain-text scheme check.
#[test]
fn remove_entity_link() {
    let fragment = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
                    &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
    let result = clean(fragment);
    assert_eq!(
        result.to_string(),
        "<a rel=\"noopener noreferrer\">Click me!</a>"
    );
}
/// Exercises `UrlRelative::Custom`: the evaluator drops URLs starting with
/// `ba`, prefixes absolute paths with `/root`, and passes other relative URLs
/// through. The absolute `http://` link is expected to be left untouched.
#[test]
fn remove_relative_url_evaluate() {
    /// True for `/path` style URLs, false for protocol-relative `//host` ones.
    fn is_absolute_path(url: &str) -> bool {
        url.starts_with('/') && !url.starts_with("//")
    }
    /// True when the URL begins with the bytes `ba`.
    fn is_banned(url: &str) -> bool {
        url.starts_with("ba")
    }
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        if is_absolute_path(url) {
            return Some(Cow::Owned(format!("/root{}", url)));
        }
        if is_banned(url) {
            return None;
        }
        Some(Cow::Borrowed(url))
    }
    let input = "<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>";
    let cleaned = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean(input)
        .to_string();
    let expected = "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>";
    assert_eq!(cleaned, expected);
}
/// Checks where `rel` ends up when a custom evaluator removes `href`: the
/// expected output has `rel` in the position the removed `href` occupied.
#[test]
fn remove_relative_url_evaluate_b() {
    /// True for `/path` style URLs, false for protocol-relative `//host` ones.
    fn is_absolute_path(url: &str) -> bool {
        url.starts_with('/') && !url.starts_with("//")
    }
    /// True when the URL begins with the bytes `ba`.
    fn is_banned(url: &str) -> bool {
        url.starts_with("ba")
    }
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        if is_absolute_path(url) {
            return Some(Cow::Owned(format!("/root{}", url)));
        }
        if is_banned(url) {
            return None;
        }
        Some(Cow::Borrowed(url))
    }
    let input = "<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>";
    let cleaned = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean(input)
        .to_string();
    let expected = "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>";
    assert_eq!(cleaned, expected);
}
/// An absolute URL is expected to be left unchanged even though the custom
/// evaluator would rewrite every URL it is given to `invalid`.
#[test]
fn remove_relative_url_evaluate_c() {
    fn evaluate(_: &str) -> Option<Cow<'_, str>> {
        Some(Cow::Owned("invalid".to_owned()))
    }
    let mut builder = Builder::new();
    builder.url_relative(UrlRelative::Custom(Box::new(evaluate)));
    let cleaned = builder
        .clean(r#"<a href="https://www.google.com/">google</a>"#)
        .to_string();
    let expected = r#"<a href="https://www.google.com/" rel="noopener noreferrer">google</a>"#;
    assert_eq!(cleaned, expected);
}
/// Disallowed elements are expected to be unwrapped recursively, leaving only
/// their text content.
#[test]
fn clean_children_of_bad_element() {
    let document = Builder::new().clean("<bad><evil>a</evil>b</bad>");
    let rendered = document.to_string();
    assert_eq!(rendered, "ab");
}
/// `clean_from_reader` must accept a byte reader and sanitize its contents.
///
/// The `<script>` element is removed together with its content, so the space
/// before it and the space after it end up adjacent in the output.
#[test]
fn reader_input() {
    let fragment = b"an <script>evil()</script> example";
    let result = Builder::new().clean_from_reader(&fragment[..]);
    assert!(result.is_ok());
    // Two spaces: one from each side of the removed element.
    assert_eq!(result.unwrap().to_string(), "an  example");
}
/// Invalid UTF-8 read through `clean_from_reader` is expected to be replaced
/// with U+FFFD rather than producing an error.
#[test]
fn reader_non_utf8() {
    let raw: &[u8] = b"non-utf8 \xF0\x90\x80string";
    let outcome = Builder::new().clean_from_reader(raw);
    assert!(outcome.is_ok());
    let rendered = outcome.unwrap().to_string();
    assert_eq!(rendered, "non-utf8 \u{fffd}string");
}
/// Formatting a cleaned document with `{}` is expected to yield the sanitized HTML.
#[test]
fn display_impl() {
    let document = Builder::new().link_rel(None).clean("a <a>link</a>");
    let shown = format!("{}", document);
    assert_eq!(shown, "a <a>link</a>");
}
/// Formatting a cleaned document with `{:?}` is expected to wrap the sanitized
/// HTML in `Document(...)`.
#[test]
fn debug_impl() {
    let document = Builder::new().link_rel(None).clean("a <a>link</a>");
    let shown = format!("{:?}", document);
    assert_eq!(shown, "Document(a <a>link</a>)");
}
/// Smoke test (only built with `ammonia_unstable`): calling `to_dom_node` on a
/// cleaned document should complete without panicking.
#[cfg(ammonia_unstable)]
#[test]
fn to_dom_node() {
    let document = Builder::new().link_rel(None).clean("a <a>link</a>");
    let _node = document.to_dom_node();
}
/// Converting a cleaned document with `String::from` is expected to yield the
/// sanitized HTML (here with the unclosed `<a>` closed).
#[test]
fn string_from_document() {
    let converted = String::from(Builder::new().link_rel(None).clean("a <a>link"));
    assert_eq!(format!("{}", converted), "a <a>link</a>");
}
/// Compile-time probe: accepts (and discards) any value whose type is `Sync`.
fn require_sync<T>(_value: T)
where
    T: Sync,
{
}
/// Compile-time probe: accepts (and discards) any value whose type is `Send`.
fn require_send<T>(_value: T)
where
    T: Send,
{
}
/// Compiles only if `Builder` is both `Sync` and `Send`.
#[test]
fn require_sync_and_send() {
    let for_sync = Builder::new();
    require_sync(for_sync);
    let for_send = Builder::new();
    require_send(for_send);
}
/// With `id_prefix` set, an allowed `id` on `<a>` is expected to gain the
/// prefix, while the `id` on `<b>` (not allowed there) is removed.
#[test]
fn id_prefixed() {
    let input = r#"<a id="hello"></a><b id="hello"></a>"#;
    let document = Builder::new()
        .tag_attributes(hashmap![
            "a" => hashset!["id"],
        ])
        .id_prefix(Some("prefix-"))
        .clean(input);
    let cleaned = String::from(document);
    assert_eq!(
        cleaned,
        r#"<a id="prefix-hello" rel="noopener noreferrer"></a><b></b>"#
    );
}
/// An `id` that already starts with the configured prefix is expected to be
/// left as is rather than prefixed a second time.
#[test]
fn id_already_prefixed() {
    let input = r#"<a id="prefix-hello"></a>"#;
    let document = Builder::new()
        .tag_attributes(hashmap![
            "a" => hashset!["id"],
        ])
        .id_prefix(Some("prefix-"))
        .clean(input);
    let cleaned = String::from(document);
    assert_eq!(
        cleaned,
        r#"<a id="prefix-hello" rel="noopener noreferrer"></a>"#
    );
}
/// A tag listed in `clean_content_tags` is expected to be removed together
/// with everything inside it.
#[test]
fn clean_content_tags() {
    let input = r#"<script type="text/javascript"><a>Hello!</a></script>"#;
    let document = Builder::new()
        .clean_content_tags(hashset!["script"])
        .clean(input);
    let cleaned = String::from(document);
    assert_eq!(cleaned, "");
}
/// Only the `clean_content_tags` element and its content are expected to be
/// removed; sibling elements stay.
#[test]
fn only_clean_content_tags() {
    let input = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let document = Builder::new()
        .clean_content_tags(hashset!["script"])
        .clean(input);
    let cleaned = String::from(document);
    assert_eq!(cleaned, "<em>This is</em><p>still here!</p>");
}
/// Removing the default `a` tag and its attributes from the builder is
/// expected to leave `clean_content_tags` handling of `<script>` unaffected.
#[test]
fn clean_removed_default_tag() {
    let input = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
    let document = Builder::new()
        .rm_tags(hashset!["a"])
        .rm_tag_attributes("a", hashset!["href", "hreflang"])
        .clean_content_tags(hashset!["script"])
        .clean(input);
    let cleaned = String::from(document);
    assert_eq!(cleaned, "<em>This is</em><p>still here!</p>");
}
/// Expects a panic when `a` is removed from the allowed tags and then listed in
/// `clean_content_tags`.
#[test]
#[should_panic]
fn panic_on_clean_content_tag_attribute() {
    let mut builder = Builder::new();
    builder
        .rm_tags(std::iter::once("a"))
        .clean_content_tags(hashset!["a"]);
    builder.clean("");
}
/// Expects a panic when a tag from the default builder (`a`) is also listed in
/// `clean_content_tags`.
#[test]
#[should_panic]
fn panic_on_clean_content_tag() {
    let mut builder = Builder::new();
    builder.clean_content_tags(hashset!["a"]);
    builder.clean("");
}
/// `clean_text` must escape markup-significant characters: `<` and `>` become
/// named references and every space becomes `&#32;`.
///
/// The expected literal spells the character references out; comparing against
/// the unescaped input would not test any escaping at all.
#[test]
fn clean_text_test() {
    assert_eq!(
        clean_text("<this> is <a test function"),
        "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
    );
}
/// `clean_text` must escape each ASCII whitespace character (tab, line feed,
/// form feed, space) as its decimal numeric character reference.
#[test]
fn clean_text_spaces_test() {
    // One reference per input byte: 0x09, 0x0a, 0x0c, 0x20.
    assert_eq!(clean_text("\x09\x0a\x0c\x20"), "&#9;&#10;&#12;&#32;");
}
/// Namespace handling for SVG: with only `iframe` added, content nested in
/// `<svg>` is expected to be unwrapped to text; once `svg` itself is allowed,
/// the `<svg><a>` structure is expected to be kept.
#[test]
fn ns_svg() {
    /// Cleans `html` with `iframe` added to the allowed tags.
    fn clean_with_iframe(html: &str) -> String {
        String::from(Builder::new().add_tags(&["iframe"]).clean(html))
    }

    let nested_attack = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##;
    assert_eq!(clean_with_iframe(nested_attack), "test");

    let iframe_in_svg = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>";
    assert_eq!(
        clean_with_iframe(iframe_in_svg),
        "remove me<iframe>keep me</iframe>"
    );

    let anchor_in_svg = "<svg><a>remove me</a></svg><iframe>keep me</iframe>";
    assert_eq!(
        clean_with_iframe(anchor_in_svg),
        "remove me<iframe>keep me</iframe>"
    );

    let svg_allowed = "<svg><a>keep me</a></svg><iframe>keep me</iframe>";
    let cleaned = String::from(
        Builder::new()
            .add_tags(&["iframe", "svg"])
            .clean(svg_allowed),
    );
    assert_eq!(
        cleaned,
        r#"<svg><a rel="noopener noreferrer">keep me</a></svg><iframe>keep me</iframe>"#
    );
}
/// Namespace handling for MathML: `mglyph` is expected to be kept only when it
/// sits directly inside `<math><mtext>`, not on its own or below an HTML `<div>`.
#[test]
fn ns_mathml() {
    /// Cleans `html` with `math`, `mtext` and `mglyph` added to the allowed tags.
    fn clean_with_math(html: &str) -> String {
        String::from(
            Builder::new()
                .add_tags(&["math", "mtext", "mglyph"])
                .clean(html),
        )
    }

    assert_eq!(clean_with_math("<mglyph></mglyph>"), "");
    assert_eq!(
        clean_with_math("<math><mtext><div><mglyph>"),
        "<math><mtext><div></div></mtext></math>"
    );
    assert_eq!(
        clean_with_math("<math><mtext><mglyph>"),
        "<math><mtext><mglyph></mglyph></mtext></math>"
    );
}
/// XML processing instructions inside `<svg>` must never reach the output,
/// whether or not `svg` itself is an allowed tag.
///
/// The last case checks a processing instruction that is cut short by `>`: the
/// following `<img>` is sanitized as ordinary HTML and the trailing ` ?>` is
/// emitted as text, so its `>` must appear escaped as `&gt;`.
#[test]
fn xml_processing_instruction() {
    let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
    let result = String::from(Builder::new().clean(fragment));
    assert_eq!(result.to_string(), "");
    let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
    let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
    assert_eq!(result.to_string(), "<svg></svg>");
    let fragment = r##"<svg><?xml-stylesheet ><img src=x onerror="alert('Ammonia bypassed!!!')"> ?></svg>"##;
    let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
    // `>` in text content is serialized as a character reference.
    assert_eq!(result.to_string(), "<svg></svg><img src=\"x\"> ?&gt;");
}
/// Walks the `generic_attribute_prefixes` field through set / add / remove and
/// checks its size after each step, including that it returns to `None` once
/// the last prefix is removed.
#[test]
fn generic_attribute_prefixes() {
    /// Number of configured prefixes, or `None` when the field is unset.
    fn prefix_count(builder: &Builder<'_>) -> Option<usize> {
        builder
            .generic_attribute_prefixes
            .as_ref()
            .map(|prefixes| prefixes.len())
    }

    let data_prefix = ["data-"];
    let code_prefix = ["code-"];
    let mut builder = Builder::new();
    let mut initial: HashSet<&'_ str> = HashSet::new();
    initial.insert("data-");

    assert_eq!(prefix_count(&builder), None);
    builder.generic_attribute_prefixes(initial);
    assert_eq!(prefix_count(&builder), Some(1));

    // Adding a prefix that is already present must not grow the set.
    builder.add_generic_attribute_prefixes(&data_prefix);
    assert_eq!(prefix_count(&builder), Some(1));
    builder.add_generic_attribute_prefixes(&code_prefix);
    assert_eq!(prefix_count(&builder), Some(2));

    // Removing is idempotent: the second removal of `code-` changes nothing.
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(prefix_count(&builder), Some(1));
    builder.rm_generic_attribute_prefixes(&code_prefix);
    assert_eq!(prefix_count(&builder), Some(1));

    builder.rm_generic_attribute_prefixes(&data_prefix);
    assert_eq!(prefix_count(&builder), None);
}
/// Compares cleaning output with no prefixes, with `data-`, and with both
/// `data-` and `code-` allowed as generic attribute prefixes.
#[test]
fn generic_attribute_prefixes_clean() {
    let input = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;

    // No prefixes: only the explicitly allowed `data-1` survives.
    let without_prefixes = String::from(
        Builder::new()
            .add_tag_attributes("a", &["data-1"])
            .clean(input),
    );
    assert_eq!(
        without_prefixes,
        r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // `data-` prefix: both `data-*` attributes survive.
    let with_data_prefix = String::from(
        Builder::new()
            .add_tag_attributes("a", &["data-1"])
            .add_generic_attribute_prefixes(&["data-"])
            .clean(input),
    );
    assert_eq!(
        with_data_prefix,
        r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // `data-` and `code-` prefixes: all four attributes survive.
    let with_both_prefixes = String::from(
        Builder::new()
            .add_tag_attributes("a", &["data-1", "code-1"])
            .add_generic_attribute_prefixes(&["data-", "code-"])
            .clean(input),
    );
    assert_eq!(
        with_both_prefixes,
        r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );
}
/// A spaced-out less-than comparison is expected not to be detected as HTML.
#[test]
fn lesser_than_isnt_html() {
    let detected = is_html("1 < 2");
    assert!(!detected);
}
/// A less-than comparison without spaces is expected not to be detected as HTML.
#[test]
fn dense_lesser_than_isnt_html() {
    let detected = is_html("1<2");
    assert!(!detected);
}
/// A bracketed number such as `<2>` is expected not to be detected as HTML.
#[test]
fn what_about_number_elements() {
    let detected = is_html("foo<2>bar");
    assert!(!detected);
}
/// Rust turbofish syntax contains `<u8>`, which is expected to be detected as HTML.
#[test]
fn turbofish_is_html_sadly() {
    let detected = is_html("Vec::<u8>::new()");
    assert!(detected);
}
/// A trailing `<g>` in otherwise plain text is expected to be detected as HTML.
#[test]
fn stop_grinning() {
    let detected = is_html("did you really believe me? <g>");
    assert!(detected);
}
/// A lone `<b>` tag is expected to be detected as HTML.
#[test]
fn dont_be_bold() {
    let detected = is_html("<b>");
    assert!(detected);
}
/// Table-driven check of `UrlRelative::RewriteWithRoot`: each row is
/// `(root, path, href, expected href)`, and the cleaned `<a>` must carry the
/// expected `href` plus the default `rel`.
#[test]
fn rewrite_with_root() {
    // Roots and the long document path are shared between several rows.
    const MASTER_DIR: &str = "https://github.com/rust-ammonia/ammonia/blob/master/";
    const MASTER_NO_SLASH: &str = "https://github.com/rust-ammonia/ammonia/blob/master";
    const SITE_ROOT: &str = "https://github.com/";
    const README_PATH: &str = "rust-ammonia/ammonia/blob/master/README.md";

    let cases = [
        // Root ends with a slash, document path is a file in that directory.
        (
            MASTER_DIR,
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            MASTER_DIR,
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            MASTER_DIR,
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        // Root without a trailing slash: its last segment is replaced.
        (
            MASTER_NO_SLASH,
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/README.md",
        ),
        (
            MASTER_NO_SLASH,
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/",
        ),
        (
            MASTER_NO_SLASH,
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
        ),
        // Empty document path.
        (
            MASTER_DIR,
            "",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            MASTER_DIR,
            "",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            MASTER_DIR,
            "",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        // Site root with a deep document path.
        (
            SITE_ROOT,
            README_PATH,
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (SITE_ROOT, README_PATH, "/", "https://github.com/"),
        (
            SITE_ROOT,
            README_PATH,
            "CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            SITE_ROOT,
            README_PATH,
            "/CONTRIBUTING.md",
            "https://github.com/CONTRIBUTING.md",
        ),
    ];

    for (root, path, url, result) in cases {
        let input_html = format!(r#"<a href="{url}">test</a>"#);
        let expected = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
        let rewrite = UrlRelative::RewriteWithRoot {
            root: Url::parse(root).unwrap(),
            path: path.to_string(),
        };
        let actual = Builder::new()
            .url_relative(rewrite)
            .clean(&input_html)
            .to_string();
        if expected == actual {
            continue;
        }
        // Print the failing row before asserting so the table entry is identifiable.
        println!(
            "failed to check ({root}, {path}, {url}, {result})\n{r} != {a}",
            r = expected,
            a = actual
        );
        assert_eq!(expected, actual);
    }
}
}