use super::super::{
chars,
tokenizer::{HtmlToken, TagTokenType},
Acknowledgement, HtmlParseError, HtmlParser, HtmlParserError, HTML_NAMESPACE,
MATHML_NAMESPACE, SVG_NAMESPACE,
};
/// Names of HTML start tags that `in_foreign_content` treats as "breakout" tags:
/// when one of them is seen, foreign elements are popped off the stack of open
/// elements and the token is reprocessed under the current insertion mode.
///
/// `font` is intentionally absent: it only breaks out when it carries a `color`,
/// `face` or `size` attribute, which is checked separately at the use site.
const FOREIGN_CONTENT_BREAKOUT_TAGS: [&str; 44] = [
    // "b" .. "embed"
    "b", "big", "blockquote", "body", "br", "center", "code", "dd", "div", "dl", "dt", "em",
    "embed",
    // headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    // "head" .. "ruby"
    "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", "ruby",
    // "s" .. "var"
    "s", "small", "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul", "var",
];
impl HtmlParser {
pub(crate) fn in_foreign_content(
&mut self,
token: HtmlToken,
) -> Result<Acknowledgement, HtmlParseError> {
match token {
HtmlToken::Character('\0') => {
self.handle_error(HtmlParserError::MinorError(
"unexpected null character in foreign content".to_string(),
))?;
self.insert_character('\u{FFFD}')?;
}
HtmlToken::Character(
c @ (chars::CHARACTER_TABULATION
| chars::LINE_FEED
| chars::FORM_FEED
| chars::CARRIAGE_RETURN
| chars::SPACE),
) => {
self.insert_character(c)?;
}
HtmlToken::Character(c) => {
self.insert_character(c)?;
self.frameset_ok = false;
}
HtmlToken::Characters(ref s) => {
let filtered: String;
let text = if s.contains('\0') {
self.handle_error(HtmlParserError::MinorError(
"unexpected null character in foreign content".to_string(),
))?;
filtered = s.replace('\0', "\u{FFFD}");
&filtered
} else {
s
};
self.insert_characters(text)?;
if text
.bytes()
.any(|b| !matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' '))
{
self.frameset_ok = false;
}
}
HtmlToken::Comment(comment) => {
self.insert_a_comment(comment, None)?;
}
HtmlToken::DocType(_) => {
self.handle_error(HtmlParserError::MinorError(
"unexpected DOCTYPE in foreign content".to_string(),
))?;
}
HtmlToken::TagToken(TagTokenType::StartTag(ref tag))
if FOREIGN_CONTENT_BREAKOUT_TAGS.contains(&tag.tag_name.as_str())
|| (tag.tag_name == "font"
&& tag
.attributes
.iter()
.any(|a| a.name == "color" || a.name == "face" || a.name == "size")) =>
{
self.handle_error(HtmlParserError::MinorError(format!(
"unexpected HTML start tag <{}> in foreign content",
tag.tag_name
)))?;
loop {
let acn_id = match self.adjusted_current_node_id_opt() {
Some(id) => id,
None => break,
};
let ns = self.element_namespace(acn_id).unwrap_or(HTML_NAMESPACE);
if ns == HTML_NAMESPACE {
break;
}
if self.is_fragment_parser() && self.open_elements.len() == 1 {
break;
}
self.open_elements.pop();
}
return self.handle_token(token, self.insertion_mode);
}
HtmlToken::TagToken(TagTokenType::StartTag(mut tag)) => {
let acn_id = self
.adjusted_current_node_id_opt()
.ok_or(HtmlParseError::new(
"no adjusted current node for foreign content start tag",
))?;
let acn_ns = self
.element_namespace(acn_id)
.unwrap_or(HTML_NAMESPACE)
.to_string();
if acn_ns == MATHML_NAMESPACE {
Self::adjust_mathml_attributes(&mut tag);
}
if acn_ns == SVG_NAMESPACE {
Self::adjust_svg_tag_names(&mut tag);
Self::adjust_svg_attributes(&mut tag);
}
Self::adjust_foreign_attributes(&mut tag);
let self_closing = tag.self_closing;
self.insert_foreign_element(tag, &acn_ns)?;
if self_closing {
self.open_elements.pop();
return Ok(Acknowledgement::yes());
}
}
HtmlToken::TagToken(TagTokenType::EndTag(ref tag))
if tag.tag_name == "script"
&& self.current_node_id().is_some_and(|id| {
self.element_namespace(id) == Some(SVG_NAMESPACE)
&& self.element_name(id) == Some("script")
}) =>
{
self.open_elements.pop();
}
HtmlToken::TagToken(TagTokenType::EndTag(ref tag)) => {
let tag_name = tag.tag_name.clone();
return self.foreign_content_end_tag(&tag_name, token);
}
HtmlToken::EndOfFile => {
return self.handle_token(token, self.insertion_mode);
}
}
Ok(Acknowledgement::no())
}
/// Handles an end tag in foreign content that is not covered by a more specific rule.
///
/// Reports an error when the ASCII-lowercased name of the current node differs from
/// `tag_name`, then walks `open_elements` from the top towards the bottom:
/// * a node whose lowercased name equals `tag_name` is popped together with
///   everything above it;
/// * on reaching an HTML-namespace node (or one without a known namespace) the
///   token is handed to `handle_token` with the current insertion mode;
/// * reaching the bottom entry of the stack ends the walk without further action.
///
/// Returns `Acknowledgement::no()` unless the token was forwarded, in which case the
/// result of `handle_token` is returned. Errors from `handle_error` and
/// `handle_token` are propagated.
fn foreign_content_end_tag(
    &mut self,
    tag_name: &str,
    token: HtmlToken,
) -> Result<Acknowledgement, HtmlParseError> {
    // True when `element_name`, ASCII-lowercased, is byte-for-byte equal to
    // `tag_name` (which is compared as given).
    fn lowercased_eq(element_name: &str, tag_name: &str) -> bool {
        element_name
            .bytes()
            .map(|b| b.to_ascii_lowercase())
            .eq(tag_name.bytes())
    }

    // Index of the current node; nothing to do on an empty stack.
    let Some(mut idx) = self.open_elements.len().checked_sub(1) else {
        return Ok(Acknowledgement::no());
    };

    let mismatch = self
        .element_name(self.open_elements[idx])
        .filter(|current| !lowercased_eq(current, tag_name))
        .map(|current| {
            format!(
                "unexpected end tag </{}> in foreign content (current node is <{}>)",
                tag_name, current
            )
        });
    if let Some(message) = mismatch {
        self.handle_error(HtmlParserError::MinorError(message))?;
    }

    // The bottom entry (index 0) is never matched or popped.
    while idx != 0 {
        let is_match = self
            .element_name(self.open_elements[idx])
            .is_some_and(|name| lowercased_eq(name, tag_name));
        if is_match {
            // Drops the matching node and everything above it.
            self.open_elements.truncate(idx);
            return Ok(Acknowledgement::no());
        }

        idx -= 1;
        let in_html_namespace = self
            .element_namespace(self.open_elements[idx])
            .map_or(true, |ns| ns == HTML_NAMESPACE);
        if in_html_namespace {
            return self.handle_token(token, self.insertion_mode);
        }
    }
    Ok(Acknowledgement::no())
}
}