#![allow(warnings)]
pub use interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
pub use interface::{NodeOrText, AppendNode, AppendText};
pub use interface::{TreeSink, Tracer, NextParserState, create_element, ElementFlags};
use self::types::*;
use self::actions::TreeBuilderActions;
use self::rules::TreeBuilderStep;
use ExpandedName;
use QualName;
use tendril::StrTendril;
use tokenizer;
use tokenizer::{Doctype, StartTag, Tag, TokenSink, TokenSinkResult};
use tokenizer::states as tok_state;
use util::str::is_ascii_whitespace;
use std::default::Default;
use std::mem::replace;
use std::borrow::Cow::Borrowed;
use std::collections::VecDeque;
#[macro_use] mod tag_sets;
mod data;
mod types;
mod actions;
// The body of this module is not written here: it is textually included from
// `rules.rs` inside the directory named by the `OUT_DIR` environment variable
// at compile time (presumably emitted by the crate's build script — confirm).
mod rules {
include!(concat!(env!("OUT_DIR"), "/rules.rs"));
}
/// Options controlling a `TreeBuilder`. A `Default` implementation is provided.
#[derive(Copy, Clone)]
pub struct TreeBuilderOpts {
/// Condition handed to `format_if!` when reporting parse errors, which
/// selects between a fixed message and a formatted one. Default: `false`.
/// NOTE(review): presumably `true` selects the detailed formatted message —
/// confirm against the `format_if!` definition.
pub exact_errors: bool,
/// Is scripting enabled? Consulted when picking the tokenizer state for a
/// `noscript` fragment context element. Default: `true`.
pub scripting_enabled: bool,
/// Forwarded to `data::doctype_error_and_quirks` when a DOCTYPE token is
/// processed (presumably: is this an `iframe srcdoc` document?). Default: `false`.
pub iframe_srcdoc: bool,
/// When `true`, a DOCTYPE token is not appended to the document. Default: `false`.
pub drop_doctype: bool,
/// Not read anywhere in the visible part of this module. Default: `false`.
pub ignore_missing_rules: bool,
/// Quirks mode the tree builder starts out with. Default: `NoQuirks`.
pub quirks_mode: QuirksMode,
}
impl Default for TreeBuilderOpts {
    /// Returns the stock configuration: scripting enabled, no-quirks mode,
    /// and every other switch turned off.
    fn default() -> Self {
        TreeBuilderOpts {
            // The only flag that is on by default.
            scripting_enabled: true,
            quirks_mode: NoQuirks,
            // Everything else is opt-in.
            exact_errors: false,
            iframe_srcdoc: false,
            drop_doctype: false,
            ignore_missing_rules: false,
        }
    }
}
/// The HTML tree builder: receives tokens and drives a `TreeSink` to build
/// the document tree.
pub struct TreeBuilder<Handle, Sink> {
/// Options this builder was constructed with.
opts: TreeBuilderOpts,
/// Consumer of the tree modifications (and parse errors) produced here.
pub sink: Sink,
/// Current insertion mode; each token is stepped in this mode.
mode: InsertionMode,
/// A saved insertion mode; starts as `None`.
/// NOTE(review): presumably the mode to return to after text-like modes — confirm in the rules.
orig_mode: Option<InsertionMode>,
/// Stack of template insertion modes; seeded with `InTemplate` when the
/// fragment context element is an HTML `template`.
template_modes: Vec<InsertionMode>,
/// Character tokens buffered together with their split status.
/// NOTE(review): presumably text seen while in a table — confirm in the rules.
pending_table_text: Vec<(SplitStatus, StrTendril)>,
/// Quirks mode of the document; initialised from `opts.quirks_mode`.
quirks_mode: QuirksMode,
/// The document node, obtained from `sink.get_document()` at construction.
doc_handle: Handle,
/// Stack of open elements; the most recently opened element is last.
open_elems: Vec<Handle>,
/// List of active formatting entries (markers and elements).
active_formatting: Vec<FormatEntry<Handle>>,
/// Handle of the `head` element, once known (presumably the spec's head element pointer).
head_elem: Option<Handle>,
/// Handle of the current `form` element, if any; may be supplied for fragment parsing.
form_elem: Option<Handle>,
/// Starts as `true` (presumably the spec's frameset-ok flag).
frameset_ok: bool,
/// Single-shot request: drop a leading `"\n"` from the next character tokens.
ignore_lf: bool,
/// Whether foster parenting is in effect when choosing an insertion point.
foster_parenting: bool,
/// Context element when parsing a fragment; `None` for a full document.
context_elem: Option<Handle>,
/// Line number compared against the one supplied with each incoming token;
/// starts at 1.
current_line: u64,
}
impl<Handle, Sink> TreeBuilder<Handle, Sink>
where Handle: Clone,
Sink: TreeSink<Handle=Handle>,
{
/// Creates a tree builder for a full document.
///
/// The document node is requested from `sink` up front; all tree
/// modifications are then sent to that same sink. Parsing starts in the
/// `Initial` insertion mode with empty stacks and the quirks mode taken
/// from `opts`.
pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
    let document = sink.get_document();

    TreeBuilder {
        opts,
        sink,
        doc_handle: document,
        quirks_mode: opts.quirks_mode,

        mode: Initial,
        orig_mode: None,
        template_modes: Vec::new(),
        pending_table_text: Vec::new(),

        open_elems: Vec::new(),
        active_formatting: Vec::new(),
        head_elem: None,
        form_elem: None,
        context_elem: None,

        frameset_ok: true,
        ignore_lf: false,
        foster_parenting: false,
        current_line: 1,
    }
}
/// Creates a tree builder for parsing a fragment in the context of
/// `context_elem`.
///
/// `form_elem` becomes the builder's initial form element. If the context
/// element is an HTML `template`, the template-mode stack starts with
/// `InTemplate`. After construction a root element is created and the
/// insertion mode is reset, so the returned builder is ready to receive
/// tokens.
pub fn new_for_fragment(mut sink: Sink,
                        context_elem: Handle,
                        form_elem: Option<Handle>,
                        opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
    let document = sink.get_document();

    // A `template` context needs its insertion mode on the stack from the start.
    let template_modes = if sink.elem_name(&context_elem) == expanded_name!(html "template") {
        vec![InTemplate]
    } else {
        Vec::new()
    };

    let mut builder = TreeBuilder {
        opts,
        sink,
        doc_handle: document,
        quirks_mode: opts.quirks_mode,

        mode: Initial,
        orig_mode: None,
        template_modes,
        pending_table_text: Vec::new(),

        open_elems: Vec::new(),
        active_formatting: Vec::new(),
        head_elem: None,
        form_elem,
        context_elem: Some(context_elem),

        frameset_ok: true,
        ignore_lf: false,
        foster_parenting: false,
        current_line: 1,
    };

    // Root element with no attributes, then pick the mode that matches the context.
    builder.create_root(Vec::new());
    let initial_mode = builder.reset_insertion_mode();
    builder.mode = initial_mode;
    builder
}
pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State {
let elem = self.context_elem.as_ref().expect("no context element");
let name = match self.sink.elem_name(elem) {
ExpandedName { ns: &ns!(html), local } => local,
_ => return tok_state::Data
};
match *name {
local_name!("title") | local_name!("textarea") => tok_state::RawData(tok_state::Rcdata),
local_name!("style") | local_name!("xmp") | local_name!("iframe")
| local_name!("noembed") | local_name!("noframes") => tok_state::RawData(tok_state::Rawtext),
local_name!("script") => tok_state::RawData(tok_state::ScriptData),
local_name!("noscript") => if self.opts.scripting_enabled {
tok_state::RawData(tok_state::Rawtext)
} else {
tok_state::Data
},
local_name!("plaintext") => tok_state::Plaintext,
_ => tok_state::Data
}
}
/// Reports every node handle held by the tree builder to `tracer`.
///
/// Handles are reported in this order: the document, the stack of open
/// elements, the elements in the active formatting list, then the head,
/// form and context elements when present.
pub fn trace_handles(&self, tracer: &Tracer<Handle=Handle>) {
    tracer.trace_handle(&self.doc_handle);

    for open in self.open_elems.iter() {
        tracer.trace_handle(open);
    }

    // Markers carry no handle; only element entries are traced.
    for entry in self.active_formatting.iter() {
        if let Element(ref handle, _) = *entry {
            tracer.trace_handle(handle);
        }
    }

    if let Some(ref head) = self.head_elem {
        tracer.trace_handle(head);
    }
    if let Some(ref form) = self.form_elem {
        tracer.trace_handle(form);
    }
    if let Some(ref context) = self.context_elem {
        tracer.trace_handle(context);
    }
}
/// Debugging aid: prints the open-element stack and the active formatting
/// list to stdout, prefixed with `label`.
///
/// # Panics
///
/// Panics if any listed element is not in the HTML namespace.
#[allow(dead_code)]
fn dump_state(&self, label: String) {
    println!("dump_state on {}", label);

    print!(" open_elems:");
    for handle in &self.open_elems {
        let expanded = self.sink.elem_name(handle);
        if expanded.ns != &ns!(html) {
            panic!();
        }
        print!(" {}", expanded.local);
    }
    println!("");

    print!(" active_formatting:");
    for entry in &self.active_formatting {
        match *entry {
            Element(ref handle, _) => {
                let expanded = self.sink.elem_name(handle);
                if expanded.ns != &ns!(html) {
                    panic!();
                }
                print!(" {}", expanded.local);
            }
            Marker => print!(" Marker"),
        }
    }
    println!("");
}
/// Emits a `debug!` log line naming the token about to be processed (in
/// escaped form) and the insertion mode it is processed in.
fn debug_step(&self, mode: InsertionMode, token: &Token) {
use util::str::to_escaped_string;
// The escaping call is written inside the `debug!` arguments rather than
// hoisted into a local.
debug!("processing {} in insertion mode {:?}", to_escaped_string(token), mode);
}
/// Runs `token`, plus any follow-up tokens it gives rise to, through the
/// insertion-mode rules until nothing is left to process or the tokenizer
/// must be told to change state.
///
/// Returns `Continue` when processing finished normally, or `Script`,
/// `Plaintext` / `RawData(..)` when a step requested that outcome.
fn process_to_completion(&mut self, mut token: Token) -> TokenSinkResult<Handle> {
// Tokens waiting behind the current one. Only the `SplitWhitespace` arm
// below ever pushes onto this queue.
let mut more_tokens = VecDeque::new();
loop {
// Must be computed before `token` is moved into the step function.
let should_have_acknowledged_self_closing_flag =
matches!(token, TagToken(Tag { self_closing: true, kind: StartTag, .. }));
// Tokens classified as foreign take the foreign-content step; everything
// else is stepped in the current insertion mode.
let result = if self.is_foreign(&token) {
self.step_foreign(token)
} else {
let mode = self.mode;
self.step(mode, token)
};
match result {
Done => {
// A self-closing start tag that finished with plain `Done` (rather than
// `DoneAckSelfClosing`) is reported as a parse error.
if should_have_acknowledged_self_closing_flag {
self.sink.parse_error(Borrowed("Unacknowledged self-closing tag"));
}
// NOTE(review): `unwrap_or_return!` is defined elsewhere; presumably it
// yields the queued token, or returns `Continue` when the queue is empty.
token = unwrap_or_return!(more_tokens.pop_front(), tokenizer::TokenSinkResult::Continue);
}
DoneAckSelfClosing => {
token = unwrap_or_return!(more_tokens.pop_front(), tokenizer::TokenSinkResult::Continue);
}
// Switch insertion mode and run the returned token again.
Reprocess(m, t) => {
self.mode = m;
token = t;
}
// Run the returned token again without touching the insertion mode.
ReprocessForeign(t) => {
token = t;
}
SplitWhitespace(mut buf) => {
// Take the leading run of characters off the buffer; `is_ws` reports
// whether that run is whitespace (per `is_ascii_whitespace`).
let p = buf.pop_front_char_run(is_ascii_whitespace);
let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue);
let status = if is_ws { Whitespace } else { NotWhitespace };
token = CharacterTokens(status, first);
// Whatever remains is queued, still unsplit, to be processed afterwards.
if buf.len32() > 0 {
more_tokens.push_back(CharacterTokens(NotSplit, buf));
}
}
// The three arms below return to the tokenizer immediately; the asserts
// check that no queued tokens would be dropped by doing so.
Script(node) => {
assert!(more_tokens.is_empty());
return tokenizer::TokenSinkResult::Script(node);
}
ToPlaintext => {
assert!(more_tokens.is_empty());
return tokenizer::TokenSinkResult::Plaintext;
}
ToRawData(k) => {
assert!(more_tokens.is_empty());
return tokenizer::TokenSinkResult::RawData(k);
}
}
}
}
/// Returns `true` when this builder has a context element, i.e. it was
/// created with `new_for_fragment` rather than `new`.
pub fn is_fragment(&self) -> bool {
self.context_elem.is_some()
}
/// Chooses where a new node should be inserted.
///
/// The target is `override_target` if given, otherwise the current node.
/// Unless foster parenting is enabled and the target is one of the table
/// elements listed below, the node goes last inside the target (or inside
/// the target's template contents when the target is an HTML `template`).
/// Otherwise the open-element stack is searched from the top for the
/// nearest `template` or `table` to decide the foster-parent location.
fn appropriate_place_for_insertion(&mut self,
override_target: Option<Handle>)
-> InsertionPoint<Handle> {
use self::tag_sets::*;
// Targets for which foster parenting applies.
declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr");
let target = override_target.unwrap_or_else(|| self.current_node().clone());
// No foster parenting: insert directly into the target.
if !(self.foster_parenting && self.elem_in(&target, foster_target)) {
if self.html_elem_named(&target, local_name!("template")) {
// Children of a `template` live in its contents node.
let contents = self.sink.get_template_contents(&target);
return LastChild(contents);
} else {
return LastChild(target);
}
}
// Foster parenting: walk the stack from the most recently opened element
// downwards. `peekable` gives access to the element just below a `table`.
let mut iter = self.open_elems.iter().rev().peekable();
while let Some(elem) = iter.next() {
if self.html_elem_named(&elem, local_name!("template")) {
let contents = self.sink.get_template_contents(&elem);
return LastChild(contents);
} else if self.html_elem_named(&elem, local_name!("table")) {
if self.sink.has_parent_node(&elem) {
// The table is attached: insert immediately before it.
return BeforeSibling(elem.clone());
} else {
// The table has no parent: insert last inside the element below it on
// the stack. The `unwrap` panics if the table is the bottom-most entry.
let previous_element = (*iter.peek().unwrap()).clone();
return LastChild(previous_element);
}
}
}
// Neither a `template` nor a `table` is open: fall back to the element
// returned by `html_elem()`.
let html_elem = self.html_elem();
LastChild(html_elem.clone())
}
/// Asks the sink to insert `child` at `insertion_point`: either as the
/// last child of a parent node or immediately before a sibling node.
fn insert_at(&mut self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>) {
    let sink = &mut self.sink;
    match insertion_point {
        BeforeSibling(ref next) => sink.append_before_sibling(next, child),
        LastChild(ref container) => sink.append(container, child),
    }
}
}
impl<Handle, Sink> TokenSink
for TreeBuilder<Handle, Sink>
where Handle: Clone,
Sink: TreeSink<Handle=Handle>,
{
type Handle = Handle;
fn process_token(&mut self, token: tokenizer::Token, line_number: u64) -> TokenSinkResult<Handle> {
if line_number != self.current_line {
self.sink.set_current_line(line_number);
}
let ignore_lf = replace(&mut self.ignore_lf, false);
let token = match token {
tokenizer::ParseError(e) => {
self.sink.parse_error(e);
return tokenizer::TokenSinkResult::Continue;
}
tokenizer::DoctypeToken(dt) => if self.mode == Initial {
let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
if err {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"Bad DOCTYPE",
"Bad DOCTYPE: {:?}", dt));
}
let Doctype { name, public_id, system_id, force_quirks: _ } = dt;
if !self.opts.drop_doctype {
self.sink.append_doctype_to_document(
name.unwrap_or(StrTendril::new()),
public_id.unwrap_or(StrTendril::new()),
system_id.unwrap_or(StrTendril::new())
);
}
self.set_quirks_mode(quirk);
self.mode = BeforeHtml;
return tokenizer::TokenSinkResult::Continue;
} else {
self.sink.parse_error(format_if!(
self.opts.exact_errors,
"DOCTYPE in body",
"DOCTYPE in insertion mode {:?}", self.mode));
return tokenizer::TokenSinkResult::Continue;
},
tokenizer::TagToken(x) => TagToken(x),
tokenizer::CommentToken(x) => CommentToken(x),
tokenizer::NullCharacterToken => NullCharacterToken,
tokenizer::EOFToken => EOFToken,
tokenizer::CharacterTokens(mut x) => {
if ignore_lf && x.starts_with("\n") {
x.pop_front(1);
}
if x.is_empty() {
return tokenizer::TokenSinkResult::Continue;
}
CharacterTokens(NotSplit, x)
}
};
self.process_to_completion(token)
}
/// Called when tokenization is finished: empties the stack of open
/// elements, notifying the sink of each element as it is popped, most
/// recently opened first.
fn end(&mut self) {
    while let Some(elem) = self.open_elems.pop() {
        self.sink.pop(&elem);
    }
}
/// Returns `true` when there is at least one open element and the
/// adjusted current node lies outside the HTML namespace.
fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
    // With nothing open there is no adjusted current node to inspect.
    if self.open_elems.is_empty() {
        return false;
    }
    let node_name = self.sink.elem_name(self.adjusted_current_node());
    node_name.ns != &ns!(html)
}
}
#[cfg(test)]
#[allow(non_snake_case)]
mod test {
use markup5ever::interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
use markup5ever::interface::{NodeOrText, AppendNode, AppendText};
use markup5ever::interface::{TreeSink, Tracer, ElementFlags};
use super::types::*;
use super::actions::TreeBuilderActions;
use super::rules::TreeBuilderStep;
use ExpandedName;
use QualName;
use tendril::StrTendril;
use tendril::stream::{TendrilSink, Utf8LossyDecoder};
use tokenizer;
use tokenizer::{Tokenizer, TokenizerOpts};
use tokenizer::{Doctype, StartTag, Tag, TokenSink};
use tokenizer::states as tok_state;
use util::str::is_ascii_whitespace;
use std::default::Default;
use std::mem::replace;
use std::borrow::Cow;
use std::borrow::Cow::Borrowed;
use std::collections::VecDeque;
use driver::*;
use super::{TreeBuilderOpts, TreeBuilder};
use markup5ever::Attribute;
use rcdom::{Node, Handle, RcDom, NodeData};
/// Test sink that wraps an `RcDom` and records, for every element created,
/// the element name together with the line number current at that moment.
pub struct LineCountingDOM {
/// One `(element name, line number)` entry per `create_element` call, in call order.
pub line_vec: Vec<(QualName, u64)>,
/// Last line number received through `set_current_line`.
pub current_line: u64,
/// The real DOM every tree operation is forwarded to.
pub rcdom: RcDom,
}
// Every operation is forwarded unchanged to the wrapped `RcDom`. The only
// additions are in `create_element`, which logs the element name with the
// current line, and `set_current_line`, which stores the line number.
impl TreeSink for LineCountingDOM {
type Output = Self;
// Finishing hands back the sink itself so the test can inspect `line_vec`.
fn finish(self) -> Self { self }
type Handle = Handle;
fn parse_error(&mut self, msg: Cow<'static, str>) {
self.rcdom.parse_error(msg);
}
fn get_document(&mut self) -> Handle {
self.rcdom.get_document()
}
fn get_template_contents(&mut self, target: &Handle) -> Handle {
self.rcdom.get_template_contents(target)
}
fn set_quirks_mode(&mut self, mode: QuirksMode) {
self.rcdom.set_quirks_mode(mode)
}
fn same_node(&self, x: &Handle, y: &Handle) -> bool {
self.rcdom.same_node(x, y)
}
fn elem_name<'a>(&'a self, target: &'a Handle) -> ExpandedName<'a> {
self.rcdom.elem_name(target)
}
fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags)
-> Handle {
// Record which line was current when this element was created.
self.line_vec.push((name.clone(), self.current_line));
self.rcdom.create_element(name, attrs, flags)
}
fn create_comment(&mut self, text: StrTendril) -> Handle {
self.rcdom.create_comment(text)
}
fn create_pi(&mut self, target: StrTendril, content: StrTendril) -> Handle {
self.rcdom.create_pi(target, content)
}
fn has_parent_node(&self, node: &Handle) -> bool {
self.rcdom.has_parent_node(node)
}
fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
self.rcdom.append(parent, child)
}
fn append_before_sibling(&mut self,
sibling: &Handle,
child: NodeOrText<Handle>) {
self.rcdom.append_before_sibling(sibling, child)
}
fn append_doctype_to_document(&mut self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril) {
self.rcdom.append_doctype_to_document(name, public_id, system_id);
}
fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
self.rcdom.add_attrs_if_missing(target, attrs);
}
fn remove_from_parent(&mut self, target: &Handle) {
self.rcdom.remove_from_parent(target);
}
fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
self.rcdom.reparent_children(node, new_parent);
}
fn mark_script_already_started(&mut self, target: &Handle) {
self.rcdom.mark_script_already_started(target);
}
// Not forwarded: the line number is kept locally for `create_element`.
fn set_current_line(&mut self, line_number: u64) {
self.current_line = line_number;
}
}
/// Feeds a four-line document to the parser chunk by chunk and checks the
/// line number recorded for each created element: the implied `html`,
/// `head` and `body` plus `a` on line 1, and `b` on line 3.
#[test]
fn check_four_lines() {
    let counting_sink = LineCountingDOM {
        line_vec: Vec::new(),
        current_line: 1,
        rcdom: RcDom::default(),
    };
    let mut parser = parse_document(counting_sink, ParseOpts::default());

    // One chunk per input line; only the last has no trailing newline.
    for chunk in ["<a>\n", "</a>\n", "<b>\n", "</b>"].iter() {
        parser.process(StrTendril::from(*chunk));
    }
    let dom = parser.finish();

    // Pairs an HTML-namespace element name with the line it should be seen on.
    let html_elem = |local, line: u64| (QualName::new(None, ns!(html), local), line);
    let expected = vec![
        html_elem(local_name!("html"), 1),
        html_elem(local_name!("head"), 1),
        html_elem(local_name!("body"), 1),
        html_elem(local_name!("a"), 1),
        html_elem(local_name!("b"), 3),
    ];
    assert_eq!(dom.line_vec, expected);
}
}