#![allow(warnings)]
pub use self::interface::{QuirksMode, Quirks, LimitedQuirks, NoQuirks};
pub use self::interface::{NodeOrText, AppendNode, AppendText};
pub use self::interface::{TreeSink, Tracer, NextParserState};
use self::types::*;
use self::actions::TreeBuilderActions;
use self::rules::TreeBuilderStep;
use string_cache::QualName;
use tendril::StrTendril;
use tokenizer;
use tokenizer::{Doctype, Tag};
use tokenizer::TokenSink;
use tokenizer::states as tok_state;
use util::str::is_ascii_whitespace;
use std::default::Default;
use std::mem::replace;
use std::borrow::Cow::Borrowed;
use std::collections::VecDeque;
#[macro_use] mod tag_sets;
pub mod interface;
mod data;
mod types;
mod actions;
#[path = "rules.expanded.rs"] mod rules;
/// Options controlling the behavior of the tree builder.
#[derive(Copy, Clone)]
pub struct TreeBuilderOpts {
    /// Produce the more expensive, detailed parse-error messages
    /// (selected via `format_if!` in `process_token`)?
    pub exact_errors: bool,
    /// Is scripting enabled?  Affects e.g. the tokenizer state chosen
    /// for a `<noscript>` fragment context element.
    pub scripting_enabled: bool,
    /// Is this document being parsed from the `srcdoc` attribute of an
    /// `<iframe>`?  Passed to `data::doctype_error_and_quirks`.
    pub iframe_srcdoc: bool,
    /// If true, the DOCTYPE is not appended to the document by
    /// `process_token`.
    pub drop_doctype: bool,
    // NOTE(review): not referenced in this chunk; presumably suppresses
    // errors for unimplemented parser rules — confirm against `rules`.
    pub ignore_missing_rules: bool,
    /// Initial quirks mode for the document (may be changed later by
    /// DOCTYPE processing).
    pub quirks_mode: QuirksMode,
}
impl Default for TreeBuilderOpts {
    /// Default configuration: scripting enabled, no quirks, cheap error
    /// messages, DOCTYPEs kept, and missing-rule errors reported.
    fn default() -> TreeBuilderOpts {
        TreeBuilderOpts {
            scripting_enabled: true,
            quirks_mode: NoQuirks,
            exact_errors: false,
            iframe_srcdoc: false,
            drop_doctype: false,
            ignore_missing_rules: false,
        }
    }
}
/// The HTML tree builder.  Consumes tokens (as a `TokenSink`) and sends
/// tree-construction operations to a `TreeSink`.
pub struct TreeBuilder<Handle, Sink> {
    /// Options controlling the behavior of the tree builder.
    opts: TreeBuilderOpts,

    /// Consumer of tree modifications.
    sink: Sink,

    /// Current insertion mode.
    mode: InsertionMode,

    /// Saved insertion mode to return to after e.g. text parsing.
    // NOTE(review): only read/written by the rules module — confirm.
    orig_mode: Option<InsertionMode>,

    /// Stack of template insertion modes; seeded with `InTemplate` when
    /// the fragment context element is a `<template>`.
    template_modes: Vec<InsertionMode>,

    // NOTE(review): not used in this chunk; presumably character tokens
    // buffered during table parsing, tagged with their whitespace
    // split status — confirm against the rules module.
    pending_table_text: Vec<(SplitStatus, StrTendril)>,

    /// Current document quirks mode, initialized from `opts.quirks_mode`.
    quirks_mode: QuirksMode,

    /// Handle to the document node, obtained from `sink.get_document()`.
    doc_handle: Handle,

    /// Stack of open elements.
    open_elems: Vec<Handle>,

    /// List of active formatting elements (`Marker` / `Element` entries).
    active_formatting: Vec<FormatEntry<Handle>>,

    /// The `<head>` element, once seen.
    head_elem: Option<Handle>,

    /// The `<form>` element pointer (may be supplied for fragment parsing).
    form_elem: Option<Handle>,

    /// Pending tokenizer state change, handed out via `query_state_change`.
    next_tokenizer_state: Option<tokenizer::states::State>,

    /// The "frameset-ok" flag.
    // NOTE(review): maintained by the rules module; not read here.
    frameset_ok: bool,

    /// When set, a single leading U+000A in the next character token is
    /// dropped (see `process_token`), e.g. after `<pre>`/`<textarea>`.
    ignore_lf: bool,

    /// Whether foster parenting is currently active.
    // NOTE(review): not referenced in this chunk — confirm semantics.
    foster_parenting: bool,

    /// The fragment-parsing context element; `Some` iff this is a
    /// fragment parse (see `is_fragment`).
    context_elem: Option<Handle>,
}
impl<Handle, Sink> TreeBuilder<Handle, Sink>
    where Handle: Clone,
          Sink: TreeSink<Handle=Handle>,
{
    /// Create a new tree builder which sends tree modifications to a
    /// particular `TreeSink`, configured by `opts`.
    pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
        // Grab the document handle first; `sink` is moved into the
        // struct below.
        let doc_handle = sink.get_document();
        TreeBuilder {
            opts: opts,
            sink: sink,
            mode: Initial,
            orig_mode: None,
            template_modes: vec!(),
            pending_table_text: vec!(),
            quirks_mode: opts.quirks_mode,
            doc_handle: doc_handle,
            open_elems: vec!(),
            active_formatting: vec!(),
            head_elem: None,
            form_elem: None,
            next_tokenizer_state: None,
            frameset_ok: true,
            ignore_lf: false,
            foster_parenting: false,
            context_elem: None,
        }
    }

    /// Create a tree builder for parsing a fragment in the context of
    /// `context_elem`, optionally with an associated `<form>` element
    /// pointer.  Sets up the root element and initial insertion mode as
    /// required by the HTML fragment parsing algorithm.
    pub fn new_for_fragment(mut sink: Sink,
                            context_elem: Handle,
                            form_elem: Option<Handle>,
                            opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
        let doc_handle = sink.get_document();
        // A <template> context seeds the stack of template insertion modes.
        let context_is_template =
            sink.elem_name(context_elem.clone()) == qualname!(HTML, template);
        let mut tb = TreeBuilder {
            opts: opts,
            sink: sink,
            mode: Initial,
            orig_mode: None,
            template_modes: if context_is_template { vec![InTemplate] } else { vec![] },
            pending_table_text: vec!(),
            quirks_mode: opts.quirks_mode,
            doc_handle: doc_handle,
            open_elems: vec!(),
            active_formatting: vec!(),
            head_elem: None,
            form_elem: form_elem,
            next_tokenizer_state: None,
            frameset_ok: true,
            ignore_lf: false,
            foster_parenting: false,
            context_elem: Some(context_elem),
        };
        // Create the root <html> element and choose the starting
        // insertion mode from the context element (both provided by the
        // actions/rules modules).
        tb.create_root(vec!());
        tb.mode = tb.reset_insertion_mode();
        tb
    }

    /// The initial tokenizer state for the fragment context element:
    /// RCDATA, RAWTEXT, script data or PLAINTEXT for the elements whose
    /// content is parsed that way, `Data` otherwise.
    ///
    /// # Panics
    ///
    /// Panics if there is no context element (full-document parse).
    pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State {
        let elem = self.context_elem.clone().expect("no context element");
        // Non-HTML-namespace context elements always tokenize as data.
        let name = match self.sink.elem_name(elem) {
            QualName { ns: ns!(HTML), local } => local,
            _ => return tok_state::Data
        };
        match name {
            atom!(title) | atom!(textarea) => tok_state::RawData(tok_state::Rcdata),

            atom!(style) | atom!(xmp) | atom!(iframe)
              | atom!(noembed) | atom!(noframes) => tok_state::RawData(tok_state::Rawtext),

            atom!(script) => tok_state::RawData(tok_state::ScriptData),

            // <noscript> content is raw text only when scripting is on.
            atom!(noscript) => if self.opts.scripting_enabled {
                tok_state::RawData(tok_state::Rawtext)
            } else {
                tok_state::Data
            },

            atom!(plaintext) => tok_state::Plaintext,

            _ => tok_state::Data
        }
    }

    /// Consume the tree builder and return the sink.
    pub fn unwrap(self) -> Sink {
        self.sink
    }

    /// Borrow the sink.
    pub fn sink<'a>(&'a self) -> &'a Sink {
        &self.sink
    }

    /// Mutably borrow the sink.
    pub fn sink_mut<'a>(&'a mut self) -> &'a mut Sink {
        &mut self.sink
    }

    /// Report every node handle held by the tree builder to `tracer`
    /// (e.g. so a garbage-collected sink can keep them alive).
    pub fn trace_handles(&self, tracer: &Tracer<Handle=Handle>) {
        tracer.trace_handle(self.doc_handle.clone());
        for e in self.open_elems.iter() {
            tracer.trace_handle(e.clone());
        }
        // Only Element entries hold a handle; Markers do not.
        for e in self.active_formatting.iter() {
            match e {
                &Element(ref h, _) => tracer.trace_handle(h.clone()),
                _ => (),
            }
        }
        self.head_elem.as_ref().map(|h| tracer.trace_handle(h.clone()));
        self.form_elem.as_ref().map(|h| tracer.trace_handle(h.clone()));
        self.context_elem.as_ref().map(|h| tracer.trace_handle(h.clone()));
    }

    /// Debugging helper: print the stack of open elements and the list
    /// of active formatting elements.  Panics on non-HTML elements.
    #[allow(dead_code)]
    fn dump_state(&self, label: String) {
        // NOTE(review): redundant with the file-level import of QualName.
        use string_cache::QualName;

        println!("dump_state on {}", label);
        print!("    open_elems:");
        for node in self.open_elems.iter() {
            let QualName { ns, local } = self.sink.elem_name(node.clone());
            match ns {
                ns!(HTML) => print!(" {}", &local[..]),
                _ => panic!(),
            }
        }
        println!("");
        print!("    active_formatting:");
        for entry in self.active_formatting.iter() {
            match entry {
                &Marker => print!(" Marker"),
                &Element(ref h, _) => {
                    let QualName { ns, local } = self.sink.elem_name(h.clone());
                    match ns {
                        ns!(HTML) => print!(" {}", &local[..]),
                        _ => panic!(),
                    }
                }
            }
        }
        println!("");
    }

    /// Log one insertion-mode step (debug builds only).
    fn debug_step(&self, mode: InsertionMode, token: &Token) {
        use util::str::to_escaped_string;
        debug!("processing {} in insertion mode {:?}", to_escaped_string(token), mode);
    }

    /// Run `token` through the insertion-mode rules, reprocessing as
    /// requested, until it (and any tokens split off from it) is fully
    /// consumed.
    fn process_to_completion(&mut self, mut token: Token) {
        // Queue of additional tokens yet to be processed.
        // This stays empty (and hence non-allocating) in the common case
        // where we don't split whitespace.
        let mut more_tokens = VecDeque::new();

        loop {
            // Remember whether the tag was self-closing before the token
            // is moved into step(); used for the "unacknowledged
            // self-closing tag" error below.
            let is_self_closing = match token {
                TagToken(Tag { self_closing: c, .. }) => c,
                _ => false,
            };
            // Foreign (SVG/MathML) content has its own dispatch;
            // otherwise use the current insertion mode.
            let result = if self.is_foreign(&token) {
                self.step_foreign(token)
            } else {
                let mode = self.mode;
                self.step(mode, token)
            };
            match result {
                Done => {
                    if is_self_closing {
                        self.sink.parse_error(Borrowed("Unacknowledged self-closing tag"));
                    }
                    token = unwrap_or_return!(more_tokens.pop_front(), ());
                }
                DoneAckSelfClosing => {
                    token = unwrap_or_return!(more_tokens.pop_front(), ());
                }
                Reprocess(m, t) => {
                    // Re-run the same token in a new insertion mode.
                    self.mode = m;
                    token = t;
                }
                ReprocessForeign(t) => {
                    token = t;
                }
                SplitWhitespace(mut buf) => {
                    // Peel the leading run of (non-)whitespace off the
                    // buffer and process it now; queue the remainder as
                    // a not-yet-split character token.
                    let p = buf.pop_front_char_run(is_ascii_whitespace);
                    let (first, is_ws) = unwrap_or_return!(p, ());
                    let status = if is_ws { Whitespace } else { NotWhitespace };
                    token = CharacterTokens(status, first);

                    if buf.len32() > 0 {
                        more_tokens.push_back(CharacterTokens(NotSplit, buf));
                    }
                }
            }
        }
    }

    /// Are we parsing a HTML fragment (as opposed to a full document)?
    pub fn is_fragment(&self) -> bool {
        self.context_elem.is_some()
    }
}
impl<Handle, Sink> TokenSink
    for TreeBuilder<Handle, Sink>
    where Handle: Clone,
          Sink: TreeSink<Handle=Handle>,
{
    /// Receive one token from the tokenizer: translate it into the tree
    /// builder's own token type and process it.  Parse errors and
    /// DOCTYPEs are handled here without entering the insertion-mode
    /// machinery.
    fn process_token(&mut self, token: tokenizer::Token) {
        // Consume (and clear) the "ignore a leading line feed" flag set
        // by the previous token.
        let ignore_lf = replace(&mut self.ignore_lf, false);

        // Handle `ParseError` and `DoctypeToken` in this match. The
        // case arms for every other token kind fall through to a
        // conversion into the token type used in the rest of the
        // tree builder.
        let token = match token {
            tokenizer::ParseError(e) => {
                self.sink.parse_error(e);
                return;
            }

            // A DOCTYPE is meaningful only in the Initial insertion
            // mode; anywhere else it is a parse error and is dropped.
            tokenizer::DoctypeToken(dt) => if self.mode == Initial {
                let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
                if err {
                    self.sink.parse_error(format_if!(
                        self.opts.exact_errors,
                        "Bad DOCTYPE",
                        "Bad DOCTYPE: {:?}", dt));
                }
                let Doctype { name, public_id, system_id, force_quirks: _ } = dt;
                if !self.opts.drop_doctype {
                    // Missing fields are appended as empty strings.
                    self.sink.append_doctype_to_document(
                        name.unwrap_or(StrTendril::new()),
                        public_id.unwrap_or(StrTendril::new()),
                        system_id.unwrap_or(StrTendril::new())
                    );
                }
                self.set_quirks_mode(quirk);

                self.mode = BeforeHtml;
                return;
            } else {
                self.sink.parse_error(format_if!(
                    self.opts.exact_errors,
                    "DOCTYPE in body",
                    "DOCTYPE in insertion mode {:?}", self.mode));
                return;
            },

            tokenizer::TagToken(x) => TagToken(x),
            tokenizer::CommentToken(x) => CommentToken(x),
            tokenizer::NullCharacterToken => NullCharacterToken,
            tokenizer::EOFToken => EOFToken,

            tokenizer::CharacterTokens(mut x) => {
                // Drop a single leading newline if the previous token
                // requested it (e.g. after <pre>).
                if ignore_lf && x.starts_with("\n") {
                    x.pop_front(1);
                }
                if x.is_empty() {
                    return;
                }
                CharacterTokens(NotSplit, x)
            }
        };

        self.process_to_completion(token);
    }

    /// Return (and clear) any tokenizer state change requested by the
    /// tree builder, so the tokenizer can switch states between tokens.
    fn query_state_change(&mut self) -> Option<tokenizer::states::State> {
        self.next_tokenizer_state.take()
    }
}