use {Attribute, QualName};
use buffer_queue::BufferQueue;
use tokenizer::{Tokenizer, TokenizerOpts, TokenizerResult};
use tree_builder::{TreeBuilderOpts, TreeBuilder, TreeSink, create_element};
use std::borrow::Cow;
use tendril;
use tendril::StrTendril;
use tendril::stream::{TendrilSink, Utf8LossyDecoder};
/// Configuration for both stages of the parsing pipeline.
///
/// `Default::default()` gives the standard, spec-compliant behavior for
/// both the tokenizer and the tree builder.
#[derive(Clone, Default)]
pub struct ParseOpts {
// Options passed through to the `Tokenizer` stage.
pub tokenizer: TokenizerOpts,
// Options passed through to the `TreeBuilder` stage.
pub tree_builder: TreeBuilderOpts,
}
/// Build a `Parser` that parses a complete HTML document into `sink`.
///
/// The returned value implements `TendrilSink`: feed it `StrTendril`
/// chunks (or bytes, via `Parser::from_utf8`) and call `finish` to
/// obtain `Sink::Output`.
pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink> where Sink: TreeSink {
    // Wire the pipeline: sink <- tree builder <- tokenizer.
    let tree_builder = TreeBuilder::new(sink, opts.tree_builder);
    Parser {
        tokenizer: Tokenizer::new(tree_builder, opts.tokenizer),
        input_buffer: BufferQueue::new(),
    }
}
/// Build a `Parser` for the HTML fragment parsing algorithm.
///
/// A context element named `context_name` (with `context_attrs`) is
/// created in `sink` first; the input is then parsed as if it appeared
/// inside that element.
pub fn parse_fragment<Sink>(mut sink: Sink, opts: ParseOpts,
context_name: QualName, context_attrs: Vec<Attribute>)
-> Parser<Sink>
where Sink: TreeSink {
    // Materialize the context element in the sink, then defer to the
    // handle-based entry point (no form element owner).
    let context = create_element(&mut sink, context_name, context_attrs);
    parse_fragment_for_element(sink, opts, context, None)
}
/// Like `parse_fragment`, but the context element (and an optional
/// associated form element) are supplied as existing sink handles.
pub fn parse_fragment_for_element<Sink>(sink: Sink, opts: ParseOpts,
context_element: Sink::Handle,
form_element: Option<Sink::Handle>)
-> Parser<Sink>
where Sink: TreeSink {
    let tree_builder =
        TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder);
    // The tokenizer must start in a state determined by the context
    // element (e.g. RAWTEXT inside <style>); everything else comes
    // from the caller-provided options.
    let initial_state = Some(tree_builder.tokenizer_state_for_context_elem());
    let tokenizer_opts = TokenizerOpts {
        initial_state,
        ..opts.tokenizer
    };
    Parser {
        tokenizer: Tokenizer::new(tree_builder, tokenizer_opts),
        input_buffer: BufferQueue::new(),
    }
}
/// An HTML parser ready to receive input via the `TendrilSink` trait.
///
/// Construct with `parse_document`, `parse_fragment`, or
/// `parse_fragment_for_element`.
pub struct Parser<Sink> where Sink: TreeSink {
// Tokenizer whose token sink is the tree builder wrapping `Sink`.
pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>,
// Buffered input not yet consumed by the tokenizer.
pub input_buffer: BufferQueue,
}
impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> {
    type Output = Sink::Output;

    /// Queue a chunk of input and run the tokenizer over the buffer.
    ///
    /// `Script` results are ignored here: this driver has no way to
    /// execute scripts, so tokenization simply resumes past them.
    fn process(&mut self, t: StrTendril) {
        self.input_buffer.push_back(t);
        loop {
            match self.tokenizer.feed(&mut self.input_buffer) {
                TokenizerResult::Script(_) => continue,
                _ => break,
            }
        }
    }

    /// Forward a decoding error to the tree sink as a parse error.
    fn error(&mut self, desc: Cow<'static, str>) {
        self.tokenizer.sink.sink.parse_error(desc)
    }

    /// Drain remaining input, end tokenization, and extract the output.
    fn finish(mut self) -> Self::Output {
        loop {
            match self.tokenizer.feed(&mut self.input_buffer) {
                TokenizerResult::Script(_) => continue,
                _ => break,
            }
        }
        // With no more input coming, the tokenizer must have consumed
        // everything we buffered.
        assert!(self.input_buffer.is_empty());
        self.tokenizer.end();
        self.tokenizer.sink.sink.finish()
    }
}
impl<Sink: TreeSink> Parser<Sink> {
/// Wrap this parser so it accepts `&[u8]` input, decoding it as UTF-8
/// with invalid byte sequences replaced (lossy decoding).
pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
Utf8LossyDecoder::new(self)
}
}
#[cfg(test)]
mod tests {
    use rcdom::RcDom;
    use serialize::serialize;
    use tendril::TendrilSink;
    use super::*;

    // Feeding bytes through `from_utf8` should yield the same tree as
    // the equivalent string input; check via the serialized output.
    #[test]
    fn from_utf8() {
        let parser = parse_document(RcDom::default(), ParseOpts::default()).from_utf8();
        let dom = parser.one(&b"<title>Test"[..]);
        let mut html = Vec::new();
        serialize(&mut html, &dom.document, Default::default()).unwrap();
        let rendered = String::from_utf8(html).unwrap().replace(" ", "");
        assert_eq!(rendered,
                   "<html><head><title>Test</title></head><body></body></html>");
    }
}