use tokenizer::{XmlTokenizer, XmlTokenizerOpts};
use tree_builder::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts};
use std::borrow::Cow;
use markup5ever::buffer_queue::BufferQueue;
use tendril;
use tendril::stream::{TendrilSink, Utf8LossyDecoder};
use tendril::StrTendril;
/// Options for the XML parser as a whole.
///
/// Groups the settings for the two stages that `parse_document` wires
/// together: the tokenizer and the tree builder.
#[derive(Clone, Default)]
pub struct XmlParseOpts {
/// Options passed to `XmlTokenizer::new`.
pub tokenizer: XmlTokenizerOpts,
/// Options passed to `XmlTreeBuilder::new`.
pub tree_builder: XmlTreeBuilderOpts,
}
pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink>
where
Sink: TreeSink,
{
let tb = XmlTreeBuilder::new(sink, opts.tree_builder);
let tok = XmlTokenizer::new(tb, opts.tokenizer);
XmlParser {
tokenizer: tok,
input_buffer: BufferQueue::new(),
}
}
/// An XML parser: a tokenizer feeding a tree builder, plus the queue of
/// input that is handed to the tokenizer.
///
/// Instances are created by `parse_document`.
pub struct XmlParser<Sink>
where
Sink: TreeSink,
{
/// Tokenizer whose sink is the tree builder wrapping the caller's `Sink`.
pub tokenizer: XmlTokenizer<XmlTreeBuilder<Sink::Handle, Sink>>,
/// Queue that `process` appends incoming tendrils to before feeding the
/// tokenizer from it.
pub input_buffer: BufferQueue,
}
impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink> {
type Output = Sink::Output;
fn process(&mut self, t: StrTendril) {
self.input_buffer.push_back(t);
self.tokenizer.feed(&mut self.input_buffer);
}
fn error(&mut self, desc: Cow<'static, str>) {
self.tokenizer.sink.sink.parse_error(desc)
}
fn finish(mut self) -> Self::Output {
self.tokenizer.end();
self.tokenizer.sink.sink.finish()
}
}
impl<Sink> XmlParser<Sink>
where
    Sink: TreeSink,
{
    /// Wraps this parser in a `Utf8LossyDecoder`, so that it can be fed raw
    /// bytes instead of already-decoded UTF-8 tendrils.
    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
        Utf8LossyDecoder::new(self)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rcdom::RcDom;
    use serialize::serialize;
    use tendril::TendrilSink;

    /// Parses `input` with default options into a fresh `RcDom`, going
    /// through the byte-oriented `from_utf8` entry point.
    fn parse_text(input: &str) -> RcDom {
        parse_document(RcDom::default(), XmlParseOpts::default())
            .from_utf8()
            .one(input.as_bytes())
    }

    /// Serializes the document of `dom` with default options into bytes.
    fn document_bytes(dom: &RcDom) -> Vec<u8> {
        let mut bytes = Vec::new();
        serialize(&mut bytes, &dom.document, Default::default()).unwrap();
        bytes
    }

    /// The input closes the element with an unprefixed `</title>`, while the
    /// reference text uses `</a:title>`; both must serialize identically.
    #[test]
    fn el_ns_serialize() {
        let reference = "<a:title xmlns:a=\"http://www.foo.org/\" value=\"test\">Test</a:title>";
        let input = "<a:title xmlns:a=\"http://www.foo.org/\" value=\"test\">Test</title>";
        assert_eq_serialization(reference, parse_text(input));
    }

    /// Two prefixed namespaces declared on the root, one used by a child.
    #[test]
    fn nested_ns_serialize() {
        let reference = "<a:x xmlns:a=\"http://www.foo.org/\" xmlns:b=\"http://www.bar.org/\" value=\"test\"><b:y/></a:x>";
        let input = "<a:x xmlns:a=\"http://www.foo.org/\" xmlns:b=\"http://www.bar.org/\" value=\"test\"><b:y/></a:x>";
        assert_eq_serialization(reference, parse_text(input));
    }

    /// A default namespace declared on the root element.
    #[test]
    fn def_ns_serialize() {
        let reference = "<table xmlns=\"html4\"><td></td></table>";
        let input = "<table xmlns=\"html4\"><td></td></table>";
        assert_eq_serialization(reference, parse_text(input));
    }

    /// A prefix bound on the root and re-declared as empty on a child.
    // NOTE(review): `</a:y</a:x>` has no `>` after `</a:y`. Both sides use the
    // same text, so the comparison is unaffected -- confirm whether the
    // malformed closing tag is intentional.
    #[test]
    fn undefine_ns_serialize() {
        let reference = "<a:x xmlns:a=\"http://www.foo.org\"><a:y xmlns:a=\"\"><a:z/></a:y</a:x>";
        let input = "<a:x xmlns:a=\"http://www.foo.org\"><a:y xmlns:a=\"\"><a:z/></a:y</a:x>";
        assert_eq_serialization(reference, parse_text(input));
    }

    /// A default namespace on the root, reset to empty on a child.
    // NOTE(review): `</y</x>` has no `>` after `</y`. Both sides use the same
    // text, so the comparison is unaffected -- confirm whether the malformed
    // closing tag is intentional.
    #[test]
    fn redefine_default_ns_serialize() {
        let reference = "<x xmlns=\"http://www.foo.org\"><y xmlns=\"\"><z/></y</x>";
        let input = "<x xmlns=\"http://www.foo.org\"><y xmlns=\"\"><z/></y</x>";
        assert_eq_serialization(reference, parse_text(input));
    }

    /// Single-quoted attribute and a missing end tag in the input; the
    /// expected output has double quotes and an explicit end tag.
    #[test]
    fn attr_serialize() {
        let expected = "<title value=\"test\">Test</title>";
        assert_serialization(expected, parse_text("<title value='test'>Test"));
    }

    /// Unclosed element fed as bytes; the expected output has the end tag.
    #[test]
    fn from_utf8() {
        let expected = "<title>Test</title>";
        assert_serialization(expected, parse_text("<title>Test"));
    }

    /// Asserts that `dom` serializes to the same string as the DOM obtained
    /// by parsing `text`.
    fn assert_eq_serialization(text: &'static str, dom: RcDom) {
        let serialized = document_bytes(&dom);

        let dom_from_text = parse_text(text);
        let reserialized = document_bytes(&dom_from_text);

        assert_eq!(
            String::from_utf8(serialized).unwrap(),
            String::from_utf8(reserialized).unwrap()
        );
    }

    /// Asserts that `dom` serializes to exactly `text`.
    fn assert_serialization(text: &'static str, dom: RcDom) {
        let serialized = document_bytes(&dom);
        assert_eq!(String::from_utf8(serialized).unwrap(), text);
    }
}