mod actions;
mod rules;
mod types;
pub mod interface;
use std::borrow::{Cow};
use std::borrow::Cow::Borrowed;
use std::collections::{VecDeque, BTreeMap, HashSet};
use std::result::Result;
use std::mem;
use string_cache::Atom;
use tokenizer::{self, TokenSink, Tag, QName, Attribute, StartTag};
pub use self::interface::{TreeSink, Tracer, NextParserState, NodeOrText};
use self::rules::XmlTreeBuilderStep;
use self::types::*;
/// Namespace URI that `Namespace::default` binds to the reserved `xml` prefix.
static XML_URI: &'static str = "http://www.w3.org/XML/1998/namespace";
/// Namespace URI that `Namespace::default` binds to the reserved `xmlns` prefix;
/// `Namespace::insert_ns` refuses any declaration whose value is this URI.
static XMLNS_URI: &'static str = "http://www.w3.org/2000/xmlns/";
// Shorthand for building the handful of `Atom`s this module needs.
// Each arm expands to an `Atom::from(..)` call on a literal or on one of the
// namespace URI statics above.
macro_rules! atoms {
// Empty atom: used as the key for the default (unprefixed) namespace.
() => (Atom::from(""));
// The `xml` prefix.
(xml) => (Atom::from("xml"));
// The URI stored in `XML_URI`.
(xml_uri) => (Atom::from(XML_URI));
// The `xmlns` prefix / attribute name.
(xmlns) => (Atom::from("xmlns"));
// The URI stored in `XMLNS_URI`.
(xmlns_uri) => (Atom::from(XMLNS_URI))
}
/// Outcome of a namespace declaration attempt (see `Namespace::insert_ns`):
/// `Ok(())` on success, otherwise a message describing why it was rejected.
type InsResult = Result<(), Cow<'static, str>>;
/// Stack of namespace scopes, stored as a plain `Vec<Namespace>`.
///
/// The last element is the top of the stack; `XmlTreeBuilder::find_uri`
/// walks it from the last element towards the first.
#[derive(Debug)]
struct NamespaceStack(Vec<Namespace>);
impl NamespaceStack {
    /// Builds a stack whose single, bottom-most entry is `Namespace::default()`.
    fn new() -> NamespaceStack {
        NamespaceStack(vec![Namespace::default()])
    }

    /// Places `namespace` on top of the stack.
    fn push(&mut self, namespace: Namespace) {
        self.0.push(namespace);
    }

    /// Removes the top scope if there is one; the removed scope is dropped.
    fn pop(&mut self) {
        let _ = self.0.pop();
    }
}
/// Pair of atoms; presumably (prefix, namespace URI) — not referenced in the
/// visible part of this file, TODO confirm usage in the submodules.
type UriMapping = (Atom, Atom);
/// A single namespace scope: the prefix bindings declared at one level.
#[derive(Debug)]
struct Namespace {
// Maps a prefix (the empty atom stands for the default namespace) to the
// URI it is bound to; `None` records a binding with an empty value.
scope: BTreeMap<Atom, Option<Atom>>,
}
impl Namespace {
    /// Creates a scope without any bindings.
    fn empty() -> Namespace {
        Namespace { scope: BTreeMap::new() }
    }

    /// Creates the initial scope: the default namespace is unbound, and the
    /// `xml` / `xmlns` prefixes are bound to their fixed URIs.
    fn default() -> Namespace {
        let mut scope = BTreeMap::new();
        scope.insert(atoms!(), None);
        scope.insert(atoms!(xml), Some(atoms!(xml_uri)));
        scope.insert(atoms!(xmlns), Some(atoms!(xmlns_uri)));
        Namespace { scope: scope }
    }

    /// Looks up `prefix` in this scope only.
    ///
    /// Returns `None` when the prefix is not mentioned here, and
    /// `Some(&None)` when it is mentioned but has no URI.
    fn get(&self, prefix: &Atom) -> Option<&Option<Atom>> {
        self.scope.get(prefix)
    }

    /// Records the namespace declaration carried by `attr` in this scope.
    ///
    /// `attr` must be named `xmlns` (default namespace) or `xmlns:<prefix>`.
    /// An empty value is stored as `None`.
    ///
    /// Returns an error message when the declaration uses the XMLNS URI as
    /// its value, tries to rebind `xml` or `xmlns`, gives a non-empty URI to
    /// a prefix this scope already contains, or is not a namespace
    /// declaration at all.
    fn insert_ns(&mut self, attr: &Attribute) -> InsResult {
        if &*attr.value == XMLNS_URI {
            return Err(Borrowed("Can't declare XMLNS URI"));
        }

        let opt_uri = if &*attr.value != "" {
            Some(Atom::from(&*attr.value))
        } else {
            None
        };

        match (&*attr.name.prefix, &*attr.name.local) {
            // `xmlns:xml` may only restate the fixed XML namespace.
            ("xmlns", "xml") => {
                if &*attr.value == XML_URI {
                    Ok(())
                } else {
                    Err(Borrowed("XML namespace can't be redeclared"))
                }
            }

            ("xmlns", "xmlns") => Err(Borrowed("XMLNS namespaces can't be changed")),

            // `xmlns:<prefix>="..."` or a bare `xmlns="..."`.
            ("xmlns", _) | ("", "xmlns") => {
                // The default namespace is keyed by the empty atom.
                let key = if &*attr.name.prefix == "" {
                    atoms!()
                } else {
                    Atom::from(&*attr.name.local)
                };

                if opt_uri.is_some() && self.scope.contains_key(&key) {
                    return Err(Borrowed("Namespace already defined"));
                }
                self.scope.insert(key, opt_uri);
                Ok(())
            }

            _ => Err(Borrowed("Invalid namespace declaration.")),
        }
    }
}
/// Tree builder for XML: receives tokens (see the `TokenSink` impl) and
/// drives a `TreeSink`, resolving namespaces along the way.
pub struct XmlTreeBuilder<Handle, Sink> {
// Consumer that receives parse errors and is queried for element names.
sink: Sink,
// Handle of the document node, obtained from `sink.get_document()` in `new`.
doc_handle: Handle,
// Tokenizer state handed out (and reset to `None`) by `query_state_change`.
next_tokenizer_state: Option<tokenizer::states::XmlState>,
// Handles visited by `trace_handles` / `dump_state`; presumably the stack of
// currently open elements — confirm against the `actions` submodule.
open_elems: Vec<Handle>,
// Optional extra handle visited by `trace_handles`; presumably the element
// currently being built — confirm against the `actions` submodule.
curr_elem: Option<Handle>,
// Scopes saved by `process_namespaces` for start tags; searched by `find_uri`.
namespace_stack: NamespaceStack,
// Declarations collected for the tag currently being processed.
current_namespace: Namespace,
// (namespace URL, local name) pairs recorded by `check_duplicate_attr`.
present_attrs: HashSet<(Atom, Atom)>,
// Phase passed to `step` by `process_to_completion`.
phase: XmlPhase,
}
impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
where Handle: Clone,
Sink: TreeSink<Handle=Handle>,
{
/// Creates a tree builder that feeds `sink`.
///
/// The document handle is requested from the sink up front; the builder
/// starts in `StartPhase` with no open elements, a namespace stack holding
/// only the default scope, and an empty current scope.
pub fn new(mut sink: Sink) -> XmlTreeBuilder<Handle, Sink> {
    // Must be fetched before `sink` is moved into the struct.
    let document = sink.get_document();

    XmlTreeBuilder {
        sink: sink,
        doc_handle: document,
        next_tokenizer_state: None,
        open_elems: Vec::new(),
        curr_elem: None,
        namespace_stack: NamespaceStack::new(),
        current_namespace: Namespace::empty(),
        present_attrs: HashSet::new(),
        phase: StartPhase,
    }
}
/// Consumes the tree builder and returns the sink it was driving.
pub fn unwrap(self) -> Sink {
self.sink
}
/// Returns a shared reference to the sink.
pub fn sink<'a>(&'a self) -> &'a Sink {
&self.sink
}
/// Returns a mutable reference to the sink.
pub fn sink_mut<'a>(&'a mut self) -> &'a mut Sink {
&mut self.sink
}
pub fn trace_handles(&self, tracer: &Tracer<Handle=Handle>) {
tracer.trace_handle(self.doc_handle.clone());
for e in self.open_elems.iter() {
tracer.trace_handle(e.clone());
}
self.curr_elem.as_ref().map(|h| tracer.trace_handle(h.clone()));
}
/// Logs, at debug level, the prefix and local name of every handle in
/// `open_elems`, introduced by `label`. Not compiled when `for_c` is set.
#[cfg(not(for_c))]
#[allow(dead_code)]
fn dump_state(&self, label: String) {
    debug!("dump_state on {}", label);
    debug!(" open_elems:");
    for handle in &self.open_elems {
        // Names are looked up through the sink; the builder keeps only handles.
        let name = self.sink.elem_name(handle);
        debug!(" {:?}:{:?}", name.prefix, name.local);
    }
    debug!("");
}
/// No-op variant of `debug_step`, compiled when the `for_c` cfg flag is set.
#[cfg(for_c)]
fn debug_step(&self, _mode: XmlPhase, _token: &Token) {
}
/// Logs, at debug level, the token about to be processed and the phase it is
/// processed in. Compiled when the `for_c` cfg flag is not set.
#[cfg(not(for_c))]
fn debug_step(&self, mode: XmlPhase, token: &Token) {
// The token is first rendered to a `String` and that string is then printed
// with `{:?}`, so it appears quoted and escaped in the log.
debug!("processing {:?} in insertion mode {:?}", format!("{:?}", token), mode);
}
/// Registers the namespace declaration `attr` in the current scope.
///
/// On success the attribute's own name is placed in the XMLNS namespace;
/// on failure the error message is forwarded to the sink and `attr` is
/// left untouched.
fn declare_ns(&mut self, attr: &mut Attribute) {
    match self.current_namespace.insert_ns(attr) {
        Ok(()) => {
            attr.name.namespace_url = atoms!(xmlns_uri);
        }
        Err(msg) => {
            self.sink.parse_error(msg);
        }
    }
}
/// Resolves `prefix` against the namespace scopes, innermost first.
///
/// The current scope is consulted first, then the namespace stack from its
/// top down to its bottom; the first scope that mentions the prefix wins.
///
/// Returns `Ok(Some(uri))` for a bound prefix, `Ok(None)` when the winning
/// scope maps the prefix to no URI, and an error message when no scope
/// mentions the prefix at all.
fn find_uri(&self, prefix: &Atom) -> Result<Option<Atom>, Cow<'static, str> > {
    let innermost_first = Some(&self.current_namespace)
        .into_iter()
        .chain(self.namespace_stack.0.iter().rev());

    for scope in innermost_first {
        if let Some(binding) = scope.get(prefix) {
            return Ok(binding.clone());
        }
    }

    Err(Borrowed("No appropriate namespace found"))
}
/// Fills in `name.namespace_url` from the prefix of `name`.
///
/// A prefix that resolves to no URI yields the empty atom. If the prefix
/// cannot be resolved at all, the error is reported to the sink and `name`
/// is left unchanged.
fn bind_qname(&mut self, name: &mut QName) {
    match self.find_uri(&name.prefix) {
        Ok(resolved) => {
            name.namespace_url = resolved.unwrap_or_else(|| atoms!());
        }
        Err(msg) => {
            self.sink.parse_error(msg);
        }
    }
}
/// Binds the namespace of an attribute name and checks it for duplication.
///
/// Attribute names with an empty prefix are left alone and always accepted.
/// Prefixed names are resolved with `bind_qname` and then passed to
/// `check_duplicate_attr`.
///
/// Returns `true` when the attribute should be kept, `false` when it
/// duplicates an attribute already recorded.
fn bind_attr_qname(&mut self, name: &mut QName) -> bool {
    if &*name.prefix == "" {
        return true;
    }

    self.bind_qname(name);
    self.check_duplicate_attr(name)
}
/// Records the expanded name (namespace URL, local name) of an attribute.
///
/// Returns `true` when the pair had not been recorded before and `false`
/// when it is a duplicate; a duplicate leaves the set unchanged.
fn check_duplicate_attr(&mut self, name: &QName) -> bool {
    // `HashSet::insert` already reports whether the value was newly added,
    // so a separate `contains` lookup is unnecessary.
    self.present_attrs
        .insert((name.namespace_url.clone(), name.local.clone()))
}
/// Resolves all namespace information for `tag`.
///
/// Steps, in order:
/// 1. every `xmlns` / `xmlns:*` attribute is registered in the current scope;
/// 2. every other attribute has its name bound; prefixed attributes whose
///    expanded name repeats one already seen on this tag are dropped;
/// 3. `tag.attrs` is replaced by the attributes kept in step 2 (namespace
///    declarations themselves are therefore not part of the result);
/// 4. the tag name is bound;
/// 5. the current scope is reset, and pushed onto the namespace stack only
///    when `tag` is a start tag.
fn process_namespaces(&mut self, tag: &mut Tag) {
    // Attribute uniqueness is a per-tag property. Forget the names recorded
    // for earlier tags, otherwise a prefixed attribute is rejected as a
    // duplicate merely because a previous element carried the same name.
    self.present_attrs.clear();

    // Built once instead of once per attribute inside the filters below.
    let xmlns = atoms!(xmlns);

    // Declarations must all be registered before any name is bound, since a
    // prefix may be declared after the attribute that uses it.
    for attr in tag.attrs.iter_mut()
        .filter(|attr| attr.name.prefix == xmlns || attr.name.local == xmlns) {
        self.declare_ns(attr);
    }

    let mut kept_attrs = vec![];
    for attr in tag.attrs.iter_mut()
        .filter(|attr| attr.name.prefix != xmlns && attr.name.local != xmlns) {
        if self.bind_attr_qname(&mut attr.name) {
            kept_attrs.push(attr.clone());
        }
    }
    tag.attrs = kept_attrs;

    self.bind_qname(&mut tag.name);

    // The declarations of this tag stay in effect only if it opens an
    // element; otherwise they are discarded here.
    let declared = mem::replace(&mut self.current_namespace, Namespace::empty());
    if tag.kind == StartTag {
        self.namespace_stack.push(declared);
    }
}
/// Runs `step` on `token` until the tree builder is done with it.
///
/// When `step` asks for reprocessing, the phase is switched and the returned
/// token is fed back in. When `step` reports `Done`, the next queued token is
/// taken; nothing in this function ever queues one, so the function returns
/// at that point.
fn process_to_completion(&mut self, mut token: Token) {
    let mut pending = VecDeque::new();
    loop {
        let current_phase = self.phase;
        token = match self.step(current_phase, token) {
            Reprocess(next_phase, again) => {
                self.phase = next_phase;
                again
            }
            Done => unwrap_or_return!(pending.pop_front(), ()),
        };
    }
}
}
impl<Handle, Sink> TokenSink
    for XmlTreeBuilder<Handle, Sink>
    where Handle: Clone,
          Sink: TreeSink<Handle=Handle>,
{
    /// Accepts one tokenizer token.
    ///
    /// Parse errors are forwarded straight to the sink. Every other token is
    /// converted to the tree builder's own token type and processed to
    /// completion.
    fn process_token(&mut self, token: tokenizer::Token) {
        let converted = match token {
            tokenizer::DoctypeToken(doctype) => DoctypeToken(doctype),
            tokenizer::PIToken(pi) => PIToken(pi),
            tokenizer::TagToken(tag) => TagToken(tag),
            tokenizer::CommentToken(text) => CommentToken(text),
            tokenizer::NullCharacterToken => NullCharacterToken,
            tokenizer::EOFToken => EOFToken,
            tokenizer::CharacterTokens(text) => CharacterTokens(text),
            // Errors never reach the tree construction phases.
            tokenizer::ParseError(message) => {
                self.sink.parse_error(message);
                return;
            }
        };

        self.process_to_completion(converted);
    }

    /// Hands out the pending tokenizer state change, if any, leaving `None`
    /// behind so it is reported only once.
    fn query_state_change(&mut self) -> Option<tokenizer::states::XmlState> {
        self.next_tokenizer_state.take()
    }
}