mod actions;
mod rules;
mod types;
pub mod interface;
use std::borrow::{Cow};
use std::borrow::Cow::Borrowed;
use std::collections::{VecDeque, BTreeMap, HashSet};
use std::collections::btree_map::{Iter};
use std::result::Result;
use std::mem;
use {Prefix, Namespace, LocalName};
use tokenizer::{self, TokenSink, Tag, QName, Attribute, StartTag};
pub use self::interface::{TreeSink, Tracer, NextParserState, NodeOrText};
use self::rules::XmlTreeBuilderStep;
use self::types::*;
/// Namespace URI that an `xmlns:xml` declaration must carry; `insert_ns`
/// rejects any other value for that declaration.
static XML_URI: &'static str = "http://www.w3.org/XML/1998/namespace";
/// Namespace URI that may never appear as the value of a namespace
/// declaration; `insert_ns` rejects it up front.
static XMLNS_URI: &'static str = "http://www.w3.org/2000/xmlns/";
/// Outcome of recording a namespace declaration: `Ok(())` on success,
/// otherwise the parse-error message to report.
type InsResult = Result<(), Cow<'static, str>>;
/// Stack of namespace scopes.
///
/// The bottom entry is always the scope produced by
/// `NamespaceMap::default()`; later entries are pushed on top of it.
#[derive(Debug)]
struct NamespaceMapStack(Vec<NamespaceMap>);

impl NamespaceMapStack {
    /// Creates a stack whose only entry is the default namespace scope.
    fn new() -> NamespaceMapStack {
        NamespaceMapStack(vec![NamespaceMap::default()])
    }

    /// Pushes `map` on top of the stack.
    fn push(&mut self, map: NamespaceMap) {
        let NamespaceMapStack(ref mut scopes) = *self;
        scopes.push(map);
    }

    /// Removes the top scope; does nothing when the stack is already empty.
    #[doc(hidden)]
    pub fn pop(&mut self) {
        let NamespaceMapStack(ref mut scopes) = *self;
        scopes.pop();
    }
}
/// One scope of namespace bindings: each known prefix maps to the namespace
/// it is bound to, or to `None` when it is bound to no namespace.
#[derive(Debug)]
#[doc(hidden)]
pub struct NamespaceMap {
    // Prefix -> bound namespace (`None` = prefix known but unbound).
    scope: BTreeMap<Prefix, Option<Namespace>>,
}

impl NamespaceMap {
    /// Creates a scope that contains no bindings at all.
    #[doc(hidden)]
    pub fn empty() -> NamespaceMap {
        NamespaceMap { scope: BTreeMap::new() }
    }

    /// Creates the root scope: the empty prefix is unbound, while `xml` and
    /// `xmlns` are pre-bound to their namespaces.
    fn default() -> NamespaceMap {
        let mut bindings = BTreeMap::new();
        bindings.insert(namespace_prefix!(""), None);
        bindings.insert(namespace_prefix!("xml"), Some(ns!(xml)));
        bindings.insert(namespace_prefix!("xmlns"), Some(ns!(xmlns)));
        NamespaceMap { scope: bindings }
    }

    /// Looks up `prefix` in this scope only.
    ///
    /// The outer `Option` says whether the prefix is known here; the inner
    /// one holds the namespace it is bound to, if any.
    #[doc(hidden)]
    pub fn get(&self, prefix: &Prefix) -> Option<&Option<Namespace>> {
        self.scope.get(prefix)
    }

    /// Iterates over every `(prefix, namespace)` binding of this scope in
    /// the map's key order.
    #[doc(hidden)]
    pub fn get_scope_iter(&self) -> Iter<Prefix, Option<Namespace>> {
        self.scope.iter()
    }

    /// Binds the prefix of `name` to the namespace URL of `name`, replacing
    /// any binding the prefix already had in this scope.
    #[doc(hidden)]
    pub fn insert(&mut self, name: &QName) {
        self.scope.insert(
            Prefix::from(&*name.prefix),
            Some(Namespace::from(&*name.namespace_url)),
        );
    }

    /// Records the namespace declaration carried by `attr`.
    ///
    /// Returns an error message (and leaves the scope untouched) when:
    /// * the value is the XMLNS URI,
    /// * `xmlns:xml` is given any value other than the XML URI,
    /// * the declaration is `xmlns:xmlns`,
    /// * the prefix is already present in this scope and the new value is
    ///   non-empty,
    /// * the attribute is not a namespace declaration at all.
    ///
    /// An empty value records the prefix as bound to no namespace.
    fn insert_ns(&mut self, attr: &Attribute) -> InsResult {
        let value: &str = &*attr.value;
        if value == XMLNS_URI {
            return Err(Borrowed("Can't declare XMLNS URI"));
        }

        let prefix: &str = &*attr.name.prefix;
        let local: &str = &*attr.name.local;

        match (prefix, local) {
            // `xmlns:xml` may only restate the fixed binding; nothing is stored.
            ("xmlns", "xml") => {
                if value == XML_URI {
                    Ok(())
                } else {
                    Err(Borrowed("XML namespace can't be redeclared"))
                }
            }
            ("xmlns", "xmlns") => Err(Borrowed("XMLNS namespaces can't be changed")),
            // `xmlns:foo="..."` declares `foo`; a bare `xmlns="..."` declares
            // the empty (default) prefix.
            ("xmlns", _) | ("", "xmlns") => {
                let declared = if prefix.is_empty() {
                    namespace_prefix!("")
                } else {
                    Prefix::from(local)
                };
                let uri = if value.is_empty() {
                    None
                } else {
                    Some(Namespace::from(value))
                };
                if uri.is_some() && self.scope.contains_key(&declared) {
                    Err(Borrowed("Namespace already defined"))
                } else {
                    self.scope.insert(declared, uri);
                    Ok(())
                }
            }
            _ => Err(Borrowed("Invalid namespace declaration.")),
        }
    }
}
/// Options controlling an `XmlTreeBuilder`.
///
/// `Default` yields every option switched off, which is exactly what the
/// derived implementation produces for `bool` fields.
#[derive(Copy, Clone, Debug, Default)]
pub struct XmlTreeBuilderOpts {
    /// NOTE(review): presumably requests more precise parse-error reporting;
    /// the flag is not read anywhere in this file, so confirm against its
    /// users. Default: `false`.
    pub exact_errors: bool,
    /// NOTE(review): presumably enables profiling output; not read anywhere
    /// in this file, so confirm against its users. Default: `false`.
    pub profile: bool,
}
/// Tree builder that receives tokenizer tokens (through its `TokenSink`
/// implementation) and drives a `TreeSink`.
pub struct XmlTreeBuilder<Handle, Sink> {
// Options supplied to `new`.
opts: XmlTreeBuilderOpts,
// Consumer of the built tree; also receives parse errors.
sink: Sink,
// Handle obtained from `sink.get_document()` at construction time.
doc_handle: Handle,
// Pending tokenizer state change; handed out once by `query_state_change`.
next_tokenizer_state: Option<tokenizer::states::XmlState>,
// Handles reported by `trace_handles` and listed by `dump_state`
// (presumably the stack of currently open elements -- confirm in `actions`).
open_elems: Vec<Handle>,
// Optional extra handle reported by `trace_handles`
// (presumably the element being processed -- confirm in `actions`).
curr_elem: Option<Handle>,
// Namespace scopes already pushed by `process_namespaces`.
namespace_stack: NamespaceMapStack,
// Declarations collected for the tag that is currently being processed.
current_namespace: NamespaceMap,
// `(namespace, local name)` pairs recorded by `check_duplicate_attr`.
present_attrs: HashSet<(Namespace, LocalName)>,
// Phase passed to `step` for the next token.
phase: XmlPhase,
}
impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
where Handle: Clone,
Sink: TreeSink<Handle=Handle>,
{
/// Creates a tree builder that feeds `sink` and is configured by `opts`.
///
/// The document handle is requested from the sink once, up front. The
/// builder starts in `StartPhase` with no open elements, an empty current
/// namespace scope and a namespace stack holding only the default scope.
pub fn new(mut sink: Sink, opts: XmlTreeBuilderOpts) -> XmlTreeBuilder<Handle, Sink> {
    // Must happen before `sink` is moved into the struct below.
    let document = sink.get_document();

    XmlTreeBuilder {
        opts: opts,
        sink: sink,
        doc_handle: document,
        next_tokenizer_state: None,
        open_elems: Vec::new(),
        curr_elem: None,
        namespace_stack: NamespaceMapStack::new(),
        current_namespace: NamespaceMap::empty(),
        present_attrs: HashSet::new(),
        phase: StartPhase,
    }
}
pub fn unwrap(self) -> Sink {
self.sink
}
/// Shared access to the sink this builder feeds.
pub fn sink(&self) -> &Sink {
    &self.sink
}
/// Exclusive access to the sink this builder feeds.
pub fn sink_mut(&mut self) -> &mut Sink {
    &mut self.sink
}
pub fn trace_handles(&self, tracer: &Tracer<Handle=Handle>) {
tracer.trace_handle(self.doc_handle.clone());
for e in self.open_elems.iter() {
tracer.trace_handle(e.clone());
}
self.curr_elem.as_ref().map(|h| tracer.trace_handle(h.clone()));
}
/// Writes the prefix and local name of every entry of `open_elems` to the
/// debug log, framed by a header mentioning `label` and a trailing empty
/// record.
#[cfg(not(for_c))]
// Debugging aid only; nothing in this file calls it.
#[allow(dead_code)]
fn dump_state(&self, label: String) {
    debug!("dump_state on {}", label);
    debug!(" open_elems:");
    for handle in &self.open_elems {
        let name = self.sink.elem_name(handle);
        debug!(" {:?}:{:?}", name.prefix, name.local);
    }
    debug!("");
}
/// `for_c` builds: intentionally does nothing.
#[cfg(for_c)]
fn debug_step(&self, _mode: XmlPhase, _token: &Token) {
}
/// Logs, at debug level, the token about to be processed and the phase it
/// is processed in.
#[cfg(not(for_c))]
fn debug_step(&self, mode: XmlPhase, token: &Token) {
// The token is first rendered with `{:?}` and the resulting string is then
// printed with `{:?}` again, so it appears quoted and escaped in the log.
debug!("processing {:?} in insertion mode {:?}", format!("{:?}", token), mode);
}
/// Records the namespace declaration in `attr` in the current scope.
///
/// On success the attribute's own name is placed in the `xmlns` namespace;
/// on failure the error message is reported to the sink and the attribute
/// is left unchanged.
fn declare_ns(&mut self, attr: &mut Attribute) {
    match self.current_namespace.insert_ns(&*attr) {
        Ok(()) => {
            attr.name.namespace_url = ns!(xmlns);
        }
        Err(msg) => {
            self.sink.parse_error(msg);
        }
    }
}
/// Resolves `prefix` against the namespace scopes.
///
/// The current scope is consulted first, then the stacked scopes from the
/// most recently pushed down to the oldest. The first scope that knows the
/// prefix decides the result: `Ok(Some(ns))` for a bound prefix,
/// `Ok(None)` for a prefix bound to no namespace. `Err` is returned when no
/// scope knows the prefix.
fn find_uri(&self, prefix: &Prefix) -> Result<Option<Namespace>, Cow<'static, str> >{
    self.namespace_stack.0.iter()
        .chain(Some(&self.current_namespace))
        .rev()
        .filter_map(|scope| scope.get(prefix))
        .next()
        .cloned()
        .ok_or(Borrowed("No appropriate namespace found"))
}
/// Fills in `name.namespace_url` from the namespace its prefix resolves to.
///
/// A prefix bound to no namespace yields the empty namespace (`ns!()`).
/// When the prefix cannot be resolved, the error is reported to the sink
/// and `name` is left untouched.
fn bind_qname(&mut self, name: &mut QName) {
    let resolved = self.find_uri(&name.prefix);
    match resolved {
        Ok(Some(namespace)) => {
            name.namespace_url = namespace;
        }
        Ok(None) => {
            name.namespace_url = ns!();
        }
        Err(msg) => {
            self.sink.parse_error(msg);
        }
    }
}
/// Binds the namespace of an attribute name and checks it for duplication.
///
/// Names without a prefix are left alone and always accepted. Prefixed
/// names are bound via `bind_qname` and then checked with
/// `check_duplicate_attr`.
///
/// Returns `false` when the attribute is a duplicate, `true` otherwise.
fn bind_attr_qname(&mut self, name: &mut QName) -> bool {
    if name.prefix.is_empty() {
        return true;
    }
    self.bind_qname(name);
    self.check_duplicate_attr(name)
}
/// Records the `(namespace, local name)` pair of `name` in `present_attrs`.
///
/// Returns `true` when the pair had not been seen before and `false` when
/// it was already present (in which case the set is left unchanged).
fn check_duplicate_attr(&mut self, name: &QName) -> bool {
    let key = (name.namespace_url.clone(), name.local.clone());
    // `HashSet::insert` reports whether the value was newly added, which is
    // exactly the "not a duplicate" answer.
    self.present_attrs.insert(key)
}
/// Applies namespace processing to `tag`.
///
/// 1. Every namespace declaration among the attributes (`xmlns:*` or
///    `xmlns`) is recorded in the current scope via `declare_ns`.
/// 2. Every other attribute has its name bound via `bind_attr_qname`;
///    attributes reported as duplicates are dropped. Namespace
///    declarations themselves are not kept in `tag.attrs`.
/// 3. The tag's own name is bound.
/// 4. The current scope is reset to empty; for a start tag the scope that
///    was just collected is pushed onto the namespace stack.
fn process_namespaces(&mut self, tag: &mut Tag) {
    // Attribute uniqueness is a per-element constraint. Nothing visible in
    // this file clears the set, so without this reset a prefixed attribute
    // on one element would make the same expanded name on any later
    // element look like a duplicate and get dropped.
    self.present_attrs.clear();

    // Pass 1: collect all declarations first, so that attributes can use a
    // prefix declared later in the same tag.
    for attr in tag.attrs.iter_mut()
        .filter(|attr| attr.name.prefix == namespace_prefix!("xmlns")
                    || attr.name.local == local_name!("xmlns")) {
        self.declare_ns(attr);
    }

    // Pass 2: bind the remaining attributes, moving the survivors into the
    // replacement list instead of cloning them.
    let attrs = mem::replace(&mut tag.attrs, Vec::new());
    let mut kept = Vec::with_capacity(attrs.len());
    for mut attr in attrs {
        let is_declaration = attr.name.prefix == namespace_prefix!("xmlns")
            || attr.name.local == local_name!("xmlns");
        if is_declaration {
            continue;
        }
        if self.bind_attr_qname(&mut attr.name) {
            kept.push(attr);
        }
    }
    tag.attrs = kept;

    self.bind_qname(&mut tag.name);

    // The collected scope only outlives this tag when the tag opens an
    // element; otherwise it is discarded here.
    let collected = mem::replace(&mut self.current_namespace, NamespaceMap::empty());
    if tag.kind == StartTag {
        self.namespace_stack.push(collected);
    }
}
/// Runs `step` on `token`, and on whatever tokens `step` asks to have
/// reprocessed, until processing is finished.
fn process_to_completion(&mut self, mut token: Token) {
// NOTE(review): nothing in this function ever pushes onto `more_tokens`,
// so `pop_front()` below always yields `None`.
let mut more_tokens = VecDeque::new();
loop {
// Read the phase into a local before `step` borrows `self` mutably.
let phase = self.phase;
match self.step(phase, token) {
Done => {
// Presumably returns `()` from this function when the queue is empty
// and otherwise continues with the popped token -- confirm against the
// definition of `unwrap_or_return!`.
token = unwrap_or_return!(more_tokens.pop_front(), ());
}
Reprocess(m, t) => {
// Switch to the requested phase and run `step` again with the token
// handed back.
self.phase = m;
token = t;
}
}
}
}
}
/// Lets the tokenizer drive the tree builder directly.
impl<Handle, Sink> TokenSink
    for XmlTreeBuilder<Handle, Sink>
    where Handle: Clone,
          Sink: TreeSink<Handle=Handle>,
{
    /// Handles one tokenizer token.
    ///
    /// Parse errors are forwarded straight to the sink. Every other token
    /// is converted to the tree builder's own token type and processed to
    /// completion.
    fn process_token(&mut self, token: tokenizer::Token) {
        let converted = match token {
            tokenizer::TagToken(tag) => TagToken(tag),
            tokenizer::CharacterTokens(text) => CharacterTokens(text),
            tokenizer::CommentToken(comment) => CommentToken(comment),
            tokenizer::PIToken(pi) => PIToken(pi),
            tokenizer::DoctypeToken(doctype) => DoctypeToken(doctype),
            tokenizer::NullCharacterToken => NullCharacterToken,
            tokenizer::EOFToken => EOFToken,
            // Errors never reach the processing loop.
            tokenizer::ParseError(message) => {
                self.sink.parse_error(message);
                return;
            }
        };

        self.process_to_completion(converted);
    }

    /// Hands out the pending tokenizer state change, if any, and clears it
    /// so that it is reported only once.
    fn query_state_change(&mut self) -> Option<tokenizer::states::XmlState> {
        let pending = self.next_tokenizer_state.take();
        pending
    }
}