#![warn(missing_docs)]
extern crate xml;
mod document;
mod selector;
pub use self::{
document::{Document, DocumentError},
selector::{CompoundSelector, MatchType, Scope, Selector, UnexpectedTokenError},
};
use std::{
    collections::{HashMap, HashSet},
    iter::{empty, once},
    marker::PhantomData,
};
/// An element node: a tag name, a set of attributes, text content and an
/// optional list of child elements.
///
/// Elements are queried with [`Element::select`] and [`Element::select_all`].
#[derive(Clone, Debug)]
pub struct Element {
// Index used to tell elements apart when de-duplicating selection results.
// NOTE(review): presumably unique per element within one document; it is
// assigned outside this file — confirm against the document builder.
node_index: usize,
// Tag name, as compared against `Selector::TagName` in `matches`.
tag_name: String,
// Child elements; `None` is treated the same as "no children" by the iterators.
children: Option<Vec<Element>>,
// Attribute name -> attribute value.
attr_map: HashMap<String, String>,
// Text content exposed by `text()`.
text: String,
}
/// Error returned by [`Element::select`] and [`Element::select_all`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum SelectError {
/// The selector string could not be parsed; carries the error produced by
/// `CompoundSelector::parse`.
ParseError(UnexpectedTokenError),
/// The selector was valid but no element matched it (returned by `select` only).
NoMatchError,
}
struct UniqueElements<'a, I: Iterator<Item = &'a Element> + 'a> {
next_index: usize,
inner_iter: I,
phantom_data: PhantomData<&'a i32>,
}
impl<'a, I: Iterator<Item = &'a Element>> Iterator for UniqueElements<'a, I> {
type Item = &'a Element;
fn next(&mut self) -> Option<Self::Item> {
loop {
match self.inner_iter.next() {
Some(element) if element.node_index < self.next_index => {
println!("SKIPPED");
}
Some(element) => {
self.next_index = element.node_index + 1;
return Some(element);
}
None => return None,
}
}
}
}
impl Element {
pub fn select_all<'a>(
&'a self,
selector: &str,
) -> Result<Box<dyn Iterator<Item = &'a Element> + 'a>, SelectError> {
CompoundSelector::parse(selector)
.map_err(|err| SelectError::ParseError(err))
.and_then(|compound_selectors| {
let initial_iterator: Box<dyn Iterator<Item = &'a Element>> = Box::new(once(self));
let iterator = compound_selectors.into_iter().fold(
initial_iterator,
|iter, compound_selector| {
let scope = compound_selector.scope;
let children_iter = iter.flat_map(move |child| match scope {
Scope::IndirectChild => child.children_deep_iter(),
Scope::DirectChild => child.children_iter(),
});
let matching_children_iter = children_iter.filter_map(move |child| {
if child.matches(&compound_selector) {
Some(child)
} else {
None
}
});
let unique_children_iter = UniqueElements {
next_index: 0,
inner_iter: matching_children_iter,
phantom_data: PhantomData,
};
Box::new(unique_children_iter)
},
);
return Ok(iterator);
})
}
pub fn select<'a>(&'a self, selector: &str) -> Result<&'a Element, SelectError> {
self.select_all(selector).and_then(|mut iterator| {
if let Some(element) = iterator.next() {
Ok(element)
} else {
Err(SelectError::NoMatchError)
}
})
}
pub fn children_iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Element> + 'a> {
if let Some(ref children) = self.children {
Box::new(children.iter().map(|node| -> &'a Element { node }))
} else {
Box::new(empty::<&'a Element>())
}
}
pub fn children_deep_iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a Element> + 'a> {
let iterator = self
.children_iter()
.flat_map(|child| once(child).chain(child.children_deep_iter()));
Box::new(iterator)
}
pub fn subtree_size(&self) -> usize {
if let Some(ref children) = self.children {
children
.iter()
.fold(1, |subtotal, child| child.subtree_size() + subtotal)
} else {
1
}
}
pub fn tag_name(&self) -> &str {
&self.tag_name
}
pub fn attr(&self, attr_name: &str) -> Option<&String> {
self.attr_map.get(attr_name)
}
pub fn text(&self) -> &String {
&self.text
}
pub fn matches(&self, compound_selector: &CompoundSelector) -> bool {
compound_selector.parts.iter().all(|part| match part {
&Selector::TagName(ref name) => self.tag_name() == name,
&Selector::Id(ref id) => self.attr("id") == Some(id),
&Selector::Attribute(ref attr, MatchType::Equals, ref value) => {
self.attr(attr) == Some(value)
}
})
}
pub fn node_index(&self) -> usize {
self.node_index
}
}