use core::fmt;
use std::cell::RefCell;
use std::error::Error;
use std::fmt::Debug;
use std::io::BufRead;
use std::mem;
use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event};
use quick_xml::name::ResolveResult;
use quick_xml::{NsReader, Reader};
use url::Url;
#[cfg(test)]
mod tests;
/// Result alias used throughout this module; the error side is always [`XmlError`].
pub(crate) type XmlResult<T> = Result<T, XmlError>;
/// Pull-style source of XML elements read from a `BufRead` input.
///
/// All parser state lives behind a `RefCell`, so the elements and iterators that hold a
/// shared reference to the source can still advance it.
pub(crate) struct ElementSource<R: BufRead> {
/// Reader, lookahead event, depth counter and `xml:base` stack, mutated through `&self`.
state: RefCell<SourceState<R>>,
}
impl<R: BufRead> ElementSource<R> {
pub(crate) fn new(xml_data: R, xml_base_uri: Option<&str>) -> XmlResult<ElementSource<R>> {
let mut reader = NsReader::from_reader(xml_data);
let config = reader.config_mut();
config.expand_empty_elements = true;
config.trim_markup_names_in_closing_tags = true;
config.trim_text(false);
let state = RefCell::new(SourceState::new(reader, xml_base_uri)?);
Ok(ElementSource { state })
}
pub(crate) fn set_default_default_namespace(&self, namespace: NS) {
let mut state = self.state.borrow_mut();
if state.default_namespace == NS::Unknown {
state.default_namespace = namespace;
}
}
pub(crate) fn root(&self) -> XmlResult<Option<Element<R>>> {
self.next_element_at_depth(1)
}
fn children_as_string(&self, depth: u32, buffer: &mut String) -> XmlResult<()> {
let mut state = self.state.borrow_mut();
let mut current_depth = depth;
loop {
let peeked = state.peek();
if peeked.is_err() {
return Err(state.next().err().unwrap());
}
if let Some(event) = peeked.as_ref().unwrap() {
match event {
XmlEvent::Start { name, attributes, .. } => {
current_depth += 1;
append_element_start(buffer, name, attributes);
}
XmlEvent::Text(text) => {
append_element_text(buffer, text);
}
XmlEvent::End { name, .. } => {
current_depth -= 1;
if current_depth < depth {
break;
}
append_element_end(buffer, name);
}
}
state.next()?;
} else {
break;
}
}
Ok(())
}
fn next_element_at_depth(&self, iter_depth: u32) -> XmlResult<Option<Element<R>>> {
let mut state = self.state.borrow_mut();
while let Some(node) = state.next()? {
match node {
XmlEvent::Start { name, attributes, namespace } => {
state.current_depth += 1;
ElementSource::xml_base_push(&mut state, &attributes)?;
if state.current_depth == iter_depth {
let element = Element {
namespace,
name,
attributes,
xml_base: ElementSource::xml_base_fetch(&state),
source: self,
depth: state.current_depth,
};
return Ok(Some(element));
}
}
XmlEvent::End { .. } => {
state.current_depth -= 1;
ElementSource::xml_base_pop(&mut state);
}
_ => {}
}
if state.current_depth < iter_depth - 1 {
return Ok(None);
}
}
if state.current_depth > 0 {
let e = quick_xml::Error::Syntax(quick_xml::errors::SyntaxError::UnclosedTag);
Err(XmlError::Parser { e })
} else {
Ok(None)
}
}
fn text_node(&self) -> Option<String> {
let mut state = self.state.borrow_mut();
if let Ok(Some(XmlEvent::Text(_text))) = state.peek() {
match state.next() {
Ok(Some(XmlEvent::Text(text))) => return Some(text),
_ => unreachable!("state.next() did not return expected XmlEvent::Text"),
}
}
None
}
fn xml_base_fetch(state: &SourceState<R>) -> Option<Url> {
state.base_uris.last().map(|(_, uri)| uri.clone())
}
fn xml_base_pop(state: &mut SourceState<R>) {
while !state.base_uris.is_empty() {
let (depth, _) = state.base_uris.last().unwrap();
if depth > &state.current_depth {
state.base_uris.pop();
} else {
break;
}
}
}
fn xml_base_push(state: &mut SourceState<R>, attributes: &[NameValue]) -> XmlResult<()> {
let xml_base = attributes.iter().find(|nv| nv.name == "xml:base").map(|nv| &nv.value);
if let Some(xml_base) = xml_base {
match Url::parse(xml_base) {
Ok(uri) => {
state.base_uris.push((state.current_depth, uri));
}
Err(url::ParseError::RelativeUrlWithoutBase) => {
if let Some((_, last)) = state.base_uris.last() {
if let Ok(with_base) = last.join(xml_base) {
state.base_uris.push((state.current_depth, with_base));
}
}
}
Err(e) => return Err(XmlError::Url { e }),
}
}
Ok(())
}
}
/// Mutable parsing state shared by an `ElementSource` and the elements it hands out.
struct SourceState<R: BufRead> {
/// Namespace-aware reader the events are pulled from.
reader: NsReader<R>,
/// Scratch buffer handed to the reader for every event it reads.
buf_event: Vec<u8>,
/// One-event lookahead: the value the next call to `next()` will return.
next: XmlResult<Option<XmlEvent>>,
/// Number of currently open elements, as maintained by the element iterator.
current_depth: u32,
/// Stack of `(depth, uri)` pairs for the base URIs declared so far.
base_uris: Vec<(u32, Url)>,
/// Namespace assigned to elements whose own namespace cannot be resolved.
default_namespace: NS,
}
impl<R: BufRead> SourceState<R> {
    /// Wraps `reader` and primes the one-event lookahead.
    ///
    /// `xml_base_uri`, when given, is parsed and stored as the base URI at depth 0.
    ///
    /// # Errors
    /// Returns [`XmlError::Url`] if `xml_base_uri` cannot be parsed. A failure while reading
    /// the first event is not returned here: it is stored in the lookahead and surfaces
    /// from the first call to `next()`.
    fn new(reader: NsReader<R>, xml_base_uri: Option<&str>) -> XmlResult<SourceState<R>> {
        let mut base_uris = Vec::new();
        if let Some(xml_base_uri) = xml_base_uri {
            let uri = Url::parse(xml_base_uri)?;
            base_uris.push((0, uri));
        }
        let buf_event = Vec::with_capacity(512);
        let mut state = SourceState {
            reader,
            buf_event,
            next: Ok(None),
            current_depth: 0,
            base_uris,
            default_namespace: NS::Unknown,
        };
        state.next = state.fetch_next();
        Ok(state)
    }

    /// Reads forward to the next start tag, end tag or non-empty text/CDATA node.
    ///
    /// Every other event kind is skipped, as is text that fails to decode or unescape.
    /// Returns `Ok(None)` at end of input.
    ///
    /// # Errors
    /// Propagates errors reported by the underlying reader.
    fn fetch_next(&mut self) -> XmlResult<Option<XmlEvent>> {
        let decoder = self.reader.decoder();
        let reader = &mut self.reader;
        loop {
            // The reader appends each event's bytes to the buffer it is given. Every
            // `XmlEvent` we return owns its data, so the previous event's bytes are dead
            // by now; clearing here keeps the buffer bounded by the largest single event
            // instead of growing to the size of the whole document.
            self.buf_event.clear();
            let (ns_resolution, event) = reader.read_resolved_event_into(&mut self.buf_event)?;
            match event {
                Event::Start(ref e) => {
                    // Anything that is not a decodable, bound namespace falls back to the default.
                    let namespace = match ns_resolution {
                        ResolveResult::Bound(ns) => decoder
                            .decode(ns.as_ref())
                            .map(|decoded| NS::parse(decoded.as_ref()))
                            .unwrap_or(self.default_namespace),
                        ResolveResult::Unknown(_) => self.default_namespace,
                        ResolveResult::Unbound => self.default_namespace,
                    };
                    return Ok(Some(XmlEvent::start(namespace, e, reader)));
                }
                Event::End(ref e) => {
                    return Ok(Some(XmlEvent::end(e, reader)));
                }
                Event::Text(ref t) => {
                    // Empty text and text that cannot be decoded/unescaped is skipped.
                    let parsed = XmlEvent::text(t, reader);
                    if matches!(parsed, Ok(Some(_))) {
                        return parsed;
                    }
                }
                Event::CData(t) => {
                    // Same policy as plain text: only a successfully decoded, non-empty node counts.
                    let parsed = XmlEvent::text_from_cdata(&t, reader);
                    if matches!(parsed, Ok(Some(_))) {
                        return parsed;
                    }
                }
                Event::Eof => {
                    return Ok(None);
                }
                _ => {}
            }
        }
    }

    /// Returns the lookahead event by value and refills the lookahead from the reader.
    fn next(&mut self) -> XmlResult<Option<XmlEvent>> {
        let next = mem::replace(&mut self.next, Ok(None));
        self.next = self.fetch_next();
        next
    }

    /// Borrows the lookahead event without consuming it.
    fn peek(&mut self) -> &XmlResult<Option<XmlEvent>> {
        &self.next
    }
}
/// A start tag read from an [`ElementSource`], together with a handle back to that source.
pub(crate) struct Element<'a, R: BufRead> {
/// Tag name with any namespace prefix removed.
pub name: String,
/// Namespace resolved for the tag, or the source's default when it could not be resolved.
pub namespace: NS,
/// Attributes of the start tag; names keep their prefix, values are unescaped where possible.
pub attributes: Vec<NameValue>,
/// Innermost base URI in effect when the element was read, if any.
pub xml_base: Option<Url>,
/// Nesting depth of this element; the root element is at depth 1.
depth: u32,
/// Source this element was read from; used to read its text and children.
source: &'a ElementSource<R>,
}
#[allow(clippy::needless_lifetimes)]
impl<'a, R: BufRead> Element<'a, R> {
    /// Returns a copy of the value of the first attribute called `name`, if present.
    pub(crate) fn attr_value(&self, name: &str) -> Option<String> {
        for attribute in &self.attributes {
            if attribute.name == name {
                return Some(attribute.value.clone());
            }
        }
        None
    }

    /// Consumes and returns the next event from the source when it is a text node.
    pub(crate) fn child_as_text(&self) -> Option<String> {
        let source = self.source;
        source.text_node()
    }

    /// Returns an iterator over the elements nested directly inside this one.
    pub(crate) fn children(&self) -> ElementIter<R> {
        let depth = self.depth + 1;
        let source = self.source;
        ElementIter { source, depth }
    }

    /// Re-serialises this element's content (child tags and text) into a string.
    ///
    /// On success the result is always `Some`, even when the element is empty.
    pub(crate) fn children_as_string(&self) -> XmlResult<Option<String>> {
        let child_depth = self.depth + 1;
        let mut markup = String::new();
        self.source.children_as_string(child_depth, &mut markup)?;
        Ok(Some(markup))
    }

    /// Returns the element's namespace together with its tag name.
    pub(crate) fn ns_and_tag(&self) -> (NS, &str) {
        let tag = self.name.as_str();
        (self.namespace, tag)
    }
}
#[allow(clippy::needless_lifetimes)]
impl<'a, R: BufRead> Debug for Element<'a, R> {
    /// Renders the element as its start tag followed by a newline.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut rendered = String::new();
        append_element_start(&mut rendered, &self.name, &self.attributes);
        rendered.push('\n');
        f.write_str(&rendered)
    }
}
/// Iterator over the elements found at one fixed depth of an [`ElementSource`].
pub(crate) struct ElementIter<'a, R: BufRead> {
/// Source the elements are pulled from.
source: &'a ElementSource<R>,
/// Depth at which elements are yielded.
depth: u32,
}
impl<'a, R: BufRead> Iterator for ElementIter<'a, R> {
    type Item = XmlResult<Element<'a, R>>;

    /// Yields the next element at this iterator's depth; errors are yielded as items.
    fn next(&mut self) -> Option<Self::Item> {
        match self.source.next_element_at_depth(self.depth) {
            Ok(Some(element)) => Some(Ok(element)),
            Ok(None) => None,
            Err(failure) => Some(Err(failure)),
        }
    }
}
/// Namespaces this module distinguishes between.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum NS {
    /// `http://www.w3.org/2005/Atom`
    Atom,
    /// `http://purl.org/rss/1.0/`
    RSS,
    /// Any namespace URI that is not listed here.
    Unknown,
    /// `http://purl.org/rss/1.0/modules/content/`
    Content,
    /// `http://purl.org/dc/elements/1.1/`
    DublinCore,
    /// `http://search.yahoo.com/mrss/`
    MediaRSS,
    /// `http://www.itunes.com/dtds/podcast-1.0.dtd`
    Itunes,
}

impl NS {
    /// Maps a namespace URI to its variant; the comparison is exact and case-sensitive.
    ///
    /// Unrecognised URIs map to [`NS::Unknown`].
    fn parse(s: &str) -> NS {
        const KNOWN: [(&str, NS); 6] = [
            ("http://purl.org/rss/1.0/", NS::RSS),
            ("http://www.w3.org/2005/Atom", NS::Atom),
            ("http://purl.org/rss/1.0/modules/content/", NS::Content),
            ("http://purl.org/dc/elements/1.1/", NS::DublinCore),
            ("http://search.yahoo.com/mrss/", NS::MediaRSS),
            ("http://www.itunes.com/dtds/podcast-1.0.dtd", NS::Itunes),
        ];
        KNOWN
            .iter()
            .find(|(uri, _)| *uri == s)
            .map_or(NS::Unknown, |&(_, ns)| ns)
    }
}
/// A single attribute: its name and its value, both as owned strings.
pub(crate) struct NameValue {
/// Attribute name as written in the document, including any prefix.
pub name: String,
/// Attribute value.
pub value: String,
}
/// Errors produced while reading XML.
#[derive(Debug)]
pub enum XmlError {
/// Failure reported by the `quick_xml` reader.
Parser { e: quick_xml::Error },
/// A URL (such as an `xml:base` value) could not be parsed.
Url { e: url::ParseError },
/// Bytes could not be decoded into text.
Encoding { e: quick_xml::encoding::EncodingError },
/// Escaped text could not be unescaped.
Escape { e: quick_xml::escape::EscapeError },
}
impl fmt::Display for XmlError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
XmlError::Parser { e } => write!(f, "Parser error: {}", e),
XmlError::Url { e } => write!(f, "Url error: {}", e),
XmlError::Encoding { e } => write!(f, "Encoding error: {}", e),
XmlError::Escape { e } => write!(f, "Escape error: {}", e),
}
}
}
// Empty impl: every `Error` method keeps the trait's default behaviour.
impl Error for XmlError {}
impl From<quick_xml::Error> for XmlError {
fn from(e: quick_xml::Error) -> Self {
XmlError::Parser { e }
}
}
impl From<url::ParseError> for XmlError {
fn from(e: url::ParseError) -> Self {
XmlError::Url { e }
}
}
impl From<quick_xml::encoding::EncodingError> for XmlError {
fn from(e: quick_xml::encoding::EncodingError) -> Self {
XmlError::Encoding { e }
}
}
impl From<quick_xml::escape::EscapeError> for XmlError {
fn from(e: quick_xml::escape::EscapeError) -> Self {
XmlError::Escape { e }
}
}
/// Owned, simplified form of the reader events this module acts on.
enum XmlEvent {
/// Start tag: resolved namespace, tag name without prefix, and its attributes.
Start { namespace: NS, name: String, attributes: Vec<NameValue> },
/// End tag, carrying the tag name without prefix.
End { name: String },
/// Non-empty text taken from a text node or a CDATA section.
Text(String),
}
impl XmlEvent {
    /// Builds an `End` event from a closing tag.
    fn end<R: BufRead>(event: &BytesEnd, reader: &Reader<R>) -> XmlEvent {
        let qualified = event.name();
        XmlEvent::End {
            name: XmlEvent::parse_name(qualified.as_ref(), reader),
        }
    }

    /// Decodes a tag name and strips its prefix, keeping what follows the last `:`.
    ///
    /// Names that cannot be decoded become the empty string.
    fn parse_name<R: BufRead>(bytes: &[u8], reader: &Reader<R>) -> String {
        match reader.decoder().decode(bytes) {
            Ok(decoded) => decoded.rsplit(':').next().unwrap_or_default().to_string(),
            Err(_) => String::new(),
        }
    }

    /// Builds a `Start` event from an opening tag.
    ///
    /// Attributes that are malformed or cannot be decoded are dropped. A value that
    /// decodes but cannot be unescaped is kept in its decoded, still-escaped form.
    fn start<R: BufRead>(namespace: NS, event: &BytesStart, reader: &Reader<R>) -> XmlEvent {
        let name = XmlEvent::parse_name(event.name().as_ref(), reader);
        let decoder = reader.decoder();
        let mut attributes = Vec::new();
        for attribute in event.attributes() {
            let attribute = match attribute {
                Ok(attribute) => attribute,
                Err(_) => continue,
            };
            let key = match decoder.decode(attribute.key.as_ref()) {
                Ok(key) => key,
                Err(_) => continue,
            };
            let raw_value = match decoder.decode(&attribute.value) {
                Ok(raw_value) => raw_value,
                Err(_) => continue,
            };
            let value = match quick_xml::escape::unescape(&raw_value) {
                Ok(unescaped) => unescaped.into_owned(),
                Err(_) => raw_value.to_string(),
            };
            attributes.push(NameValue {
                name: key.into_owned(),
                value,
            });
        }
        XmlEvent::Start { namespace, name, attributes }
    }

    /// Builds a `Text` event from a text node, or `None` when the node is empty.
    ///
    /// # Errors
    /// Fails when the bytes cannot be decoded or the text cannot be unescaped.
    fn text<R: BufRead>(text: &BytesText, reader: &Reader<R>) -> XmlResult<Option<XmlEvent>> {
        if text.is_empty() {
            return Ok(None);
        }
        let decoded = reader.decoder().decode(text)?;
        let unescaped = quick_xml::escape::unescape(&decoded)?;
        Ok(Some(XmlEvent::Text(unescaped.into_owned())))
    }

    /// Builds a `Text` event from a CDATA section, or `None` when the section is empty.
    ///
    /// CDATA content is decoded but not unescaped.
    ///
    /// # Errors
    /// Fails when the bytes cannot be decoded.
    fn text_from_cdata<R: BufRead>(cdata: &BytesCData, reader: &Reader<R>) -> XmlResult<Option<XmlEvent>> {
        if cdata.is_empty() {
            return Ok(None);
        }
        let decoded = reader.decoder().decode(cdata)?;
        Ok(Some(XmlEvent::Text(decoded.into_owned())))
    }
}
/// Appends the closing tag `</name>` to `buffer`.
fn append_element_end(buffer: &mut String, name: &str) {
    buffer.extend(["</", name, ">"]);
}
/// Appends the opening tag `<name attr="value" …>` to `buffer`.
///
/// Attribute values are written inside double quotes, so `"` and `<` in a value are
/// emitted as `&quot;` and `&lt;`. Without this a value containing a quote would end the
/// attribute early and let the rest of the value be read as further markup.
///
/// `&` is deliberately left untouched: values whose entities could not be resolved are
/// stored verbatim by `XmlEvent::start`, and escaping `&` would double-escape them.
fn append_element_start(buffer: &mut String, name: &str, attributes: &[NameValue]) {
    buffer.push('<');
    buffer.push_str(name);
    for attr in attributes {
        buffer.push(' ');
        buffer.push_str(attr.name.as_str());
        buffer.push_str("=\"");
        for ch in attr.value.chars() {
            match ch {
                '"' => buffer.push_str("&quot;"),
                '<' => buffer.push_str("&lt;"),
                other => buffer.push(other),
            }
        }
        buffer.push('"');
    }
    buffer.push('>');
}
/// Appends `text` to `buffer` exactly as given.
fn append_element_text(buffer: &mut String, text: &str) {
    *buffer += text;
}