use core::marker::Copy;
use alloc::borrow::ToOwned;
use alloc::format;
use alloc::str::from_utf8;
use alloc::string::String;
use alloc::vec;
use alloc::vec::Vec;
use memchr::{memchr, memchr3_iter};
use crate::parsers::{extract, FromSlice};
use crate::record::StateMetadata;
use crate::EtError;
use crate::{impl_reader, impl_record};
/// The kind of an XML tag.
///
/// `XmlRecord::parse` uses this to maintain its stack of open tag ids:
/// `Open` pushes the tag id, `Close` pops (and checks) it, and `SelfClose`
/// leaves the stack untouched.
#[derive(Clone, Copy, Debug)]
pub enum XmlTagType {
    /// A tag that opens an element; this is the default variant.
    Open,
    /// A tag that closes the most recently opened element.
    Close,
    /// A tag that opens and closes an element at once.
    SelfClose,
}

impl Default for XmlTagType {
    /// A freshly created tag type is `Open`.
    fn default() -> Self {
        Self::Open
    }
}
/// A single XML tag whose id borrows from the buffer it was read from.
///
/// Both fields are filled in by this type's `FromSlice::get` implementation.
#[derive(Clone, Copy, Debug, Default)]
pub struct XmlTag<'r> {
// Whether the tag opens, closes, or self-closes an element.
tag_type: XmlTagType,
// The tag's identifier, validated as UTF-8 before being stored.
id: &'r str,
}
impl<'b: 's, 's> FromSlice<'b, 's> for XmlTag<'r> {
type State = ();
fn parse(
rb: &[u8],
eof: bool,
consumed: &mut usize,
_state: &mut Self::State,
) -> Result<bool, EtError> {
let mut cur_quote = b' ';
let mut start = 0;
let end = 'read: loop {
for i in memchr3_iter(b'>', b'"', b'\'', &rb[start..]) {
match (rb[i], cur_quote) {
(b'>', b' ') => break 'read i + 1,
(b'\'', b' ') => cur_quote = b'\'',
(b'"', b' ') => cur_quote = b'"',
(b'\'', b'\'') => cur_quote = b' ',
(b'"', b'"') => cur_quote = b' ',
_ => {}
}
}
if rb.len() > 1024 {
return Err(format!("Tags larger than {} not supported", 1024).into());
}
if eof {
return Err("Tag was never closed".into());
}
start = rb.len() - 1;
};
*consumed += end;
Ok(true)
}
fn get(
&mut self,
buf: &'r [u8],
_state: &Self::State,
) -> Result<(), EtError> {
let is_closing = buf.get(1) == Some(&b'/');
let is_self_closing = buf.last() == Some(&b'/');
let (tag_type, data) = match (is_closing, is_self_closing) {
(true, true) => return Err(EtError::from("Tag can not start and end with '/'")),
(true, false) => (XmlTagType::Close, &buf[2..buf.len() - 1]),
(false, true) => (XmlTagType::SelfClose, &buf[1..buf.len() - 2]),
(false, false) => (XmlTagType::Open, &buf[1..buf.len() - 1]),
};
let id_end = memchr(b' ', data).unwrap_or(data.len());
self.tag_type = tag_type;
self.id = from_utf8(&data[..id_end])?;
Ok(())
}
}
/// A run of text between XML tags, borrowed from the buffer it was read from.
///
/// The wrapped string is set by this type's `FromSlice::get` implementation
/// after UTF-8 validation.
#[derive(Clone, Copy, Debug, Default)]
pub struct XmlText<'r>(&'r str);
impl<'b: 's, 's> FromSlice<'b, 's> for XmlText<'r> {
type State = ();
fn parse(
rb: &[u8],
eof: bool,
consumed: &mut usize,
_state: &mut Self::State,
) -> Result<bool, EtError> {
if let Some(e) = memchr(b'<', rb) {
*consumed += e;
return Ok(true);
}
if rb.len() > 65536 {
return Err(
format!("XML text larger than {} not supported", 65536).into()
);
}
if eof {
*consumed += rb.len();
return Ok(true);
}
Ok(false)
}
fn get(
&mut self,
buf: &'r [u8],
_state: &Self::State,
) -> Result<(), EtError> {
self.0 = from_utf8(buf)?;
Ok(())
}
}
/// Parsing state carried between successive `XmlRecord`s.
#[derive(Clone, Debug, Default)]
pub struct XmlState {
// Ids of the tags that are currently open, outermost first; `XmlRecord::parse`
// pushes on an opening tag and pops on a closing tag.
stack: Vec<String>,
// Set by `XmlRecord::parse`: true when the last parsed item was text, false
// when it was a tag.
is_text: bool,
}
impl StateMetadata for XmlState {
/// Returns the header names `"tags"` and `"text"`, which match the field
/// names of `XmlRecord`.
fn header(&self) -> Vec<&str> {
vec!["tags", "text"]
}
}
// `XmlState` overrides no `FromSlice` methods; only the associated `State`
// type is specified (as unit).
// NOTE(review): presumably this relies on default method implementations
// provided by the `FromSlice` trait - confirm against the trait definition.
impl<'b: 's, 's> FromSlice<'b, 's> for XmlState {
type State = ();
}
/// One item produced while reading XML: either a tag or a run of text,
/// together with the tags that are open at that point.
#[derive(Clone, Debug, Default)]
pub struct XmlRecord<'r> {
// Copy of the open-tag stack from `XmlState` at the time the record was read.
tags: Vec<String>,
// The text content when the record is a text run; empty for tag records.
text: &'r str,
}
impl<'b: 's, 's> FromSlice<'b, 's> for XmlRecord<'r> {
type State = &'r mut XmlState;
fn parse(rb: &[u8], eof: bool, consumed: &mut usize, state: &mut Self::State) -> Result<bool, EtError> {
if rb.is_empty() {
if !state.stack.is_empty() {
return Err(format!("Closing tag for {} not present?", state.stack.pop().unwrap()).into());
} else {
return Ok(false);
}
}
let con = &mut 0;
if rb[0] == b'<' {
let tag = extract::<XmlTag>(rb, con, ())?;
match tag.tag_type {
XmlTagType::Open => {
state.stack.push(tag.id.to_owned());
}
XmlTagType::Close => {
if let Some(open_tag) = state.stack.pop() {
if open_tag != tag.id {
return Err(
format!("Closing tag {} found, but {} was open.", tag.id, open_tag).into()
);
}
} else {
return Err(
format!(
"Closing tag {} found, but no tags opened before it.",
tag.id
).into()
);
}
}
XmlTagType::SelfClose => {}
}
state.is_text = false;
} else {
if XmlText::parse(rb, eof, con, &mut ())? {
state.is_text = true;
} else {
return Ok(false);
}
}
*consumed += *con;
Ok(true)
}
fn get(
&mut self,
rb: &'r [u8],
state: &Self::State,
) -> Result<(), EtError> {
self.text = if state.is_text {
from_utf8(rb)?
} else {
""
};
self.tags = state.stack.clone();
Ok(())
}
}
// Invoke the crate's `impl_record!` and `impl_reader!` macros (defined
// elsewhere in the crate) for the XML types. The unit test in this file uses
// the resulting `XmlReader` through `XmlReader::new(data, ())` and `next()`.
impl_record!(XmlRecord<'r>: tags, text);
impl_reader!(XmlReader, XmlRecord, XmlRecord<'r>, XmlState, ());
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads `<a>test</a>` record by record and checks the open-tag stack
    /// reported with each one.
    #[test]
    fn test_xml_reader() -> Result<(), EtError> {
        let input: &[u8] = b"<a>test</a>";
        let mut xml = XmlReader::new(input, ())?;

        // `<a>`: the tag has just been opened
        let opening = xml.next()?.unwrap();
        assert_eq!(opening.tags, &["a"]);

        // `test`: still inside `a`
        let body = xml.next()?.unwrap();
        assert_eq!(body.tags, &["a"]);

        // `</a>`: nothing is open any more
        let closing = xml.next()?.unwrap();
        assert!(closing.tags.is_empty());

        // the input is exhausted
        assert!(xml.next()?.is_none());
        Ok(())
    }
}