/// Pull-style tokenizer for a small XML subset.
///
/// The reader walks the input once and hands out one [`Event`] per call to
/// [`ReaderForMicroXml::read_event`]. It never allocates: every string in an
/// event is a sub-slice of the input passed to [`ReaderForMicroXml::new`].
///
/// The reader does not check that start and end tags are balanced, and it
/// only recognises attribute values delimited by double quotes.
pub struct ReaderForMicroXml<'a> {
    /// The complete input; every event slice borrows from it.
    input: &'a str,
    /// Iterator over the not-yet-consumed `(byte offset, char)` pairs.
    indices: core::str::CharIndices<'a>,
    /// Where the cursor currently is relative to a start tag.
    tag_state: TagState,
    /// The character under the cursor together with its byte offset.
    last_char: (usize, char),
}

/// One item produced by [`ReaderForMicroXml::read_event`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Event<'a> {
    /// A start tag was opened; carries the element name.
    StartElement(&'a str),
    /// An element was closed; carries the name written in `</name>`, or the
    /// empty string for a self-closing `/>`.
    EndElement(&'a str),
    /// An attribute of the current start tag: `(name, value)`.
    Attribute(&'a str, &'a str),
    /// Text found between tags.
    TextNode(&'a str),
    /// The input could not be tokenized; carries a static description.
    Error(&'static str),
    /// The end of the input was reached right after an end tag.
    Eof,
}

/// Position of the cursor relative to a start tag.
#[derive(Clone, Copy, Debug)]
enum TagState {
    /// Between tags: the next event is text, a start tag, an end tag or a comment.
    OutsideOfTag,
    /// After an element name: the next event is an attribute or the end of the tag.
    InsideOfTag,
    /// The input is exhausted; every further read yields [`Event::Eof`].
    Eof,
}

impl<'a> ReaderForMicroXml<'a> {
    /// Creates a reader positioned on the first character of `input`.
    ///
    /// An empty `input` is accepted; the first call to
    /// [`read_event`](Self::read_event) then reports an [`Event::Error`].
    pub fn new(input: &'a str) -> Self {
        let mut indices = input.char_indices();
        // For empty input a whitespace placeholder is used; the first read
        // skips it, finds nothing behind it and reports the premature end.
        let last_char = indices.next().unwrap_or((0, ' '));
        ReaderForMicroXml {
            input,
            indices,
            tag_state: TagState::OutsideOfTag,
            last_char,
        }
    }

    /// Reads the next event.
    ///
    /// Running out of input anywhere except right after an end tag is
    /// reported as `Event::Error("Eof on incorrect position.")`.
    /// The returned event borrows from the input only, so it may be kept
    /// while the reader is used again.
    pub fn read_event(&mut self) -> Event<'a> {
        self.read_event_internal()
            .unwrap_or(Event::Error("Eof on incorrect position."))
    }

    /// Produces the next event, or `None` when the input ends prematurely.
    fn read_event_internal(&mut self) -> Option<Event<'a>> {
        match self.tag_state {
            TagState::OutsideOfTag => {
                // Remember where we are *before* skipping whitespace so that
                // a text node keeps its leading whitespace.
                let (start_pos, _) = self.get_last_char();
                let (_, ch) = self.skip_whitespace_and_get_last_char()?;
                if ch != '<' {
                    return self.read_text_node(start_pos);
                }
                self.tag_state = TagState::InsideOfTag;
                self.move_next_char()?;
                let (_, ch) = self.skip_whitespace_and_get_last_char()?;
                match ch {
                    '!' => {
                        // Comments produce no event; continue with whatever follows.
                        self.skip_comment()?;
                        Some(self.read_event())
                    }
                    '/' => self.read_end_element(),
                    _ => self.read_element_name(),
                }
            }
            TagState::InsideOfTag => {
                let (_, ch) = self.skip_whitespace_and_get_last_char()?;
                match ch {
                    '/' => {
                        self.move_next_char()?;
                        let (_, ch) = self.skip_whitespace_and_get_last_char()?;
                        if ch != '>' {
                            return Some(Event::Error("Tag has / but not />"));
                        }
                        // A self-closing tag may legitimately be the last thing
                        // in the input (optionally followed by whitespace), just
                        // like a regular end tag. Whitespace is only inspected,
                        // not consumed, so following text keeps it.
                        let at_end =
                            self.move_next_char().is_none() || self.only_whitespace_left();
                        self.tag_state = if at_end {
                            TagState::Eof
                        } else {
                            TagState::OutsideOfTag
                        };
                        Some(Event::EndElement(""))
                    }
                    '>' => {
                        // End of the start tag: no event of its own.
                        self.move_next_char()?;
                        self.tag_state = TagState::OutsideOfTag;
                        Some(self.read_event())
                    }
                    _ => self.read_attribute(),
                }
            }
            TagState::Eof => Some(Event::Eof),
        }
    }

    /// Reads an element name up to whitespace, `/` or `>`.
    fn read_element_name(&mut self) -> Option<Event<'a>> {
        let (start_pos, _) = self.skip_whitespace_and_get_last_char()?;
        let end_pos = self.scan_until(|ch| ch.is_whitespace() || ch == '/' || ch == '>')?;
        self.skip_whitespace_and_get_last_char()?;
        self.tag_state = TagState::InsideOfTag;
        Some(Event::StartElement(self.slice(start_pos, end_pos)))
    }

    /// Reads one `name = "value"` pair; the value must be double-quoted.
    fn read_attribute(&mut self) -> Option<Event<'a>> {
        let (name_start, _) = self.skip_whitespace_and_get_last_char()?;
        let name_end = self.scan_until(|ch| ch.is_whitespace() || ch == '=')?;
        let attr_name = self.slice(name_start, name_end);

        let (_, ch) = self.skip_whitespace_and_get_last_char()?;
        if ch == '=' {
            self.move_next_char()?;
        }
        let (_, ch) = self.skip_whitespace_and_get_last_char()?;
        if ch != '"' {
            return Some(Event::Error("Attribute does not have the char = ."));
        }
        self.move_next_char()?; // step over the opening quote

        let (value_start, _) = self.get_last_char();
        let value_end = self.scan_until(|ch| ch == '"')?;
        self.move_next_char()?; // step over the closing quote
        self.skip_whitespace_and_get_last_char()?;
        Some(Event::Attribute(
            attr_name,
            self.slice(value_start, value_end),
        ))
    }

    /// Reads `</name>`; the cursor is on the `/` when this is called.
    fn read_end_element(&mut self) -> Option<Event<'a>> {
        self.move_next_char()?; // step over '/'
        let (start_pos, _) = self.skip_whitespace_and_get_last_char()?;
        let end_pos = self.scan_until(|ch| ch.is_whitespace() || ch == '>')?;
        let (_, ch) = self.skip_whitespace_and_get_last_char()?;
        if ch != '>' {
            return Some(Event::Error("End Element does not have > ."));
        }
        // Running out of input here (possibly after trailing whitespace) is
        // the regular end of the document, not an error.
        let more_input = self.move_next_char().is_some()
            && self.skip_whitespace_and_get_last_char().is_some();
        self.tag_state = if more_input {
            TagState::OutsideOfTag
        } else {
            TagState::Eof
        };
        Some(Event::EndElement(self.slice(start_pos, end_pos)))
    }

    /// Reads text from byte offset `start_pos` up to the next `<`.
    fn read_text_node(&mut self, start_pos: usize) -> Option<Event<'a>> {
        let end_pos = self.scan_until(|ch| ch == '<')?;
        self.tag_state = TagState::OutsideOfTag;
        Some(Event::TextNode(self.slice(start_pos, end_pos)))
    }

    /// Skips a `<!-- ... -->` comment; the cursor is on the `!` when called.
    fn skip_comment(&mut self) -> Option<()> {
        self.move_next_char()?;
        self.move_next_char()?;
        // Sliding window over the two previously seen characters.
        let mut ch1 = ' ';
        let mut ch2 = ' ';
        loop {
            let (_, ch3) = self.get_last_char();
            let closed = ch1 == '-' && ch2 == '-' && ch3 == '>';
            self.move_next_char()?;
            if closed {
                break;
            }
            ch1 = ch2;
            ch2 = ch3;
        }
        self.skip_whitespace_and_get_last_char()?;
        self.tag_state = TagState::OutsideOfTag;
        Some(())
    }

    /// Advances the cursor by one character; `None` at the end of the input,
    /// in which case the cursor is left unchanged.
    fn move_next_char(&mut self) -> Option<()> {
        self.last_char = self.indices.next()?;
        Some(())
    }

    /// Returns the byte offset and character under the cursor.
    fn get_last_char(&self) -> (usize, char) {
        self.last_char
    }

    /// Advances past whitespace and returns the first non-whitespace
    /// character with its byte offset; `None` if the input ends first.
    fn skip_whitespace_and_get_last_char(&mut self) -> Option<(usize, char)> {
        self.scan_until(|ch| !ch.is_whitespace())?;
        Some(self.get_last_char())
    }

    /// Advances until the character under the cursor satisfies `is_end` and
    /// returns its byte offset; `None` if the input ends first.
    fn scan_until(&mut self, is_end: impl Fn(char) -> bool) -> Option<usize> {
        loop {
            let (pos, ch) = self.get_last_char();
            if is_end(ch) {
                return Some(pos);
            }
            self.move_next_char()?;
        }
    }

    /// Returns `true` when only whitespace is left from the cursor onwards.
    fn only_whitespace_left(&self) -> bool {
        let (pos, _) = self.get_last_char();
        self.input[pos..].chars().all(char::is_whitespace)
    }

    /// Returns `input[start..end]` with the lifetime of the input.
    ///
    /// Both offsets always originate from `char_indices`, so they lie on
    /// character boundaries and `start <= end`.
    fn slice(&self, start: usize, end: usize) -> &'a str {
        let input: &'a str = self.input;
        &input[start..end]
    }
}