/// A single lexical event produced by [`Lexer::parse`].
///
/// Events are emitted in document order. A start tag yields one
/// `StartElement`, followed by one `Attribute` per attribute in the tag;
/// a self-closing tag (`<br/>`) additionally yields a matching `EndElement`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Event {
    /// An opening tag; the payload is the tag name.
    StartElement(String),
    /// A closing tag (or the implicit close of a self-closing tag); the
    /// payload is the tag name.
    EndElement(String),
    /// Text found between tags, as cleaned up by the lexer.
    TextContent(String),
    /// An attribute of the most recently started element, as `(key, value)`.
    /// The value is the empty string when the attribute has no `=value` part.
    Attribute(String, String),
}
/// Lexer for a small XML-like markup dialect.
///
/// The input is turned into a flat list of [`Event`]s: start tags, end tags,
/// attributes and text. Comments, processing instructions (`<?xml ...?>`),
/// DOCTYPE declarations and CDATA sections are *not* recognized; they are
/// lexed as (malformed) elements, with errors reported on stderr.
///
/// Malformed input never panics: problems are reported with `eprintln!` and
/// lexing continues on a best-effort basis.
pub struct Lexer {
    /// The whole input as Unicode scalar values; never modified after `new`.
    content: Vec<char>,
    /// Index into `content` of the next unread character. Using a cursor
    /// instead of re-allocating the remaining input keeps lexing linear.
    pos: usize,
    /// Names of the elements that are currently open, innermost last.
    tag_stack: Vec<String>,
    /// Events produced so far.
    events: Vec<Event>,
}

impl Lexer {
    /// Creates a lexer over `content`. Nothing is lexed until [`Lexer::parse`]
    /// is called.
    pub fn new(content: String) -> Self {
        Self {
            content: content.chars().collect(),
            pos: 0,
            tag_stack: Vec::new(),
            events: Vec::new(),
        }
    }

    /// Lexes all remaining input and returns every event produced so far.
    ///
    /// Calling `parse` again after the input is exhausted is cheap and returns
    /// the same events.
    pub fn parse(&mut self) -> &[Event] {
        loop {
            self.take_whitespaces();
            // Exactly one construct is consumed per iteration. The closing-tag
            // case must be tested before the generic `<` case, and the cases
            // must be mutually exclusive so that `</b></a>` is lexed as two
            // end tags rather than an end tag followed by a bogus start tag.
            match (self.peek(), self.peek_at(1)) {
                (None, _) => break,
                (Some('<'), Some('/')) => self.take_end_element(),
                (Some('<'), _) => self.take_start_element(),
                _ => self.take_text_content(),
            }
        }
        &self.events
    }

    /// Consumes a closing tag (`</name>`); the cursor must be on its `<`.
    ///
    /// An `EndElement` event is always emitted. A name that does not match the
    /// innermost open element is reported and leaves the tag stack untouched.
    fn take_end_element(&mut self) {
        let tag_name = self.take_tag_name(2); // 2 == length of "</"
        if self.tag_stack.last() == Some(&tag_name) {
            self.tag_stack.pop();
        } else {
            eprintln!("ERROR: Invalid closing tag `{tag_name}`.")
        }
        self.events.push(Event::EndElement(tag_name));

        self.take_whitespaces();
        if self.peek() == Some('>') {
            self.advance(1);
        } else {
            // Also reached on truncated input such as `</a`.
            eprintln!("ERROR: Invalid closing tag with extra args.");
        }
    }

    /// Consumes a start tag (`<name attr="v">` or `<name/>`); the cursor must
    /// be on its `<`.
    ///
    /// Emits `StartElement`, then the attributes, and for a self-closing tag a
    /// matching `EndElement`. Otherwise the element is recorded as open.
    fn take_start_element(&mut self) {
        let tag_name = self.take_tag_name(1); // 1 == length of "<"
        self.events.push(Event::StartElement(tag_name.clone()));
        self.take_attributes();

        match (self.peek(), self.peek_at(1)) {
            (Some('/'), Some('>')) => {
                self.advance(2);
                // Self-closing: the element never becomes "open".
                self.events.push(Event::EndElement(tag_name));
            }
            (Some('>'), _) => {
                self.advance(1);
                self.tag_stack.push(tag_name);
            }
            _ => {
                // Missing `>` (including end of input). The element is still
                // treated as open so a later closing tag can match it.
                eprintln!("ERROR: expected `>` on start element.");
                self.tag_stack.push(tag_name);
            }
        }
    }

    /// Skips `start` characters (the `<` or `</` prefix) and consumes the tag
    /// name that follows. Returns an empty string if there is no name.
    fn take_tag_name(&mut self, start: usize) -> String {
        self.take_while_from(start, Self::is_name_char)
    }

    /// Consumes text up to the next `<` (or end of input) and emits it as
    /// `TextContent`, unless nothing but whitespace remains after clean-up.
    ///
    /// Clean-up deletes every newline and tab (they are removed, not replaced
    /// by a space) and then trims surrounding whitespace.
    fn take_text_content(&mut self) {
        let raw = self.take_while(|c| c != '<');
        let cleaned = raw.replace('\n', "").replace('\t', "");
        let text = cleaned.trim();
        if !text.is_empty() {
            self.events.push(Event::TextContent(text.to_string()));
        }
    }

    /// Consumes all attributes of the current start tag.
    ///
    /// On return the cursor is on `>`, `/`, `<` or at end of input, with any
    /// preceding whitespace already skipped.
    fn take_attributes(&mut self) {
        loop {
            // Whitespace is skipped *before* looking for the end of the tag so
            // that `<br />` does not yield an empty attribute.
            self.take_whitespaces();
            match self.peek() {
                // `<` means the tag was never closed; stop so the caller can
                // report it and the next element is lexed normally.
                None | Some('>') | Some('/') | Some('<') => break,
                Some(_) => self.take_attribute(),
            }
        }
    }

    /// Consumes one attribute (`key`, `key=value`, `key="value"`) and emits an
    /// `Attribute` event.
    ///
    /// Always consumes at least one character, which is what guarantees that
    /// [`Lexer::take_attributes`] terminates: a character that cannot start a
    /// key is reported and dropped.
    fn take_attribute(&mut self) {
        self.take_whitespaces();
        let key = self.take_while(Self::is_name_char);
        if key.is_empty() {
            if let Some(unexpected) = self.peek() {
                eprintln!("ERROR: unexpected character `{unexpected}` in start element.");
                self.advance(1);
            }
            return;
        }
        let value = self.take_attribute_value();
        self.events.push(Event::Attribute(key, value));
        self.take_whitespaces();
    }

    /// Consumes the optional `=value` part of an attribute and returns the
    /// value without its quotes, or an empty string if there is no `=`.
    ///
    /// Whitespace around `=` is accepted. A value may be enclosed in double or
    /// single quotes; an unquoted value ends at whitespace, `>` or `/`.
    fn take_attribute_value(&mut self) -> String {
        self.take_whitespaces();
        if self.peek() != Some('=') {
            return String::new();
        }
        self.advance(1);
        self.take_whitespaces();

        match self.peek() {
            Some(quote @ ('"' | '\'')) => {
                // Skip the opening quote and read up to the matching one.
                let value = self.take_while_from(1, |c| c != quote);
                if self.peek() == Some(quote) {
                    self.advance(1);
                } else {
                    eprintln!("ERROR: unterminated attribute value.");
                }
                value
            }
            _ => self.take_while(|c| !c.is_whitespace() && c != '>' && c != '/'),
        }
    }

    /// Advances the cursor past any whitespace.
    fn take_whitespaces(&mut self) {
        while self.peek().map_or(false, char::is_whitespace) {
            self.pos += 1;
        }
    }

    /// Consumes and returns the longest prefix of the unread input whose
    /// characters all satisfy `predict`.
    fn take_while<F>(&mut self, predict: F) -> String
    where
        F: FnMut(char) -> bool,
    {
        self.take_while_from(0, predict)
    }

    /// Skips `start` unread characters, then consumes and returns the longest
    /// run of characters satisfying `predict`. The skipped characters are
    /// consumed but not returned.
    fn take_while_from<F>(&mut self, start: usize, mut predict: F) -> String
    where
        F: FnMut(char) -> bool,
    {
        let mut end = start;
        while let Some(c) = self.peek_at(end) {
            if !predict(c) {
                break;
            }
            end += 1;
        }
        self.get_slice(start, end)
    }

    /// Returns the unread characters in `from..to` (offsets relative to the
    /// cursor) and consumes everything up to `to`. Offsets past the end of the
    /// input are clamped instead of panicking.
    fn get_slice(&mut self, from: usize, to: usize) -> String {
        let end = (self.pos + to).min(self.content.len());
        let begin = (self.pos + from).min(end);
        let value = self.content[begin..end].iter().collect();
        self.pos = end;
        value
    }

    /// Returns the next unread character without consuming it.
    fn peek(&self) -> Option<char> {
        self.peek_at(0)
    }

    /// Returns the unread character `offset` positions ahead of the cursor.
    fn peek_at(&self, offset: usize) -> Option<char> {
        self.content.get(self.pos + offset).copied()
    }

    /// Consumes up to `count` characters, stopping at end of input.
    fn advance(&mut self, count: usize) {
        self.pos = (self.pos + count).min(self.content.len());
    }

    /// Whether `c` may appear in a tag or attribute name: alphanumerics plus
    /// `-`, `_`, `:` and `.`.
    fn is_name_char(c: char) -> bool {
        c.is_alphanumeric() || matches!(c, '-' | '_' | ':' | '.')
    }
}