mod attrs;
mod token;
use crate::{data::VOID_TAGS, Element, Node};
use token::Token;
/// Splits raw HTML into a flat list of [`Token`]s.
///
/// The input is scanned one character at a time while a small state machine
/// tracks whether the scanner is inside a quoted attribute value, a comment,
/// or the raw body of a `<script>` / `<style>` element. Only one of those
/// states is ever active at a time; when none is, `<` and `>` delimit tags and
/// everything between tags becomes a text token.
///
/// # Errors
///
/// Propagates the error returned by `Token::from` when the text between a
/// `<` ... `>` pair cannot be turned into a token.
fn html_to_stack(html: &str) -> Result<Vec<Token>, String> {
    const COMMENT_OPEN: [char; 4] = ['<', '!', '-', '-'];
    const COMMENT_CLOSE: &[char] = &['-', '-', '>'];
    const SCRIPT_CLOSE: &[char] = &['<', '/', 's', 'c', 'r', 'i', 'p', 't', '>'];
    const STYLE_CLOSE: &[char] = &['<', '/', 's', 't', 'y', 'l', 'e', '>'];

    // Characters collected since the last emitted token.
    let mut pending: Vec<char> = Vec::new();
    let mut tokens: Vec<Token> = Vec::new();
    // The quote character that opened the attribute value being scanned.
    let mut open_quote: Option<char> = None;
    let mut inside_tag = false;
    let mut inside_comment = false;
    // `(tag name, closing sequence)` while scanning a script/style body.
    let mut raw_text: Option<(&'static str, &'static [char])> = None;

    for ch in html.chars() {
        // Quoted attribute value: copy verbatim until the matching quote that
        // is not preceded by a backslash.
        if let Some(quote) = open_quote {
            if ch == quote {
                let previous_char = pending
                    .last()
                    .copied()
                    .expect("cannot get the last char in chars stack");
                if previous_char != '\\' {
                    open_quote = None;
                }
            }
            pending.push(ch);
            continue;
        }

        // Comment: swallow everything up to and including `-->`.
        if inside_comment {
            pending.push(ch);
            if ends_with(&pending, COMMENT_CLOSE) {
                let comment: String = pending.drain(..).collect();
                tokens.push(Token::from_comment(comment));
                inside_comment = false;
                inside_tag = false;
            }
            continue;
        }

        // Script/style body: raw text up to the literal closing tag, which is
        // emitted as a separate end token.
        if let Some((tag_name, closer)) = raw_text {
            pending.push(ch);
            if ends_with(&pending, closer) {
                let body_len = pending.len() - closer.len();
                let body: String = pending[..body_len].iter().collect();
                pending.clear();
                tokens.push(Token::Text(body));
                tokens.push(Token::End(tag_name.to_string()));
                raw_text = None;
            }
            continue;
        }

        match ch {
            '<' => {
                inside_tag = true;
                // Whatever was collected before the tag is a text node.
                if !pending.is_empty() {
                    let text: String = pending.drain(..).collect();
                    tokens.push(Token::Text(text));
                }
                pending.push(ch);
            }
            '>' => {
                inside_tag = false;
                pending.push(ch);
                let tag_text: String = pending.drain(..).collect();
                let tag = Token::from(tag_text)?;
                // Opening a script/style element switches to raw-text mode.
                raw_text = match &tag {
                    Token::Start(name, _) => match name.as_str() {
                        "script" => Some(("script", SCRIPT_CLOSE)),
                        "style" => Some(("style", STYLE_CLOSE)),
                        _ => None,
                    },
                    _ => None,
                };
                tokens.push(tag);
            }
            '-' => {
                pending.push(ch);
                // The buffer being exactly `<!--` marks the start of a comment.
                inside_comment = pending == COMMENT_OPEN;
            }
            other => {
                if inside_tag && (other == '\'' || other == '"') {
                    open_quote = Some(other);
                }
                pending.push(other);
            }
        }
    }

    // Trailing characters that never formed a complete tag become text.
    if !pending.is_empty() {
        tokens.push(Token::Text(pending.into_iter().collect()));
    }
    Ok(tokens)
}
/// Builds a node tree from a flat token list, rejecting unbalanced markup.
///
/// Only top-level elements are assembled in this call: the tokens strictly
/// between an outermost start tag and its end tag are handed to a recursive
/// call that produces the element's children. Tags listed in `VOID_TAGS`
/// never open a scope.
///
/// # Errors
///
/// Returns a message when an end tag has no open start tag, when an end tag
/// closing a top-level element carries a different name than that element,
/// or when a start tag is still open once all tokens are consumed.
fn stack_to_dom(token_stack: Vec<Token>) -> Result<Vec<Node>, String> {
    let mut dom: Vec<Node> = Vec::new();
    // Start tags that are currently open, outermost first.
    let mut open_tags: Vec<Token> = Vec::new();
    // Index of the start tag that opened the current top-level element.
    let mut outermost_start = 0;

    for (index, token) in token_stack.iter().enumerate() {
        match token {
            Token::Start(name, attrs) => {
                let opens_scope = !VOID_TAGS.contains(&name.as_str());
                if opens_scope {
                    if open_tags.is_empty() {
                        outermost_start = index;
                    }
                    open_tags.push(Token::Start(name.clone(), attrs.clone()));
                } else if open_tags.is_empty() {
                    // A nested void tag is picked up by the recursive call
                    // for its enclosing element, so only top-level ones are
                    // emitted here.
                    dom.push(
                        Element {
                            name: name.clone(),
                            attrs: attrs.clone(),
                            children: Vec::new(),
                        }
                        .into_node(),
                    );
                }
            }
            Token::End(name) => {
                let opened = open_tags
                    .pop()
                    .ok_or_else(|| format!("No start tag matches </{}>", name))?
                    .into_element();
                // Still inside an outer element: the recursion deals with it.
                if !open_tags.is_empty() {
                    continue;
                }
                if name != &opened.name {
                    return Err(format!(
                        "<{}> does not match the </{}>",
                        opened.name, name
                    ));
                }
                let children = stack_to_dom(token_stack[outermost_start + 1..index].to_vec())?;
                dom.push(
                    Element {
                        name: opened.name,
                        attrs: opened.attrs,
                        children,
                    }
                    .into_node(),
                );
            }
            _ => {
                // Any other token becomes a node only at the top level.
                if open_tags.is_empty() {
                    dom.push(token.node());
                }
            }
        }
    }

    // Anything left open is reported, innermost tag first.
    if let Some(unclosed) = open_tags.pop() {
        let start_tag_name = unclosed.into_element().name;
        return Err(format!("<{}> is not closed", start_tag_name));
    }
    Ok(dom)
}
/// Builds a node tree from a flat token list, tolerating unbalanced markup.
///
/// Works like the strict builder but never fails: an end tag with no open
/// start tag is skipped, an end tag whose name differs from the innermost
/// open tag is skipped (the open tag stays open), and an element left open at
/// the end of input takes every token after its start tag as its children.
fn try_stack_to_dom(token_stack: Vec<Token>) -> Vec<Node> {
    let mut dom: Vec<Node> = Vec::new();
    // Start tags that are currently open, outermost first.
    let mut open_tags: Vec<Token> = Vec::new();
    // Index of the start tag that opened the current top-level element.
    let mut outermost_start = 0;

    for (index, token) in token_stack.iter().enumerate() {
        match token {
            Token::Start(name, attrs) => {
                let opens_scope = !VOID_TAGS.contains(&name.as_str());
                if opens_scope {
                    if open_tags.is_empty() {
                        outermost_start = index;
                    }
                    open_tags.push(Token::Start(name.clone(), attrs.clone()));
                } else if open_tags.is_empty() {
                    // Nested void tags are emitted by the recursive call for
                    // the enclosing element.
                    dom.push(
                        Element {
                            name: name.clone(),
                            attrs: attrs.clone(),
                            children: Vec::new(),
                        }
                        .into_node(),
                    );
                }
            }
            Token::End(name) => {
                // A stray end tag with nothing open is dropped.
                if let Some(start) = open_tags.pop() {
                    let opened = start.into_element();
                    if name != &opened.name {
                        // Not the tag we expected: put the start tag back and
                        // ignore this end tag.
                        open_tags.push(Token::Start(opened.name, opened.attrs));
                    } else if open_tags.is_empty() {
                        let children =
                            try_stack_to_dom(token_stack[outermost_start + 1..index].to_vec());
                        dom.push(
                            Element {
                                name: opened.name,
                                attrs: opened.attrs,
                                children,
                            }
                            .into_node(),
                        );
                    }
                }
            }
            _ => {
                // Any other token becomes a node only at the top level.
                if open_tags.is_empty() {
                    dom.push(token.node());
                }
            }
        }
    }

    // An element that was never closed swallows the rest of the input.
    if let Some(outermost) = open_tags.into_iter().next() {
        match outermost {
            Token::Start(name, attrs) => {
                let children = try_stack_to_dom(token_stack[outermost_start + 1..].to_vec());
                dom.push(Node::Element(Element {
                    name,
                    attrs,
                    children,
                }));
            }
            // Only start tokens are ever pushed onto `open_tags`.
            _ => unreachable!(),
        }
    }
    dom
}
pub fn parse(html: &str) -> Result<Vec<Node>, String> {
let stack = html_to_stack(html)?;
stack_to_dom(stack)
}
/// Parses `html` into a list of top-level nodes without ever returning an error.
///
/// If tokenizing fails, the token list falls back to its default (empty)
/// value; the lenient tree builder then recovers from unbalanced tags as best
/// it can.
pub fn try_parse(html: &str) -> Vec<Node> {
    try_stack_to_dom(html_to_stack(html).unwrap_or_default())
}
/// Reports whether `chars` finishes with the sequence `end`.
///
/// Returns `false` when `chars` is shorter than `end` (instead of indexing
/// before the start of the buffer) and `true` when `end` is empty.
fn ends_with(chars: &[char], end: &[char]) -> bool {
    // The slice method performs the length check itself, so a short buffer
    // whose tail happens to match can no longer cause an out-of-range index.
    chars.ends_with(end)
}