mod input;
use std::collections::HashMap;
use crate::dom::comment::Comment;
use crate::dom::tag::Tag;
use crate::dom::text::Text;
use crate::dom::Dom;
use crate::dom::DomType;
use input::Input;
/// Parses an HTML document string into a DOM tree.
///
/// The document is first tokenized into a flat list of nodes by
/// `create_dom_vec`, then nested under a fresh root node by
/// `create_dom_tree`.
///
/// # Errors
///
/// Returns the error message produced by the tokenizer when the input
/// cannot be split into nodes.
pub fn parse(doc: &str) -> Result<Dom, String> {
    let mut source = Input::new(doc);
    let mut flat_nodes = create_dom_vec(&mut source)?;

    let mut root = Dom::new_root();
    create_dom_tree(&mut flat_nodes, &mut root);
    Ok(root)
}
/// Reads one attribute value and leaves the cursor on its terminating
/// delimiter.
///
/// * `tag_end` - cursor position of the `>` that closes the current tag.
/// * `dlmt` - `'"'` or `'\''` for a quoted value (the cursor must be on the
///   opening quote), or `' '` for an unquoted value (the cursor must be on
///   the first character of the value).
///
/// An unquoted value ends at the next space or, when no space occurs before
/// the end of the tag (including when no space occurs in the rest of the
/// input at all), at `tag_end`.
///
/// # Errors
///
/// Returns an error when a quoted value has no closing quote before
/// `tag_end`, or when extracting the value text fails.
fn parse_tag_attr_value(input: &mut Input, tag_end: usize, dlmt: char) -> Result<String, String> {
    // Quoted values: step over the opening quote first.
    if dlmt != ' ' {
        input.next();
    }
    let value_bgn = input.get_cursor();
    let value_end = match input.find(dlmt) {
        Some(cursor) if cursor < tag_end => cursor,
        // Unquoted values run up to the end of the tag when no space
        // precedes it. This must also hold when the remaining input has no
        // space at all; otherwise `<a href=x>` at the end of a document
        // would be rejected although the tag is properly closed.
        _ if dlmt == ' ' => tag_end,
        Some(_) => {
            return Err(format!(
                "There is no delimiter({}) to terminate the attribute.",
                dlmt
            ));
        }
        None => {
            return Err(format!("Input ends in the middle of delimiter({})", dlmt));
        }
    };
    if value_bgn == value_end {
        return Ok(String::new());
    }
    input.set_cursor(value_end);
    input.get_string(value_bgn, value_end)
}
/// Looks ahead for the `>` that closes the tag the cursor is currently in
/// and returns its cursor position. The cursor itself is restored to where
/// it was on entry.
///
/// A `>` that appears inside a quoted section is ignored. Both `"` and `'`
/// open a quoted section, matching the two quote styles accepted by
/// `parse_tag_attr`; a section is closed only by the same quote character
/// that opened it, so the other quote character may appear inside it.
///
/// Scanning starts at the character after the current cursor position.
///
/// # Errors
///
/// Returns an error when the input ends before a closing `>` is found.
fn get_tag_end(input: &mut Input) -> Result<usize, String> {
    let saved_cursor = input.get_cursor();
    let mut tag_end: Option<usize> = None;
    // Quote character of the section we are currently inside, if any.
    let mut open_quote: Option<char> = None;
    while !input.is_end() {
        input.next_char();
        match open_quote {
            Some(quote) => {
                if input.expect(quote) {
                    open_quote = None;
                }
            }
            None => {
                if input.expect('"') {
                    open_quote = Some('"');
                } else if input.expect('\'') {
                    open_quote = Some('\'');
                } else if input.expect('>') {
                    tag_end = Some(input.get_cursor());
                    break;
                }
            }
        }
    }
    input.set_cursor(saved_cursor);
    match tag_end {
        Some(position) => Ok(position),
        None => Err(String::from("Input ends in the middle of the tag")),
    }
}
/// Parses the attributes of the current tag, stores them on `tag`, and
/// leaves the cursor just past the closing `>`.
///
/// The cursor must be on the first character after the tag name. Each
/// attribute name ends at the first `=` or space before the end of the tag.
/// A value is read only when `=` is the next non-space character after the
/// name; otherwise the attribute is stored with an empty value. This keeps a
/// value-less attribute such as `disabled` in
/// `<input disabled type="text">` from taking the value of the attribute
/// that follows it.
///
/// An attribute named `/` is not stored; instead the tag is flagged through
/// `set_terminated(true)`.
///
/// # Errors
///
/// Propagates errors from `get_tag_end`, from extracting an attribute name,
/// and from `parse_tag_attr_value`.
fn parse_tag_attr(input: &mut Input, mut tag: Tag) -> Result<Tag, String> {
    let tag_end = get_tag_end(input)?;
    let mut attr_map = HashMap::new();
    loop {
        if input.expect('>') {
            input.next();
            break;
        }

        // The name ends at whichever comes first: '=', ' ' or the tag end.
        let attr_bgn = input.get_cursor();
        let mut attr_end = tag_end;
        if let Some(cursor) = input.find('=') {
            if cursor < tag_end {
                attr_end = cursor;
            }
        }
        if let Some(cursor) = input.find(' ') {
            if cursor < attr_end {
                attr_end = cursor;
            }
        }
        input.set_cursor(attr_end);
        let attr_name = input.get_string(attr_bgn, attr_end)?;

        // Look past any spaces after the name: the attribute has a value
        // only if the very next thing is '='.
        while input.get_cursor() < tag_end && input.expect(' ') {
            input.next_char();
        }
        let value = if input.get_cursor() < tag_end && input.expect('=') {
            // Step over '=' and pick the delimiter from the first character
            // of the value.
            input.next_char();
            let dlmt = if input.expect('"') {
                '"'
            } else if input.expect('\'') {
                '\''
            } else {
                ' '
            };
            parse_tag_attr_value(input, tag_end, dlmt)?
        } else {
            // No value: undo the look-ahead so the next attribute is not
            // skipped by the advance at the bottom of the loop.
            input.set_cursor(attr_end);
            String::new()
        };

        attr_map.insert(attr_name, value);
        if input.expect('>') {
            input.next();
            break;
        }
        input.next_char();
    }
    // A "/" collected as an attribute marks the tag as terminated
    // (e.g. `<img src="a" />`).
    if attr_map.remove("/").is_some() {
        tag.set_terminated(true);
    }
    tag.set_attrs(attr_map);
    Ok(tag)
}
/// Reads the tag name starting at the cursor, builds a `Tag` from it, and
/// then either finishes the tag or hands over to `parse_tag_attr`.
///
/// * `terminator` - forwarded to `Tag::set_terminator`; `parse_tag` passes
///   `true` when the tag started with `</`.
///
/// The name ends at the first space before the end of the tag, or at the
/// closing `>` when there is none. When the name runs right up to `>` and
/// ends with `/` (as in `<br/>`), the slash is removed from the name and the
/// tag is flagged through `set_terminated(true)`, the same flag
/// `parse_tag_attr` sets for `<br />`. A name consisting only of `/` is left
/// untouched.
///
/// # Errors
///
/// Propagates errors from `get_tag_end`, from extracting the name text, and
/// from `parse_tag_attr`.
fn parse_tag_name(input: &mut Input, terminator: bool) -> Result<Tag, String> {
    let name_bgn = input.get_cursor();
    let tag_end = get_tag_end(input)?;
    let mut name_end = tag_end;
    if let Some(cursor) = input.find(' ') {
        if cursor < tag_end {
            name_end = cursor;
        }
    }
    input.set_cursor(name_end);
    let raw_name = input.get_string(name_bgn, name_end)?;
    let mut tag_name = raw_name.trim();

    // `<br/>`: the slash belongs to the tag syntax, not to the name.
    let mut self_closed = false;
    if name_end == tag_end && tag_name.len() > 1 && tag_name.ends_with('/') {
        // '/' is a single byte, so slicing one byte off the end is safe.
        tag_name = tag_name[..tag_name.len() - 1].trim_end();
        self_closed = true;
    }

    let mut tag = Tag::new(tag_name);
    tag.set_terminator(terminator);
    if self_closed {
        tag.set_terminated(true);
    }

    if input.expect('>') {
        input.next();
        return Ok(tag);
    }
    // Step over the space that ended the name.
    input.next_char();
    if input.expect('>') {
        input.next();
        return Ok(tag);
    }
    parse_tag_attr(input, tag)
}
/// Parses one tag and wraps it in a `Dom` node of type `DomType::Tag`.
///
/// `create_dom_vec` calls this with the cursor on `<`. A `/` directly after
/// the `<` marks the tag as a terminator (closing tag).
///
/// # Errors
///
/// Propagates errors from `parse_tag_name`.
fn parse_tag(input: &mut Input) -> Result<Dom, String> {
    // Step over the first character of the tag.
    input.next();

    let is_closing = input.expect('/');
    if is_closing {
        input.next();
    }

    let parsed_tag = parse_tag_name(input, is_closing)?;
    let mut node = Dom::new(DomType::Tag);
    node.set_tag(parsed_tag);
    Ok(node)
}
/// Parses a comment and wraps its body in a `Dom` node of type
/// `DomType::Comment`.
///
/// `create_dom_vec` calls this with the cursor on `<!--`. The body is the
/// text between the opening marker and the next `-->`; the cursor is moved
/// just past that `-->`.
///
/// # Errors
///
/// Returns an error when no `-->` is found, or when extracting the body text
/// fails.
fn parse_comment(input: &mut Input) -> Result<Dom, String> {
    let body_bgn = input.get_cursor() + "<!--".len();
    let body_end = match input.find_str("-->") {
        Some(position) => position,
        None => return Err(String::from("Input ends in the middle of the comment")),
    };

    input.set_cursor(body_end + "-->".len());
    let body = input.get_string(body_bgn, body_end)?;

    let mut node = Dom::new(DomType::Comment);
    node.set_comment(Comment::new(&body));
    Ok(node)
}
/// Parses a run of text starting at the cursor and wraps it in a `Dom` node
/// of type `DomType::Text`.
///
/// The text extends to the next `<`, where the cursor is left. When no `<`
/// remains, the text extends to the end of the input, so trailing text
/// becomes a single node instead of one node per character.
///
/// # Errors
///
/// Propagates the error from extracting the text.
fn parse_text(input: &mut Input) -> Result<Dom, String> {
    let bgn = input.get_cursor();
    let end = match input.find('<') {
        Some(cursor) => {
            input.set_cursor(cursor);
            cursor
        }
        None => {
            // No further tag: take everything that is left. The first
            // advance is unconditional so the cursor always moves at least
            // as far as it did before.
            input.next_char();
            while !input.is_end() {
                input.next_char();
            }
            input.get_cursor()
        }
    };
    let text = Text::new(&input.get_string(bgn, end)?);
    let mut dom = Dom::new(DomType::Text);
    dom.set_text(text);
    Ok(dom)
}
/// Parses the content of a `<script>` element as one text node.
///
/// Everything from the cursor up to the next `</script` is taken verbatim,
/// and the cursor is left on that `</script`.
///
/// # Errors
///
/// Returns an error when no `</script` is found, or when extracting the text
/// fails.
fn parse_text_script(input: &mut Input) -> Result<Dom, String> {
    let script_bgn = input.get_cursor();
    let script_end = match input.find_str("</script") {
        Some(position) => position,
        None => return Err(String::from("Input ends in the middle of the tag")),
    };
    input.set_cursor(script_end);

    let body = input.get_string(script_bgn, script_end)?;
    let mut node = Dom::new(DomType::Text);
    node.set_text(Text::new(&body));
    Ok(node)
}
/// Parses a `<!doctype html>` declaration (matched case-insensitively) into
/// a tag node whose name is the doctype keyword as written in the input and
/// whose only attribute is the `html` word with an empty value.
///
/// Not called from within this part of the file, hence `allow(dead_code)`.
///
/// # Errors
///
/// Returns an error when the input at the cursor is not
/// `<!doctype html>`, or when extracting either word fails.
#[allow(dead_code)]
fn parse_doctype(input: &mut Input) -> Result<Dom, String> {
    if !input.expect_str_insensitive("<!doctype html>") {
        return Err(String::from("Input is not html"));
    }

    // Step over "<!".
    input.next();
    input.next();

    // The keyword itself becomes the tag name.
    let keyword_bgn = input.get_cursor();
    let keyword_end = keyword_bgn + "doctype".len();
    input.set_cursor(keyword_end);
    let keyword = input.get_string(keyword_bgn, keyword_end)?;
    let mut tag = Tag::new(&keyword);

    // Step over the separator between the two words.
    input.next();

    let html_bgn = input.get_cursor();
    let html_end = html_bgn + "html".len();
    input.set_cursor(html_end);
    let html_word = input.get_string(html_bgn, html_end)?;

    let mut attrs: HashMap<String, String> = HashMap::new();
    attrs.insert(html_word, String::new());
    tag.set_attrs(attrs);

    let mut node = Dom::new(DomType::Tag);
    node.set_tag(tag);

    // Step over the closing '>'.
    input.next();
    Ok(node)
}
/// Tokenizes the whole input into a flat list of comment, tag and text
/// nodes, in document order.
///
/// Everything before the first `<` is skipped. The skip stops at the end of
/// the input, so a document containing no `<` yields an empty list instead
/// of scanning past the end.
///
/// Directly after an opening `script` tag, the content up to the next
/// `</script` is read verbatim by `parse_text_script` rather than being
/// tokenized.
///
/// # Errors
///
/// Propagates the first error reported by any of the node parsers.
fn create_dom_vec(input: &mut Input) -> Result<Vec<Dom>, String> {
    let mut dom_vec: Vec<Dom> = Vec::new();

    // Skip leading content up to the first tag or comment.
    while !input.is_end() && !input.expect('<') {
        input.next_char();
    }

    while !input.is_end() {
        if input.expect_str("<!--") {
            dom_vec.push(parse_comment(input)?);
        } else if input.expect('<') {
            let dom = parse_tag(input)?;

            let mut is_bgn_script = false;
            if let DomType::Tag = dom.dom_type {
                let tag = dom.get_tag().unwrap();
                if tag.get_name() == "script" && !tag.is_terminator() {
                    is_bgn_script = true;
                }
            }
            dom_vec.push(dom);

            // Script content may contain '<', so it is not tokenized.
            if is_bgn_script && !input.expect('<') {
                dom_vec.push(parse_text_script(input)?);
            }
        } else {
            // Drop one leading space or newline before a text run.
            if input.expect(' ') || input.expect('\n') {
                input.next_char();
            }
            if !input.expect('<') {
                dom_vec.push(parse_text(input)?);
            }
        }
    }
    Ok(dom_vec)
}
/// Debug helper: pretty-prints the payload of every node in the flat list
/// to stdout using `{:#?}`.
///
/// Not called from within this part of the file, hence `allow(dead_code)`.
#[allow(dead_code)]
fn debug_print_dom_vec(dom_vec: &Vec<Dom>) {
    dom_vec.iter().for_each(|node| match node.dom_type {
        DomType::Tag => println!("{:#?}", node.get_tag().unwrap()),
        DomType::Text => println!("{:#?}", node.get_text().unwrap()),
        DomType::Comment => println!("{:#?}", node.get_comment().unwrap()),
    });
}
/// Returns the index of the first node in `dom_vec` that is a terminator
/// tag with the same name as `starter`, or `None` when there is no such
/// node.
fn search_terminator(dom_vec: &mut Vec<Dom>, starter: &Tag) -> Option<usize> {
    dom_vec.iter().position(|candidate| {
        if let DomType::Tag = candidate.dom_type {
            let tag = candidate.get_tag().unwrap();
            tag.is_terminator() && starter.get_name() == tag.get_name()
        } else {
            false
        }
    })
}
/// Moves nodes from the front of `dom_vec` under `parent`, recursing to
/// build nested elements.
///
/// For each node taken from the front of the list:
/// * a terminator tag ends the current level (the terminator is discarded);
/// * a tag that is not flagged as terminated and has a matching terminator
///   later in the list either swallows that terminator when it comes
///   directly next, or collects the nodes that follow as its children
///   through a recursive call;
/// * every other node is attached to `parent` as it is.
fn create_dom_tree(dom_vec: &mut Vec<Dom>, parent: &mut Dom) {
    while !dom_vec.is_empty() {
        let mut node = dom_vec.remove(0);
        if let DomType::Tag = node.dom_type {
            let tag = node.get_tag().unwrap();
            if tag.is_terminator() {
                return;
            }
            let closing_idx = if tag.is_terminated() {
                None
            } else {
                search_terminator(dom_vec, tag)
            };
            match closing_idx {
                // Closing tag comes directly next: the element has no children.
                Some(0) => {
                    dom_vec.remove(0);
                }
                // Closing tag is further away: gather the children first.
                Some(_) => create_dom_tree(dom_vec, &mut node),
                None => {}
            }
        }
        parent.add_child(node);
    }
}
/// Prints `dom` and all of its descendants to stdout, one node per line,
/// starting at indentation depth zero.
pub fn print_dom_tree(dom: &Dom) {
    let root_depth = 0;
    print_dom_tree_exe(dom, root_depth);
}
/// Prints one node, indented by `depth` spaces, and then its children one
/// level deeper.
///
/// Tags are printed as `<name attr="value" ...>`, text nodes as
/// `TEXT: "..."` and comments as `<!--"..."-->`. Newlines inside text and
/// comments are shown as the two characters `\n` so every node stays on one
/// line.
fn print_dom_tree_exe(dom: &Dom, depth: usize) {
    print!("{}", " ".repeat(depth));

    match dom.dom_type {
        DomType::Tag => {
            let tag = dom.get_tag().unwrap();
            print!("<{}", tag.get_name());
            if let Some(attrs) = tag.get_attrs() {
                for (attr, value) in attrs.iter() {
                    print!(" {}=\"{}\"", attr, value);
                }
            }
            println!(">");

            if let Some(children) = dom.get_children() {
                for child in children {
                    print_dom_tree_exe(child, depth + 1);
                }
            }
        }
        DomType::Text => {
            let escaped = dom.get_text().unwrap().get_text().replace("\n", "\\n");
            println!("TEXT: \"{}\"", escaped);
        }
        DomType::Comment => {
            let escaped = dom
                .get_comment()
                .unwrap()
                .get_comment()
                .replace("\n", "\\n");
            println!("<!--\"{}\"-->", escaped);
        }
    }
}