#![allow(clippy::unnecessary_wraps)]
use crate::config::{ParseOptions, RenderOptions};
use crate::error::{ErrorKind, ParseError};
use crate::position::CodeRegion;
use crate::types::{BoxDynError, GenResult, HResult};
use htmlentity::entity::{decode_chars_to, encode_chars, EncodeType, Entity, EntitySet};
use lazy_static::lazy_static;
use std::collections::HashMap;
use std::fmt;
use std::rc::{Rc, Weak};
use std::{
cell::{Ref, RefCell},
env,
fs::File,
io::prelude::*,
io::BufReader,
path::Path,
};
// Single characters the parser and renderer compare against.
const TAG_BEGIN_CHAR: char = '<';
const TAG_END_CHAR: char = '>';
const WS_CHAR: char = ' ';
const END_SLASH_CHAR: char = '/';
const EQUAL_CHAR: char = '=';
const DOUBLE_QUOTE_CHAR: char = '"';
const SINGLE_QUOTE_CHAR: char = '\'';
// Used as the "no mark character set" value of `Doc::mark_char`.
const EOF_CHAR: char = '\0';
const DASH_CHAR: char = '-';
const LEFT_BRACKET_CHAR: char = '[';
const RIGHT_BRACKET_CHAR: char = ']';
// Initial capacities for character buffers and node lists.
const ALLOC_CHAR_CAPACITY: usize = 200;
const ALLOC_NODES_CAPACITY: usize = 20;
// Lower-case tag names, stored as char arrays so they can be compared
// directly against `&[char]` tag names.
const HTML_TAG_NAME: [char; 4] = ['h', 't', 'm', 'l'];
const PRE_TAG_NAME: [char; 3] = ['p', 'r', 'e'];
const SCRIPT_TAG_NAME: [char; 6] = ['s', 'c', 'r', 'i', 'p', 't'];
const STYLE_TAG_NAME: [char; 5] = ['s', 't', 'y', 'l', 'e'];
const TITLE_TAG_NAME: [char; 5] = ['t', 'i', 't', 'l', 'e'];
const TEXTAREA_TAG_NAME: [char; 8] = ['t', 'e', 'x', 't', 'a', 'r', 'e', 'a'];
// Lazily initialised lookup tables shared by the parser and the renderer.
lazy_static! {
// For each `<!...` construct, the character sequence expected right after
// "<!" (compared upper-cased by `parse_exclamation_begin`).
static ref DETECT_CHAR_MAP: HashMap<DetectChar, Vec<char>> = {
use DetectChar::*;
let mut map = HashMap::new();
map.insert(Comment, vec![DASH_CHAR, DASH_CHAR]);
map.insert(DOCTYPE, vec!['D', 'O', 'C', 'T', 'Y', 'P', 'E']);
map.insert(
XMLCDATA,
vec![
LEFT_BRACKET_CHAR,
'C',
'D',
'A',
'T',
'A',
LEFT_BRACKET_CHAR,
],
);
map
};
// Lower-case names of the void elements; consulted by `is_void_tag`.
static ref VOID_ELEMENTS: Vec<Vec<char>> = vec![
vec!['i', 'm', 'g'],
vec!['i', 'n', 'p', 'u', 't'],
vec!['m', 'e', 't', 'a'],
vec!['l', 'i', 'n', 'k'],
vec!['b', 'r'],
vec!['h', 'r'],
vec!['c', 'o', 'l'],
vec!['b', 'a', 's', 'e'],
vec!['p', 'a', 'r', 'a', 'm'],
vec!['s', 'o', 'u', 'r', 'c', 'e'],
vec!['a', 'r', 'e', 'a'],
vec!['e', 'm', 'b', 'e', 'd'],
vec!['t', 'r', 'a', 'c', 'k'],
vec!['w', 'b', 'r'],
];
// Lower-case tag names that map to a `SpecialTag` (SVG / MathML).
static ref SPECIAL_TAG_MAP: HashMap<Vec<char>, SpecialTag> = {
use SpecialTag::*;
let mut map = HashMap::new();
map.insert(vec!['s', 'v', 'g'], Svg);
map.insert(vec!['m', 'a', 't', 'h'], MathML);
map
};
// Non-whitespace characters that `Attr::need_quoted_char` reports as
// requiring a quoted attribute value.
static ref MUST_QUOTE_ATTR_CHARS: Vec<char> = vec![
DOUBLE_QUOTE_CHAR,
SINGLE_QUOTE_CHAR,
TAG_BEGIN_CHAR,
TAG_END_CHAR,
EQUAL_CHAR,
'`',
];
}
/// Builds an owned `String` from a slice of chars.
fn chars_to_string(content: &[char]) -> String {
    let mut text = String::with_capacity(content.len());
    text.extend(content);
    text
}
/// Always returns `Err`: a `ParseError` of the given `kind` whose code region
/// is computed from `context` and the character index `position`.
fn create_parse_error(kind: ErrorKind, position: usize, context: &str) -> HResult {
    let region = CodeRegion::from_context_index(context, position);
    Err(ParseError::new(kind, region))
}
/// Returns `true` when `name` exactly matches one of the (lower-case) entries
/// in `VOID_ELEMENTS`. No case folding is applied here.
fn is_void_tag(name: &[char]) -> bool {
    VOID_ELEMENTS
        .iter()
        .any(|void_name| is_equal_chars(void_name, name, &None))
}
/// Returns `true` when `name` is `textarea` or `title`, compared with the
/// optional case folding described on `is_equal_chars`.
fn is_plain_text_tag(name: &[char], case: &Option<NameCase>) -> bool {
    let candidates: [&[char]; 2] = [&TEXTAREA_TAG_NAME, &TITLE_TAG_NAME];
    candidates
        .iter()
        .any(|tag| is_equal_chars(name, tag, case))
}
/// Returns `true` when `name` is `style` or `script`, compared with the
/// optional case folding described on `is_equal_chars`.
fn is_script_or_style(name: &[char], case: &Option<NameCase>) -> bool {
    let candidates: [&[char]; 2] = [&STYLE_TAG_NAME, &SCRIPT_TAG_NAME];
    candidates
        .iter()
        .any(|tag| is_equal_chars(name, tag, case))
}
/// Returns `true` for `script`, `style`, `textarea` and `title`.
pub fn is_content_tag(name: &[char], case: &Option<NameCase>) -> bool {
    if is_script_or_style(name, case) {
        return true;
    }
    is_plain_text_tag(name, case)
}
/// Decides whether a node of `node_type` may be inserted into the tag `name`.
///
/// The tag name is compared case-insensitively (ASCII):
/// * void elements accept no children;
/// * `textarea` / `title` accept only text or whitespace-only nodes;
/// * CDATA sections are accepted only inside tags listed in
///   `SPECIAL_TAG_MAP` (svg / math);
/// * everything else is allowed.
pub fn allow_insert(name: &[char], node_type: NodeType) -> bool {
    let lc_name = name
        .iter()
        .map(|ch| ch.to_ascii_lowercase())
        .collect::<Vec<char>>();
    if is_void_tag(&lc_name) {
        return false;
    }
    // Use the lower-cased name here as well: comparing the raw `name` without
    // case folding let `TITLE` / `TextArea` bypass the restriction.
    if is_plain_text_tag(&lc_name, &None) {
        return matches!(node_type, NodeType::Text | NodeType::SpacesBetweenTag);
    }
    if node_type == NodeType::XMLCDATA {
        return SPECIAL_TAG_MAP.contains_key(&lc_name);
    }
    true
}
/// The kinds of `<!...` construct the parser can detect; used as the key of
/// `DETECT_CHAR_MAP`.
#[derive(PartialEq, Eq, Hash)]
pub enum DetectChar {
/// `<!--` comment.
Comment,
/// `<!DOCTYPE` declaration.
DOCTYPE,
/// `<![CDATA[` section.
XMLCDATA,
}
/// The kind of a `Node` in the parsed tree, with explicit discriminants.
///
/// `AbstractRoot` is the synthetic root created by `Doc::new`;
/// `SpacesBetweenTag` is a text node consisting only of ASCII whitespace
/// (see `Node::create_text_node`); `TagEnd` is a closing tag.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum NodeType {
AbstractRoot = 0, HTMLDOCTYPE = 1, Comment = 2, Text = 3, SpacesBetweenTag = 4, Tag = 5, TagEnd = 6, XMLCDATA = 7, }
impl Default for NodeType {
fn default() -> Self {
NodeType::AbstractRoot
}
}
/// The parser state: what kind of construct the parser is currently inside.
/// `Doc::set_code_in` maps each state to the handler that consumes the next
/// character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodeTypeIn {
AbstractRoot, Unkown, UnkownTag, Tag, TagEnd, ExclamationBegin, Comment, HTMLDOCTYPE, EscapeableRawText, HTMLScript, HTMLStyle, XMLCDATA, TextNode, }
/// Encodes `content` with `htmlentity::encode_chars`, using the HTML entity
/// set and named entities.
// NOTE(review): exactly which characters get encoded is defined by the
// `htmlentity` crate — confirm against its documentation.
fn get_content_encode(content: &[char]) -> Vec<char> {
encode_chars(content, EntitySet::Html, EncodeType::Named)
}
/// Returns the prefix of `target` with trailing ASCII whitespace removed.
fn chars_trim_end(target: &[char]) -> &[char] {
    // One past the last non-whitespace char, or 0 when there is none.
    let keep = target
        .iter()
        .rposition(|ch| !ch.is_ascii_whitespace())
        .map_or(0, |last| last + 1);
    &target[..keep]
}
/// Compares `target` against `cmp` char by char.
///
/// * `None` — exact comparison.
/// * `Some(NameCase::Lower)` — a char of `target` also matches when its ASCII
///   lower-case form equals the char in `cmp`.
/// * `Some(NameCase::Upper)` — same, using the ASCII upper-case form.
///
/// Only `target` is folded; `cmp` is taken as-is. Slices of different length
/// never match.
fn is_equal_chars(target: &[char], cmp: &[char], case: &Option<NameCase>) -> bool {
    if target.len() != cmp.len() {
        return false;
    }
    target.iter().zip(cmp).all(|(lhs, rhs)| {
        lhs == rhs
            || match case {
                Some(NameCase::Lower) => lhs.to_ascii_lowercase() == *rhs,
                Some(NameCase::Upper) => lhs.to_ascii_uppercase() == *rhs,
                None => false,
            }
    })
}
/// Compares two char slices ignoring ASCII letter case.
///
/// Returns `(is_equal, is_total_same)`: the first flag is `true` when the
/// slices match ignoring ASCII case, the second when they additionally match
/// exactly. Any mismatch (including a length mismatch) yields `(false, false)`.
fn is_equal_chars_ignore_case(target: &[char], cmp: &[char]) -> (bool, bool) {
    if target.len() != cmp.len() {
        return (false, false);
    }
    let mut exact = true;
    for (lhs, rhs) in target.iter().zip(cmp) {
        if lhs == rhs {
            continue;
        }
        // Two different chars can only be "equal" here when they are the same
        // ASCII letter in opposite case.
        if !lhs.eq_ignore_ascii_case(rhs) {
            return (false, false);
        }
        exact = false;
    }
    (true, exact)
}
/// One attribute of a tag (or one token of a doctype declaration).
#[derive(Debug, Default)]
pub struct Attr {
/// Attribute name, if any.
pub key: Option<AttrData>,
/// Attribute value, if any.
pub value: Option<AttrData>,
/// The quote character that wrapped the value in the source, if any.
pub quote: Option<char>,
/// When `true`, `build` keeps the quotes even if asked to remove them.
pub need_quote: bool,
}
/// The raw characters of an attribute key or value.
#[derive(Debug, Default)]
pub struct AttrData {
/// The characters as collected by the parser.
pub content: Vec<char>,
}
impl Attr {
    /// Serialises the attribute as `key`, `key=value` or `value`.
    ///
    /// The value is wrapped in its original quote character when one was
    /// recorded and either `need_quote` is set or `remove_quote` is `false`.
    pub fn build(&self, remove_quote: bool) -> Vec<char> {
        let mut out = Vec::with_capacity(ALLOC_CHAR_CAPACITY);
        if let Some(key) = &self.key {
            out.extend_from_slice(&key.content);
        }
        if let Some(value) = &self.value {
            if self.key.is_some() {
                out.push(EQUAL_CHAR);
            }
            // `Some(quote)` only when the quotes have to be emitted.
            let wrapper = match self.quote {
                Some(quote) if self.need_quote || !remove_quote => Some(quote),
                _ => None,
            };
            out.extend(wrapper);
            out.extend_from_slice(&value.content);
            out.extend(wrapper);
        }
        out
    }

    /// Returns `true` when `ch` is ASCII whitespace or one of
    /// `MUST_QUOTE_ATTR_CHARS`.
    pub fn need_quoted_char(ch: &char) -> bool {
        MUST_QUOTE_ATTR_CHARS.contains(ch) || ch.is_ascii_whitespace()
    }

    /// Returns the attribute value as a `String` when the key is `id`
    /// (ASCII case-insensitive) and a value is present.
    pub fn check_if_id(&self) -> Option<String> {
        let key = self.key.as_ref()?;
        if !is_equal_chars(&key.content, &['i', 'd'], &Some(NameCase::Lower)) {
            return None;
        }
        self.value
            .as_ref()
            .map(|value| chars_to_string(&value.content))
    }
}
/// Selects which ASCII case folding `is_equal_chars` applies to its `target`
/// argument before comparing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameCase {
    /// Fold `target` to ASCII upper case.
    Upper,
    /// Fold `target` to ASCII lower case.
    Lower,
}
/// Sub-state of the parser while it is inside a tag's attribute list.
#[derive(Debug, PartialEq, Eq)]
pub enum TagCodeIn {
/// Between attributes, waiting for the next token.
Wait,
/// Reading an attribute key.
Key,
/// A key has just ended (whitespace or `>` seen).
KeyEnd,
/// `=` seen; waiting for the value to start.
WaitValue,
/// Reading an attribute value.
Value,
/// An unquoted value has just been ended by `>`.
ValueEnd,
}
impl Default for TagCodeIn {
fn default() -> Self {
TagCodeIn::Wait
}
}
/// Metadata attached to tag (and doctype) nodes: name, attributes and flags.
#[derive(Debug, Default)]
pub struct TagMeta {
// Current attribute-list parsing sub-state.
code_in: TagCodeIn,
/// Whether the tag is a void element.
pub is_void: bool,
/// Set when the tag was written as self-closing (`/>`).
pub self_closed: bool,
/// Read by the renderer together with `always_close_void` to decide
/// whether to emit ` /`.
// NOTE(review): where this flag is set is not visible in this part of the file.
pub auto_fix: bool,
/// Tag name as written in the source.
pub name: Vec<char>,
/// Attributes in source order.
pub attrs: Vec<Attr>,
/// Presumably maps lower-cased attribute names to their index in `attrs`;
/// it is not populated in this part of the file — TODO confirm.
pub lc_name_map: HashMap<String, usize>,
}
impl TagMeta {
    /// Serialises all attributes, each one prefixed by a single space.
    pub fn attrs_to_string(&self, remove_quote: bool) -> Vec<char> {
        self.attrs
            .iter()
            .flat_map(|attr| std::iter::once(WS_CHAR).chain(attr.build(remove_quote)))
            .collect()
    }

    /// Appends a new attribute that has an (empty) key and no value.
    pub fn add_attr_key(&mut self) {
        self.attrs.push(Attr {
            key: Some(AttrData::default()),
            ..Default::default()
        });
    }

    /// Appends a new attribute that has an (empty) value, the given quote and
    /// no key.
    pub fn add_attr_value(&mut self, quote: Option<char>) {
        self.attrs.push(Attr {
            value: Some(AttrData::default()),
            quote,
            ..Default::default()
        });
    }

    /// Sets the key of the most recently added attribute.
    ///
    /// # Panics
    /// Panics when no attribute has been added yet.
    pub fn set_attr_key(&mut self, key: Vec<char>) {
        let attr = self
            .attrs
            .last_mut()
            .expect("Attrs must not be empty when call set_attr_key");
        attr.key = Some(AttrData { content: key });
    }

    /// Sets the value and quote of the most recently added attribute and
    /// returns a reference to it.
    ///
    /// # Panics
    /// Panics when no attribute has been added yet.
    pub fn set_attr_value(&mut self, value: Vec<char>, quote: Option<char>) -> &Attr {
        let attr = self
            .attrs
            .last_mut()
            // The message previously named `set_attr_key` (copy-paste slip).
            .expect("Attrs must not be empty when call set_attr_value");
        attr.value = Some(AttrData { content: value });
        attr.quote = quote;
        attr
    }
}
/// Shared, mutable handle to a `Node`.
pub type RefNode = Rc<RefCell<Node>>;
// Shared, mutable handle to a `Doc`.
type RefDoc = Rc<RefCell<Doc>>;
// Mutable state threaded through `Node::build_tree` / `Node::build_node`.
#[derive(Default, Clone)]
struct RenderStatus {
// Whether a full render, an inner-HTML render or an inner-text render was requested.
inner_type: RenderStatuInnerType,
// True while rendering inside a `<pre>` element (whitespace is then preserved).
is_in_pre: bool,
// True only for the node `Node::build` was called on.
root: bool,
}
// What `Node::build` was asked to produce.
#[derive(Clone)]
enum RenderStatuInnerType {
// Full render, including the node's own tags.
None,
// `options.inner_html` is set: the root node's own tags are omitted.
Html,
// `inner_text` is set: no tags are emitted at all.
Text,
}
impl Default for RenderStatuInnerType {
fn default() -> Self {
RenderStatuInnerType::None
}
}
/// A node of the parsed tree.
#[derive(Default)]
pub struct Node {
/// Position of this node in its parent's `childs` (set by `Doc::add_new_node`).
pub index: usize,
/// What kind of node this is.
pub node_type: NodeType,
/// Character position in the source where the node begins.
pub begin_at: usize,
/// Character position in the source where the node ends.
pub end_at: usize,
/// The matching closing-tag node, for tag nodes that have one.
pub end_tag: Option<RefNode>,
/// Weak link to the node that was current before this one; the parser sets
/// it on end-tag nodes when `auto_fix_unclosed_tag` is enabled.
pub prev: Option<Weak<RefCell<Node>>>,
/// Weak link to the parent node.
pub parent: Option<Weak<RefCell<Node>>>,
/// Weak link to the document's root node.
pub root: Option<Weak<RefCell<Node>>>,
/// Weak link to the owning document (set on the root by `Doc::into_root`).
pub document: Option<Weak<RefCell<Doc>>>,
/// Text / comment / CDATA content, raw content of content tags, or the
/// name of an end tag.
pub content: Option<Vec<char>>,
/// Child nodes, `None` until the first child is added.
pub childs: Option<Vec<RefNode>>,
/// Tag metadata for tag and doctype nodes.
pub meta: Option<RefCell<TagMeta>>,
}
/// Manual `Debug`: the weak back-links (`parent`, `root`, `document`) are
/// printed only as presence flags, and `prev` is omitted.
impl fmt::Debug for Node {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let has_parent = self.parent.is_some();
        let has_root = self.root.is_some();
        let has_document = self.document.is_some();
        let mut out = f.debug_struct("Node");
        out.field("index", &self.index);
        out.field("node_type", &self.node_type);
        out.field("begin_at", &self.begin_at);
        out.field("end_at", &self.end_at);
        out.field("content", &self.content);
        out.field("childs", &self.childs);
        out.field("meta", &self.meta);
        out.field("end_tag", &self.end_tag);
        out.field("parent", &has_parent);
        out.field("root", &has_root);
        out.field("document", &has_document);
        out.finish()
    }
}
impl Node {
pub fn new(node_type: NodeType, code_at: usize) -> Self {
Node {
node_type,
begin_at: code_at,
end_at: code_at,
..Default::default()
}
}
pub fn create_text_node(content: Vec<char>, code_at: Option<usize>) -> Self {
let mut is_all_spaces = true;
for ch in &content {
if !ch.is_ascii_whitespace() {
is_all_spaces = false;
break;
}
}
let node_type = if is_all_spaces {
NodeType::SpacesBetweenTag
} else {
NodeType::Text
};
let code_at = code_at.unwrap_or_default();
Node {
node_type,
begin_at: code_at,
end_at: code_at,
content: Some(content),
..Default::default()
}
}
/// Renders this single node (not its children or end tag) into `result`.
///
/// `status` carries the render mode and the "inside `<pre>`" flag; tag and
/// end-tag nodes update `status.is_in_pre` as a side effect.
///
/// # Panics
/// Panics when a node lacks the data its type requires (content for text,
/// whitespace and end-tag nodes; meta for tag and doctype nodes).
fn build_node(&self, options: &RenderOptions, status: &mut RenderStatus, result: &mut Vec<char>) {
    let is_in_pre = status.is_in_pre;
    let is_root = status.root;
    // The root node prints its own tags only for a full render; descendants
    // print theirs unless an inner-text render was requested.
    let need_tag = if is_root {
        matches!(status.inner_type, RenderStatuInnerType::None)
    } else {
        !matches!(status.inner_type, RenderStatuInnerType::Text)
    };
    use NodeType::*;
    match self.node_type {
        Text => {
            let content = self
                .content
                .as_ref()
                .expect("Text node's content must not empty");
            if !is_in_pre && options.minify_spaces {
                // Collapse runs of ASCII whitespace into their first char.
                let mut prev_is_space = false;
                if options.decode_entity {
                    let mut start_index: usize = 0;
                    let mut is_in_entity = false;
                    for (index, ch) in content.iter().enumerate() {
                        if !is_in_entity {
                            if ch == &'&' {
                                is_in_entity = true;
                                start_index = index;
                            } else {
                                if ch.is_ascii_whitespace() {
                                    if prev_is_space {
                                        continue;
                                    }
                                    prev_is_space = true;
                                } else {
                                    prev_is_space = false;
                                }
                                result.push(*ch);
                            }
                        } else if ch == &';' {
                            let entity = &content[start_index + 1..index];
                            if let Some(decoded) = Entity::decode(entity) {
                                result.push(decoded);
                            } else {
                                // Not a known entity: emit it unchanged.
                                result.push('&');
                                result.extend_from_slice(entity);
                                result.push(';');
                            }
                            is_in_entity = false;
                            // The entity output separates whitespace runs.
                            // Without this reset, "a &amp; b" lost the space
                            // after the entity and rendered as "a &b".
                            prev_is_space = false;
                        }
                    }
                    if is_in_entity {
                        // Unterminated `&...`: emit the tail verbatim.
                        result.extend(&content[start_index..]);
                    }
                } else {
                    for &c in content {
                        if c.is_ascii_whitespace() {
                            if prev_is_space {
                                continue;
                            }
                            prev_is_space = true;
                        } else {
                            prev_is_space = false;
                        }
                        result.push(c);
                    }
                }
            } else if options.decode_entity {
                decode_chars_to(content, result);
            } else {
                result.extend_from_slice(content);
            }
        }
        Tag => {
            let meta = self
                .meta
                .as_ref()
                .expect("tag's meta data must have.")
                .borrow();
            let tag_name = &meta.name;
            status.is_in_pre =
                is_in_pre || is_equal_chars(tag_name, &PRE_TAG_NAME, &Some(NameCase::Lower));
            if need_tag {
                result.push('<');
                if !options.lowercase_tagname {
                    result.extend_from_slice(tag_name);
                } else {
                    result.extend(tag_name.iter().map(|ch| ch.to_ascii_lowercase()));
                }
                if !meta.attrs.is_empty() {
                    let attrs = meta.attrs_to_string(options.remove_attr_quote);
                    result.extend_from_slice(&attrs);
                }
                if meta.self_closed || (meta.auto_fix && options.always_close_void) {
                    result.push(WS_CHAR);
                    result.push('/');
                }
                result.push(TAG_END_CHAR);
            }
            // Tag nodes with `content` are the raw-content tags filled in by
            // `parse_content_tag`.
            if let Some(content) = &self.content {
                let need_encode =
                    options.encode_content && is_plain_text_tag(tag_name, &Some(NameCase::Lower));
                if !need_encode {
                    result.extend_from_slice(content);
                } else {
                    result.extend(get_content_encode(content));
                }
            }
        }
        TagEnd => {
            let content = self
                .content
                .as_ref()
                .expect("End tag's tag name must not empty");
            let mut content = &content[..];
            if is_in_pre
                && is_equal_chars(
                    chars_trim_end(content),
                    &PRE_TAG_NAME,
                    &Some(NameCase::Lower),
                )
            {
                status.is_in_pre = false;
            }
            if need_tag {
                result.extend_from_slice(&['<', '/']);
                if options.remove_endtag_space {
                    content = chars_trim_end(content);
                }
                if options.lowercase_tagname {
                    result.extend(content.iter().map(|e| e.to_ascii_lowercase()));
                } else {
                    result.extend_from_slice(content);
                }
                result.push('>');
            }
        }
        SpacesBetweenTag => {
            // Whitespace-only nodes are dropped when minifying outside <pre>.
            if is_in_pre || !options.minify_spaces {
                let content = self
                    .content
                    .as_ref()
                    .expect("Spaces between node must have whitespcaes");
                result.extend_from_slice(content);
            }
        }
        HTMLDOCTYPE => {
            let meta = self
                .meta
                .as_ref()
                .expect("tag's meta data must have.")
                .borrow();
            result.extend_from_slice(&['<', '!']);
            result.extend_from_slice(&meta.name);
            if !meta.attrs.is_empty() {
                result.extend_from_slice(&meta.attrs_to_string(options.remove_attr_quote));
            }
            result.push('>');
        }
        Comment => {
            if !options.remove_comment {
                let is_inner_text = matches!(status.inner_type, RenderStatuInnerType::Text);
                // Comments are skipped in inner-text mode unless the comment
                // itself is the root; then only its bare content is emitted.
                if is_root || !is_inner_text {
                    let need_wrap = !(is_root && is_inner_text);
                    if need_wrap {
                        result.extend_from_slice(&['<', '!', '-', '-']);
                    }
                    if let Some(content) = &self.content {
                        result.extend_from_slice(content);
                    }
                    if need_wrap {
                        result.extend_from_slice(&['-', '-', '>']);
                    }
                }
            }
        }
        XMLCDATA => {
            result.extend_from_slice(&['<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[']);
            if let Some(content) = &self.content {
                result.extend_from_slice(content);
            }
            result.extend_from_slice(&[']', ']', '>']);
        }
        _ => {}
    }
}
/// Renders this node, then its children recursively, then its end tag.
///
/// When this node is the render root, the children are rendered with a copy
/// of `status` whose `root` flag is cleared; the end tag is still rendered
/// with the caller's `status`.
fn build_tree(&self, options: &RenderOptions, status: &mut RenderStatus, result: &mut Vec<char>) {
    self.build_node(options, status, result);
    if let Some(children) = &self.childs {
        let mut detached;
        let child_status: &mut RenderStatus = if status.root {
            detached = status.clone();
            detached.root = false;
            &mut detached
        } else {
            &mut *status
        };
        for child in children {
            child.borrow().build_tree(options, child_status, result);
        }
    }
    if let Some(closing) = &self.end_tag {
        closing.borrow().build_node(options, status, result);
    }
}
/// Renders this node and its subtree to a vector of chars.
///
/// With `inner_text == true` only text content is produced; with
/// `options.inner_html` the node's own tags are omitted; otherwise the full
/// markup is produced. For an abstract root in inner-html / inner-text mode,
/// the render is delegated to its single tag child; an empty vector is
/// returned when the root has no child list at all.
///
/// # Panics
/// * when `inner_text` and `options.inner_html` are both set;
/// * in inner-html / inner-text mode, when called on an abstract root with
///   more than one tag child, or with a child list that contains no tag;
/// * in inner-html mode on a non-tag node, or in inner-text mode on a node
///   that is not a tag, comment or CDATA node.
pub fn build(&self, options: &RenderOptions, inner_text: bool) -> Vec<char> {
let inner_type = if inner_text {
if options.inner_html {
panic!("The 'inner_html' render option can't set true when 'inner_text' is true");
}
RenderStatuInnerType::Text
} else if options.inner_html {
RenderStatuInnerType::Html
} else {
RenderStatuInnerType::None
};
// Diverging helper used for every "wrong node type" failure below.
let throw_wrong_node = |node_type: &NodeType| -> ! {
panic!(
"`inner_html` should only used for tag node, but found '{:?}'",
node_type
);
};
let status = &mut RenderStatus {
inner_type,
root: true,
..Default::default()
};
let mut result: Vec<char> = Vec::with_capacity(50);
if matches!(
status.inner_type,
RenderStatuInnerType::Html | RenderStatuInnerType::Text
) {
if matches!(self.node_type, NodeType::AbstractRoot) {
if let Some(childs) = &self.childs {
// Look for the one and only tag child of the abstract root.
let mut finded = false;
let mut child_node: Option<Rc<RefCell<Node>>> = None;
for child in childs {
if child.borrow().node_type == NodeType::Tag {
if finded {
panic!("`inner_html` can't used in abstract root node which has multiple tag node childs.");
}
child_node = Some(Rc::clone(child));
finded = true;
}
}
if let Some(child_node) = child_node {
child_node.borrow().build_tree(options, status, &mut result);
return result;
}
// No tag child: report the type of the last child.
throw_wrong_node(&childs[childs.len() - 1].borrow().node_type);
}
return vec![];
}
match status.inner_type {
RenderStatuInnerType::Html => {
if self.node_type != NodeType::Tag {
throw_wrong_node(&self.node_type);
}
}
RenderStatuInnerType::Text => {
if !(matches!(
self.node_type,
NodeType::Tag | NodeType::Comment | NodeType::XMLCDATA
)) {
throw_wrong_node(&self.node_type);
}
}
_ => {}
}
}
self.build_tree(options, status, &mut result);
result
}
/// Checks whether this node is the root of a complete HTML document.
///
/// Returns `(is_document, syntax_ok)`:
/// * `is_document` is `true` when this is an abstract root with an `html` tag
///   child (name matched ignoring ASCII case);
/// * `syntax_ok` is `false` only for a document whose top level also contains
///   something other than comments, whitespace, a doctype and that single
///   `html` tag. For non-documents it is always `true`.
pub fn is_document(&self) -> (bool, bool) {
let mut is_document = false;
let mut syntax_ok = true;
use NodeType::*;
if self.node_type == AbstractRoot {
if let Some(childs) = &self.childs {
let mut find_html = false;
for child in childs {
let child_node = child.borrow();
match child_node.node_type {
// Allowed next to <html> at the top level.
Comment | SpacesBetweenTag | HTMLDOCTYPE => {
}
Tag => {
if find_html {
// A second top-level tag after <html>.
syntax_ok = false;
break;
} else {
if let Some(meta) = &child_node.meta {
if is_equal_chars(
meta.borrow().name.as_slice(),
&HTML_TAG_NAME,
&Some(NameCase::Lower),
) {
find_html = true;
is_document = true;
} else {
syntax_ok = false;
}
} else {
syntax_ok = false;
}
}
}
_ => {
syntax_ok = false;
}
}
}
}
// Syntax problems are only reported for actual documents.
if !is_document {
syntax_ok = true;
}
}
(is_document, syntax_ok)
}
pub fn is_same(cur: &RefNode, other: &RefNode) -> bool {
std::ptr::eq(cur.as_ptr() as *const _, other.as_ptr() as *const _)
}
}
/// Foreign-content root tags; the values of `SPECIAL_TAG_MAP`.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum SpecialTag {
/// `<math>`
MathML,
/// `<svg>`
Svg,
}
// Signature of a parser state handler: receives the document, the current
// character and the source text used as context for error reporting.
type NextHandle = fn(&mut Doc, char, &str) -> HResult;
// Placeholder handler that ignores its input; `Doc::new` installs it before
// `Doc::init` replaces it.
fn noop(_d: &mut Doc, _c: char, _content: &str) -> HResult {
Ok(())
}
// Handler for the idle state (installed for `Unkown` and `AbstractRoot`).
// `<` starts a tag of yet unknown kind; any other character starts text.
fn parse_wait(doc: &mut Doc, c: char, _: &str) -> HResult {
match c {
TAG_BEGIN_CHAR => {
// Remember where the tag starts, for node positions and error reports.
doc.mem_position = doc.position;
doc.set_code_in(CodeTypeIn::UnkownTag);
}
_ => {
if doc.parse_options.auto_fix_unexpected_endtag
&& matches!(
doc.current_node.borrow().node_type,
NodeType::Text | NodeType::SpacesBetweenTag
) {
// The current node is already a text node (`parse_tagend` restores it
// after dropping an unexpected end tag): continue that node by moving
// its content back into the working buffer instead of creating a new one.
let mut current_node = doc.current_node.borrow_mut();
if let Some(content) = &mut current_node.content {
doc.prev_chars.append(content);
}
if matches!(current_node.node_type, NodeType::Text) {
doc.repeat_whitespace = false;
} else {
doc.repeat_whitespace = c.is_ascii_whitespace();
}
} else {
doc.add_new_node(Rc::new(RefCell::new(Node::new(
NodeType::Text,
doc.position,
))));
// Tracks whether the text seen so far is whitespace only.
doc.repeat_whitespace = c.is_ascii_whitespace();
}
doc.prev_chars.push(c);
doc.set_code_in(CodeTypeIn::TextNode);
}
}
Ok(())
}
// Handler for the `TextNode` state: accumulates characters until `<`.
fn parse_text(doc: &mut Doc, c: char, _: &str) -> HResult {
use CodeTypeIn::*;
match c {
TAG_BEGIN_CHAR => {
// The text ends here: move the buffered chars into the node.
let content = doc.clean_chars_to_vec();
doc.current_node.borrow_mut().content = Some(content);
// Remember whitespace-only text nodes so they can be re-checked later.
// NOTE(review): presumably consumed by `set_text_spaces_between`, which is
// not visible here — confirm.
doc.check_textnode = if doc.repeat_whitespace {
Some(Rc::clone(&doc.current_node))
} else {
None
};
doc.mem_position = doc.position;
doc.set_tag_end_info();
doc.set_code_in(UnkownTag);
}
_ => {
// Stays true only while every char seen so far is ASCII whitespace.
if doc.repeat_whitespace {
doc.repeat_whitespace = c.is_ascii_whitespace();
}
doc.prev_chars.push(c);
}
}
Ok(())
}
fn parse_doctype_name(doc: &mut Doc, c: char, context: &str) -> HResult {
if c.is_ascii_whitespace() {
doc.set_tag_meta();
doc.set_tag_code_in(TagCodeIn::Wait);
return Ok(());
}
doc.error(ErrorKind::WrongHtmlDoctype(c), context)
}
fn parse_tag_name(doc: &mut Doc, c: char, _: &str) -> HResult {
use CodeTypeIn::*;
match c {
TAG_END_CHAR => {
let is_void = doc.set_tag_meta();
if is_void {
doc.chain_nodes.pop();
doc.set_code_in(Unkown);
} else {
doc.check_tag_type_do();
}
}
END_SLASH_CHAR => {
doc.set_tag_meta();
doc.handle = parse_tag_self_closing;
}
_ => {
if c.is_ascii_whitespace() {
doc.set_tag_meta();
doc.set_tag_code_in(TagCodeIn::Wait);
} else {
doc.prev_chars.push(c);
}
}
};
Ok(())
}
// Handler installed after a `/` inside a tag.
// `>` completes a self-closing tag, a further `/` is ignored, and any other
// character is processed by `parse_tag_wait`.
//
// Unless `allow_self_closing` is set, self-closing is only accepted for void
// elements or while inside a special (SVG / MathML) subtree; otherwise
// `ErrorKind::WrongSelfClosing` is reported.
fn parse_tag_self_closing(doc: &mut Doc, c: char, context: &str) -> HResult {
match c {
TAG_END_CHAR => {
if let Some(meta) = &doc.current_node.borrow_mut().meta {
if !doc.parse_options.allow_self_closing {
if !(meta.borrow().is_void || doc.in_special.is_some()) {
return doc.error(
ErrorKind::WrongSelfClosing(chars_to_string(&meta.borrow().name)),
context,
);
}
}
meta.borrow_mut().self_closed = true;
} else {
panic!("self-closing tag's meta is emtpy");
}
// The tag is complete and cannot have children.
doc.chain_nodes.pop();
doc.set_code_in(CodeTypeIn::Unkown);
}
END_SLASH_CHAR => {
}
_ => {
parse_tag_wait(doc, c, context)?;
}
}
Ok(())
}
// Handler used between attributes of a tag (or tokens of a doctype).
// Decides, from the current character and attribute sub-state, whether a
// key or a value starts, or the tag ends.
fn parse_tag_wait(doc: &mut Doc, c: char, _: &str) -> HResult {
match c {
DOUBLE_QUOTE_CHAR | SINGLE_QUOTE_CHAR => {
let is_in_wait_value = doc.is_tag_code_in(&TagCodeIn::WaitValue);
if !is_in_wait_value {
if matches!(doc.code_in, CodeTypeIn::HTMLDOCTYPE) {
// In a doctype a bare quoted string is a value without a key.
doc.add_tag_attr_value(Some(c));
} else {
// In a normal tag a quote that does not follow `=` starts a key
// and is kept as part of that key.
doc.add_tag_attr_key();
doc.prev_chars.push(c);
doc.set_tag_code_in(TagCodeIn::Key);
return Ok(());
}
}
// Remember the opening quote; the value ends at the matching quote.
doc.mark_char = c;
doc.set_tag_code_in(TagCodeIn::Value);
}
TAG_END_CHAR => {
if doc
.current_node
.borrow()
.meta
.as_ref()
.expect("When parse tag in wait, the tag meta must not empty.")
.borrow()
.is_void
{
// Void elements have no children: leave the tag immediately.
doc.chain_nodes.pop();
doc.set_code_in(CodeTypeIn::Unkown);
} else {
doc.check_tag_type_do();
}
}
END_SLASH_CHAR => {
doc.handle = parse_tag_self_closing;
}
EQUAL_CHAR => {
if doc.is_tag_code_in(&TagCodeIn::KeyEnd) {
doc.set_tag_code_in(TagCodeIn::WaitValue);
} else {
// A `=` that does not follow a key starts an unquoted value
// (`mark_char == WS_CHAR` marks "unquoted").
doc.prev_chars.push(c);
doc.mark_char = WS_CHAR;
doc.set_tag_code_in(TagCodeIn::Value);
}
}
_ => {
// Whitespace is skipped; any other character starts a token.
if !c.is_ascii_whitespace() {
doc.prev_chars.push(c);
if doc.is_tag_code_in(&TagCodeIn::WaitValue) {
doc.mark_char = WS_CHAR;
doc.set_tag_code_in(TagCodeIn::Value);
} else {
doc.add_tag_attr_key();
doc.set_tag_code_in(TagCodeIn::Key);
}
}
}
}
Ok(())
}
// Handler that reads an attribute key. The key ends at `=`, `>`, `/` or
// ASCII whitespace; every other character is appended to it.
fn parse_tag_attr_key(doc: &mut Doc, c: char, context: &str) -> HResult {
match c {
EQUAL_CHAR => {
doc.set_tag_attr_key();
doc.set_tag_code_in(TagCodeIn::WaitValue);
}
TAG_END_CHAR => {
doc.set_tag_attr_key();
doc.set_tag_code_in(TagCodeIn::KeyEnd);
// Let the wait handler finish the tag.
parse_tag_wait(doc, c, context)?;
}
END_SLASH_CHAR => {
doc.set_tag_attr_key();
doc.set_tag_code_in(TagCodeIn::Wait);
}
_ => {
if c.is_ascii_whitespace() {
doc.set_tag_attr_key();
doc.set_tag_code_in(TagCodeIn::KeyEnd);
doc.handle = parse_tag_wait;
} else {
doc.prev_chars.push(c);
}
}
};
Ok(())
}
// Handler that reads an attribute value.
// `doc.mark_char` holds the opening quote, or `WS_CHAR` for an unquoted
// value. Quoted values end at the matching quote; unquoted values end at
// ASCII whitespace or at `>`.
fn parse_tag_attr_value(doc: &mut Doc, c: char, context: &str) -> HResult {
let quote = doc.mark_char;
let (is_end, attr_quote) = if quote == WS_CHAR {
if c == TAG_END_CHAR {
// `>` ends both the unquoted value and the tag.
doc.set_tag_attr_value(None);
doc.set_tag_code_in(TagCodeIn::ValueEnd);
parse_tag_wait(doc, c, context)?;
return Ok(());
}
(c.is_ascii_whitespace(), None)
} else {
(c == quote, Some(quote))
};
if is_end {
doc.mark_char = EOF_CHAR;
doc.set_tag_attr_value(attr_quote);
doc.set_tag_code_in(TagCodeIn::Wait);
} else {
doc.prev_chars.push(c);
}
Ok(())
}
// Handler for the `TagEnd` state: collects the end tag name until `>` and
// then matches it against the innermost open tag.
//
// Errors:
// * `ErrorKind::WrongCaseSensitive` when names match only ignoring case and
//   `case_sensitive_tagname` is set;
// * `ErrorKind::WrongEndTag` when the name does not match the open tag and
//   `auto_fix_unexpected_endtag` is not set.
fn parse_tagend(doc: &mut Doc, c: char, context: &str) -> HResult {
use CodeTypeIn::*;
match c {
TAG_END_CHAR => {
let end_tag_name = doc.prev_chars.clone();
// Trailing whitespace in `</name  >` is ignored for matching.
let fix_end_tag_name = chars_trim_end(&end_tag_name);
let mut is_endtag_ok = false;
// Index 0 of `chain_nodes` is the abstract root, which is never closed.
if doc.chain_nodes.len() > 1 {
if let Some(tag) = doc.chain_nodes.last() {
let tag = tag.borrow();
let meta = tag
.meta
.as_ref()
.expect("Tag node must have a meta of tag name")
.borrow();
let start_tag_name = &meta.name;
let (is_equal, is_total_same) =
is_equal_chars_ignore_case(start_tag_name, fix_end_tag_name);
if is_equal {
if doc.parse_options.case_sensitive_tagname && !is_total_same {
return doc.error(
ErrorKind::WrongCaseSensitive(doc.chars_to_string()),
context,
);
}
is_endtag_ok = true;
}
}
}
if is_endtag_ok {
let last_tag = doc
.chain_nodes
.pop()
.expect("End tag must have matched tag in chain nodes");
// Link the end-tag node (the current node) to its start tag.
last_tag.borrow_mut().end_tag = Some(Rc::clone(&doc.current_node));
let is_only_text_child = match &last_tag.borrow().childs {
Some(childs) => childs.len() == 1 && childs[0].borrow().node_type == NodeType::Text,
None => false,
};
if !is_only_text_child {
doc.set_text_spaces_between();
}
doc.set_tag_end_info();
doc.set_code_in(Unkown);
// Leaving the special subtree when its root tag is closed.
if doc.in_special.is_some()
&& is_equal_chars(&doc.in_special.as_ref().unwrap().1, fix_end_tag_name, &None)
{
doc.in_special = None;
}
let content = doc.clean_chars_to_vec();
let mut current_node = doc.current_node.borrow_mut();
current_node.parent = Some(Rc::downgrade(&last_tag));
current_node.content = Some(content);
} else {
if !doc.parse_options.auto_fix_unexpected_endtag {
return doc.error(ErrorKind::WrongEndTag(doc.chars_to_string()), context);
} else {
// Drop the unexpected end tag: restore the node that was current before
// it (when `parse_unkown_tag` recorded one in `prev`) and discard the
// buffered name.
let mut orig_current_node: Option<RefNode> = None;
if let Some(prev_node) = &doc.current_node.borrow().prev {
orig_current_node = Some(prev_node.upgrade().expect(""));
}
if let Some(orig_current_node) = orig_current_node {
doc.current_node = orig_current_node;
}
doc.prev_chars.clear();
doc.set_code_in(Unkown);
return Ok(());
}
}
}
_ => {
doc.prev_chars.push(c);
}
}
Ok(())
}
// Handler for raw-content tags (`HTMLScript`, `HTMLStyle`,
// `EscapeableRawText`): everything is buffered verbatim until the closing
// sequence stored in `doc.detect` is found right before a `>`.
//
// The match is done backwards from the end of the buffer, ignoring trailing
// whitespace, and ignoring ASCII case unless `case_sensitive_tagname` is set.
// NOTE(review): `doc.detect` is set outside this part of the file;
// `split_off(2)` below assumes it starts with the two chars "</" — confirm.
fn parse_content_tag(doc: &mut Doc, c: char, _: &str) -> HResult {
use CodeTypeIn::*;
if c != TAG_END_CHAR {
doc.prev_chars.push(c);
return Ok(());
}
let end_tag = doc
.detect
.as_ref()
.expect("detect chars must set before set_code_in.");
let mut detect_len = end_tag.len();
let mut prev_len = doc.prev_chars.len();
if prev_len >= detect_len {
let prev_chars = &doc.prev_chars;
// Skip whitespace between the end tag name and `>`.
while prev_len > 0 {
let cur_index = prev_len - 1;
let prev_char = prev_chars[cur_index];
if !prev_char.is_ascii_whitespace() {
break;
} else {
prev_len = cur_index;
}
}
if prev_len >= detect_len {
let case_sensitive = doc.parse_options.case_sensitive_tagname;
// Compare the expected sequence against the buffer tail, last char first.
while detect_len > 0 {
let detect_index = detect_len - 1;
let detect_char = end_tag[detect_index];
let prev_index = prev_len - 1;
let prev_char = prev_chars[prev_index];
let is_matched = if detect_char == prev_char {
true
} else {
if case_sensitive {
false
} else {
match detect_char {
'A'..='Z' => detect_char.to_ascii_lowercase() == prev_char,
'a'..='z' => detect_char.to_ascii_uppercase() == prev_char,
_ => false,
}
}
};
if is_matched {
detect_len = detect_index;
prev_len = prev_index;
} else {
break;
}
}
}
// `detect_len == 0` means the whole sequence matched; `prev_len` is now
// the index where the end tag starts inside the buffer.
if detect_len == 0 {
doc.set_code_in(Unkown);
// Cut the end tag off the buffer and drop its first two chars.
let end_tag_name = doc.prev_chars.split_off(prev_len).split_off(2);
let mut end = Node::new(NodeType::TagEnd, doc.position + 1);
end.content = Some(end_tag_name);
end.parent = Some(Rc::downgrade(&doc.current_node));
let node = Rc::new(RefCell::new(end));
// What remains in the buffer is the raw content of the tag.
let content = doc.prev_chars.split_off(0);
let mut current_node = doc.current_node.borrow_mut();
current_node.end_tag = Some(Rc::clone(&node));
current_node.content = Some(content);
doc.chain_nodes.pop();
doc.detect = None;
return Ok(());
}
}
// Not the closing tag: `>` is ordinary content.
doc.prev_chars.push(c);
Ok(())
}
// Handler for the `Comment` and `XMLCDATA` states.
//
// Buffers characters until a `>` that is directly preceded by two
// `doc.mark_char` characters (`--` for comments, `]]` for CDATA). The two
// marker chars are removed and the rest becomes the node's content.
fn parse_comment_or_cdata(doc: &mut Doc, c: char, _: &str) -> HResult {
    use CodeTypeIn::*;
    if c == TAG_END_CHAR {
        let end_symbol = doc.mark_char;
        // `ends_with` also covers a buffer holding exactly the two marker
        // chars, i.e. the empty forms `<!---->` and `<![CDATA[]]>`. The old
        // `len > 2` check skipped those, so they were never closed at their
        // own terminator.
        if doc.prev_chars.ends_with(&[end_symbol, end_symbol]) {
            let content_len = doc.prev_chars.len() - 2;
            doc.prev_chars.truncate(content_len);
            let content = doc.clean_chars_to_vec();
            doc.current_node.borrow_mut().content = Some(content);
            doc.mark_char = EOF_CHAR;
            doc.set_tag_end_info();
            doc.set_code_in(Unkown);
            return Ok(());
        }
    }
    doc.prev_chars.push(c);
    Ok(())
}
// Handler for the `UnkownTag` state, i.e. the character right after `<`.
// A letter starts a tag, `/` an end tag, `!` a comment / doctype / CDATA.
// Anything else is `ErrorKind::WrongTag` unless `auto_fix_unescaped_lt`
// is set, in which case `Doc::fix_unescaped_lt` handles it.
fn parse_unkown_tag(doc: &mut Doc, c: char, context: &str) -> HResult {
use CodeTypeIn::*;
match c {
'a'..='z' | 'A'..='Z' => {
// The node starts at the `<` remembered in `mem_position`.
let inner_node = Node::new(NodeType::Tag, doc.mem_position);
let node = Rc::new(RefCell::new(inner_node));
doc.add_new_node(node);
doc.set_code_in(Tag);
doc.set_text_spaces_between();
doc.prev_chars.push(c);
}
END_SLASH_CHAR => {
if !doc.parse_options.auto_fix_unclosed_tag {
doc.add_new_node(Rc::new(RefCell::new(Node::new(
NodeType::TagEnd,
doc.mem_position,
))));
} else {
// Keep a weak link to the node that was current before the end tag, so
// `parse_tagend` can restore it.
let prev_node = Rc::downgrade(&doc.current_node);
doc.add_new_node(Rc::new(RefCell::new(Node::new(
NodeType::TagEnd,
doc.mem_position,
))));
doc.current_node.borrow_mut().prev = Some(prev_node);
}
doc.set_code_in(TagEnd);
}
'!' => {
doc.set_code_in(ExclamationBegin);
}
_ => {
if !doc.parse_options.auto_fix_unescaped_lt {
return create_parse_error(
ErrorKind::WrongTag(c.to_string()),
doc.mem_position,
context,
);
}
doc.fix_unescaped_lt(c, context)?;
}
};
Ok(())
}
// Handler for the `ExclamationBegin` state, i.e. the characters after `<!`.
//
// The first character selects the construct (`-` comment, `D`/`d` doctype,
// `[` CDATA) and stores its expected sequence in `doc.detect`. Each further
// character must match that sequence (compared upper-cased); when the last
// one matches, the node is created and the parser switches state.
//
// Errors: `WrongTag` for an unknown first character, `UnrecognizedTag` when
// a later character does not match, and `CommonError` when CDATA appears
// outside a special (SVG / MathML) subtree or directly inside its root tag.
fn parse_exclamation_begin(doc: &mut Doc, c: char, context: &str) -> HResult {
use CodeTypeIn::*;
// Set when `c` must not be appended to the working buffer.
let mut ignore_char = false;
if let Some(next_chars) = &doc.detect {
// `prev_chars` holds the part of the sequence matched so far.
let total_len = doc.prev_chars.len();
let actual_len = next_chars.len();
if total_len < actual_len {
let cur_should_be = next_chars.get(total_len).unwrap();
if cur_should_be == &c.to_ascii_uppercase() {
if total_len == actual_len - 1 {
// Last expected character: the construct is fully recognised.
let begin_at = doc.mem_position;
match c {
DASH_CHAR | LEFT_BRACKET_CHAR => {
let code_in: CodeTypeIn;
let node_type: NodeType;
if c == DASH_CHAR {
code_in = Comment;
// `mark_char` is the character doubled in the terminator (`-->`).
doc.mark_char = DASH_CHAR;
node_type = NodeType::Comment;
} else {
code_in = XMLCDATA;
// Terminator is `]]>`.
doc.mark_char = RIGHT_BRACKET_CHAR;
node_type = NodeType::XMLCDATA;
}
doc.set_code_in(code_in);
doc.add_new_node(Rc::new(RefCell::new(Node::new(node_type, begin_at))));
// The opening sequence is not part of the content.
doc.prev_chars.clear();
doc.set_text_spaces_between();
ignore_char = true;
}
'E' | 'e' => {
// End of "DOCTYPE"; the name stays in the buffer.
doc.set_code_in(HTMLDOCTYPE);
doc.add_new_node(Rc::new(RefCell::new(Node::new(
NodeType::HTMLDOCTYPE,
begin_at,
))));
doc.set_text_spaces_between();
}
_ => unreachable!(),
};
doc.detect = None;
}
} else {
return create_parse_error(
ErrorKind::UnrecognizedTag(doc.chars_to_string(), chars_to_string(next_chars)),
doc.mem_position,
context,
);
}
}
} else {
// First character after `<!`: decide which construct to expect.
let detect_type: DetectChar;
match c {
DASH_CHAR => {
detect_type = DetectChar::Comment;
}
'D' | 'd' => {
detect_type = DetectChar::DOCTYPE;
}
LEFT_BRACKET_CHAR => {
let special_tag_name = doc.in_special.as_ref().map(|(_, name)| name);
if let Some(tag_name) = special_tag_name {
// CDATA is rejected when the innermost open tag is the special root
// itself; it is accepted only in a descendant of it.
if is_equal_chars(
tag_name,
&doc
.chain_nodes
.last()
.unwrap()
.borrow()
.meta
.as_ref()
.expect("Chain nodes must all be tag nodes")
.borrow()
.name,
&None,
) {
return create_parse_error(
ErrorKind::CommonError("<![CDATA tag can in sub node".into()),
doc.position,
context,
);
} else {
detect_type = DetectChar::XMLCDATA;
}
} else {
return create_parse_error(
ErrorKind::CommonError("wrong <![CDATA tag can only used in Svg or MathML".into()),
doc.position,
context,
);
}
}
_ => {
return create_parse_error(
ErrorKind::WrongTag(doc.chars_to_string()),
doc.mem_position,
context,
);
}
};
doc.detect = Some(DETECT_CHAR_MAP.get(&detect_type).unwrap().to_vec());
}
if !ignore_char {
doc.prev_chars.push(c);
}
Ok(())
}
/// Parser state plus the resulting document tree.
pub struct Doc {
// Current parser state.
code_in: CodeTypeIn,
// Index of the character currently being processed (incremented by `next`).
position: usize,
// Position of the most recent `<`, used for node starts and error reports.
mem_position: usize,
// Character sequence the parser currently expects (after `<!`, or the
// closing sequence of a raw-content tag).
detect: Option<Vec<char>>,
// Working buffer for the token being read.
prev_chars: Vec<char>,
// Quote char of the attribute value being read (`WS_CHAR` = unquoted), or
// the terminator char of a comment / CDATA; `EOF_CHAR` when unset.
mark_char: char,
// Stack of currently open nodes; index 0 is the abstract root.
chain_nodes: Vec<RefNode>,
// The node currently being built.
current_node: RefNode,
// Set while inside an SVG / MathML subtree: the kind and the root tag name.
in_special: Option<(SpecialTag, Vec<char>)>,
// True while the text node being read consists of whitespace only.
repeat_whitespace: bool,
// Last whitespace-only text node, recorded by `parse_text`.
check_textnode: Option<RefNode>,
// Handler that consumes the next character.
handle: NextHandle,
// NOTE(review): not written in this part of the file — purpose to be confirmed.
pub chars: Vec<char>,
/// Options controlling parsing and error recovery.
pub parse_options: ParseOptions,
/// The abstract root node of the tree.
pub root: RefNode,
/// Presumably maps `id` attribute values to their nodes; it is not
/// populated in this part of the file — TODO confirm.
pub id_tags: Rc<RefCell<StringNodeMap>>,
/// Optional error callback.
pub onerror: Rc<RefCell<Option<Rc<ErrorHandle>>>>,
}
/// Map from a string key to a node handle (the type behind `Doc::id_tags`).
pub type StringNodeMap = HashMap<String, RefNode>;
/// Boxed callback that receives an error (the type behind `Doc::onerror`).
pub type ErrorHandle = Box<dyn Fn(BoxDynError)>;
impl Doc {
// Creates an empty document: an abstract root node that is both the current
// node and the only entry of the open-node stack, default parse options,
// and the idle handler installed.
fn new() -> Self {
    let root = Rc::new(RefCell::new(Node::new(NodeType::AbstractRoot, 0)));
    // The root node refers to itself as the tree root.
    root.borrow_mut().root = Some(Rc::downgrade(&root));
    let mut chain_nodes = Vec::with_capacity(ALLOC_NODES_CAPACITY);
    chain_nodes.push(Rc::clone(&root));
    let mut doc = Doc {
        code_in: CodeTypeIn::AbstractRoot,
        position: 0,
        mem_position: 0,
        mark_char: EOF_CHAR,
        prev_chars: Vec::with_capacity(ALLOC_CHAR_CAPACITY),
        chain_nodes,
        current_node: Rc::clone(&root),
        detect: None,
        in_special: None,
        parse_options: Default::default(),
        repeat_whitespace: false,
        check_textnode: None,
        handle: noop,
        chars: Vec::with_capacity(100),
        root,
        id_tags: Rc::new(RefCell::new(HashMap::new())),
        onerror: Rc::new(RefCell::new(None)),
    };
    doc.init();
    doc
}
// Installs the idle handler so parsing starts in the waiting state.
fn init(&mut self) {
self.handle = parse_wait;
}
/// Wraps the document in a shared handle, stores a weak back-reference to it
/// in the root node's `document` field, and returns it as a `DocHolder`.
pub fn into_root(self) -> DocHolder {
let doc = Rc::new(RefCell::new(self));
doc.borrow_mut().root.borrow_mut().document = Some(Rc::downgrade(&doc));
DocHolder { doc }
}
/// Parses `content` with the given options and returns the document.
///
/// # Errors
/// Returns the first error raised while feeding characters to the parser or
/// while finalising it at end of input.
pub fn parse(content: &str, options: ParseOptions) -> GenResult<DocHolder> {
    let mut doc = Doc::new();
    doc.parse_options = options;
    // Feed the parser one char at a time, stopping at the first error.
    content.chars().try_for_each(|c| doc.next(c, content))?;
    doc.eof(content)?;
    Ok(doc.into_root())
}
/// Parses the file at `filename` line by line and returns the document.
///
/// Relative paths are resolved against the current working directory. Each
/// line is fed to the parser followed by a single `'\n'`.
///
/// # Errors
/// Returns an error when the path cannot be resolved, the file cannot be
/// opened or read (including invalid UTF-8), or the content fails to parse.
pub fn parse_file<P>(filename: P, options: ParseOptions) -> GenResult<DocHolder>
where
    P: AsRef<Path>,
{
    let file_path = filename.as_ref();
    let file_path = if file_path.is_absolute() {
        file_path.to_path_buf()
    } else {
        // Propagate resolution failures (e.g. a missing file) instead of panicking.
        env::current_dir()?.join(file_path).canonicalize()?
    };
    let file = BufReader::new(File::open(file_path)?);
    let mut doc = Doc::new();
    let mut content = String::with_capacity(500);
    doc.parse_options = options;
    for line in file.lines() {
        // Read errors are returned to the caller instead of panicking.
        let line_content = line?;
        content.push_str(&line_content);
        for c in line_content.chars() {
            doc.next(c, &content)?;
        }
        // Keep the error context in step with `position`: the newline fed to
        // the parser has to be present in the context string as well.
        content.push('\n');
        doc.next('\n', &content)?;
    }
    doc.eof(&content)?;
    Ok(doc.into_root())
}
/// Converts the characters currently held in `prev_chars` to a `String` via
/// the free function `chars_to_string`; the buffer itself is only borrowed.
fn chars_to_string(&self) -> String {
chars_to_string(&self.prev_chars)
}
/// Moves every buffered character out of `prev_chars` into a new vector,
/// leaving `prev_chars` empty.
fn clean_chars_to_vec(&mut self) -> Vec<char> {
    self.prev_chars.drain(..).collect()
}
/// Records the new top-level parsing state and installs the character
/// handler that belongs to it.
fn set_code_in(&mut self, code_in: CodeTypeIn) {
    use CodeTypeIn::*;
    self.code_in = code_in;
    self.handle = match code_in {
        Unkown | AbstractRoot => parse_wait,
        Tag => parse_tag_name,
        TagEnd => parse_tagend,
        TextNode => parse_text,
        HTMLScript | HTMLStyle | EscapeableRawText => parse_content_tag,
        HTMLDOCTYPE => parse_doctype_name,
        Comment | XMLCDATA => parse_comment_or_cdata,
        UnkownTag => parse_unkown_tag,
        ExclamationBegin => parse_exclamation_begin,
    };
}
/// Feeds a single character to the currently installed handler.
///
/// `content` is passed through to the handler unchanged. `position` is
/// incremented only after the handler returns `Ok`; on a handler error this
/// method returns early without incrementing it.
fn next(&mut self, c: char, content: &str) -> HResult {
// Read the handler into a local, then invoke it with `self`.
let handle = self.handle;
handle(self, c, content)?;
self.position += 1;
Ok(())
}
fn add_new_node(&mut self, node: RefNode) {
use NodeType::*;
let node_type = node.borrow().node_type;
if node_type != TagEnd {
let parent_node = self.chain_nodes.last().unwrap();
node.borrow_mut().parent = Some(Rc::downgrade(parent_node));
let mut parent_node = parent_node.borrow_mut();
let child = Rc::clone(&node);
if let Some(childs) = &mut parent_node.childs {
child.borrow_mut().index = childs.len();
childs.push(child);
} else {
child.borrow_mut().index = 0;
parent_node.childs = Some(vec![child]);
}
node.borrow_mut().root = Some(Rc::downgrade(&self.root));
}
self.current_node = Rc::clone(&node);
if node_type == Tag {
self.chain_nodes.push(Rc::clone(&node));
}
}
fn set_tag_end_info(&mut self) {
use NodeType::*;
let mut current_node = self.current_node.borrow_mut();
let node_type = current_node.node_type;
current_node.end_at = if node_type == Text {
self.position
} else {
self.position + 1
};
}
/// If a text node is pending in `check_textnode`, re-types it as
/// `SpacesBetweenTag` and clears the pending slot; otherwise does nothing.
fn set_text_spaces_between(&mut self) {
    if let Some(pending) = self.check_textnode.take() {
        pending.borrow_mut().node_type = NodeType::SpacesBetweenTag;
    }
}
/// Gives every node in `unclosed` that has tag meta a synthetic end tag.
///
/// The meta is flagged `auto_fix`, and a new `TagEnd` node located at the
/// current `position`, carrying the tag's name as content and the tag as its
/// parent, is stored in the node's `end_tag`. Nodes without meta are left
/// untouched.
fn fix_unclosed_tag(&mut self, unclosed: &[RefNode]) {
    for tag_node in unclosed {
        // Build the end node first; the node borrow ends with this statement
        // so the tag can be borrowed again below.
        let synthetic_end = tag_node.borrow_mut().meta.as_ref().map(|cell| {
            let mut meta = cell.borrow_mut();
            meta.auto_fix = true;
            let mut end = Node::new(NodeType::TagEnd, self.position);
            end.content = Some(meta.name.clone());
            end.parent = Some(Rc::downgrade(tag_node));
            end
        });
        if let Some(end) = synthetic_end {
            tag_node.borrow_mut().end_tag = Some(Rc::new(RefCell::new(end)));
        }
    }
}
/// Recovers from a `<` that did not start a tag by turning it into the text
/// `&lt;`.
///
/// `c` is the character that followed the stray `<`. Unless `c` is itself
/// `<`, it is appended after the `&lt;`. The text goes to one of two places:
///
/// * if the current node is a `Text`/`SpacesBetweenTag` node with content,
///   that content plus the new characters becomes the pending buffer
///   (`prev_chars`), and a `SpacesBetweenTag` node is re-typed as `Text`;
/// * otherwise a new `Text` node starting at `mem_position` is appended to
///   the current tag (when it is neither self-closed nor auto-fixed) or to
///   the node on top of `chain_nodes` (falling back to `root`), and the new
///   characters become the pending buffer.
///
/// The parser then switches to the text state. When `c` is `<`, it is handed
/// to the newly installed handler so it is processed as the next character.
///
/// # Errors
/// Propagates any error from re-dispatching `c`.
fn fix_unescaped_lt(&mut self, c: char, context: &str) -> HResult {
    let mut chars = vec!['&', 'l', 't', ';'];
    let mut parent: Option<RefNode> = None;
    let mut need_parent = false;
    let node_type = self.current_node.borrow().node_type;
    let is_end_text = c == TAG_BEGIN_CHAR;
    if !is_end_text {
        chars.push(c);
    }
    match node_type {
        NodeType::Text | NodeType::SpacesBetweenTag => {
            // Reopen the existing text node: its content moves back into the
            // pending buffer, followed by the escaped characters.
            if let Some(content) = &mut self.current_node.borrow_mut().content {
                let mut prev_chars: Vec<char> = Vec::with_capacity(content.len() + chars.len());
                prev_chars.append(content);
                prev_chars.append(&mut chars);
                self.prev_chars = prev_chars;
            }
            if matches!(node_type, NodeType::SpacesBetweenTag) {
                self.current_node.borrow_mut().node_type = NodeType::Text;
            }
        }
        NodeType::Tag => {
            let is_closed = self
                .current_node
                .borrow()
                .meta
                .as_ref()
                .map_or(false, |meta| {
                    let meta = &meta.borrow();
                    meta.self_closed || meta.auto_fix
                });
            if !is_closed {
                parent = Some(Rc::clone(&self.current_node));
            } else {
                need_parent = true;
            }
        }
        _ => {
            need_parent = true;
        }
    }
    if need_parent {
        // Innermost open node, or the root when the chain is empty.
        parent = Some(Rc::clone(self.chain_nodes.last().unwrap_or(&self.root)));
    }
    if let Some(parent) = &parent {
        // NOTE(review): unlike `add_new_node`, the new text node's `parent`
        // and `root` fields are not set here — confirm this is intended.
        let text_node = Node::new(NodeType::Text, self.mem_position);
        let current_node = Rc::new(RefCell::new(text_node));
        let mut parent = parent.borrow_mut();
        let childs = parent.childs.get_or_insert_with(Vec::new);
        current_node.borrow_mut().index = childs.len();
        childs.push(Rc::clone(&current_node));
        self.current_node = current_node;
        self.prev_chars = chars;
    }
    self.repeat_whitespace = false;
    self.set_code_in(CodeTypeIn::TextNode);
    if is_end_text {
        // `c` is another `<`: let the text handler decide what it starts.
        let handle = self.handle;
        handle(self, c, context)?;
    }
    Ok(())
}
fn eof(&mut self, context: &str) -> HResult {
let cur_depth = self.chain_nodes.len();
if cur_depth > 1 {
if !self.parse_options.auto_fix_unclosed_tag {
let last_node = self.chain_nodes[cur_depth - 1].borrow();
let begin_at = last_node.begin_at;
let tag_name = chars_to_string(
&last_node
.meta
.as_ref()
.expect("tag node's meta must have")
.borrow()
.name,
);
return create_parse_error(ErrorKind::UnclosedTag(tag_name), begin_at, context);
}
let unclosed = self.chain_nodes.split_off(1);
self.fix_unclosed_tag(&unclosed);
}
use CodeTypeIn::*;
match self.code_in {
TextNode => {
let mut last_node = self.current_node.borrow_mut();
last_node.content = Some(self.prev_chars.clone());
if self.repeat_whitespace {
last_node.node_type = NodeType::SpacesBetweenTag;
}
last_node.end_at = self.position;
}
Unkown | AbstractRoot => {
}
Tag => {
if !self.parse_options.auto_fix_unclosed_tag {
return create_parse_error(
ErrorKind::UnclosedTag(format!("{:?}", self.code_in)),
self.current_node.borrow().begin_at,
context,
);
}
}
_ => {
return create_parse_error(
ErrorKind::UnclosedTag(format!("{:?}", self.code_in)),
self.current_node.borrow().begin_at,
context,
);
}
}
self.root.borrow_mut().end_at = self.position;
Ok(())
}
/// Builds a parse error of the given `kind` located at the current
/// `position`, forwarding `context` to `create_parse_error`.
fn error(&self, kind: ErrorKind, context: &str) -> HResult {
create_parse_error(kind, self.position, context)
}
/// Takes the buffered characters as the tag name and attaches fresh tag meta
/// to the current node.
///
/// The name is stored as written; its ASCII-lowercased form is only used to
/// ask `is_void_tag` whether the tag is a void element. Returns that answer.
fn set_tag_meta(&mut self) -> bool {
    let tag_name = self.clean_chars_to_vec();
    let lowered: Vec<char> = tag_name.iter().map(char::to_ascii_lowercase).collect();
    let is_void = is_void_tag(&lowered);
    self.current_node.borrow_mut().meta = Some(RefCell::new(TagMeta {
        name: tag_name,
        attrs: Vec::with_capacity(5),
        lc_name_map: HashMap::with_capacity(5),
        is_void,
        ..Default::default()
    }));
    is_void
}
fn set_tag_code_in(&mut self, code_in: TagCodeIn) {
use TagCodeIn::*;
match code_in {
Wait | WaitValue | ValueEnd => self.handle = parse_tag_wait,
Key => self.handle = parse_tag_attr_key,
Value => self.handle = parse_tag_attr_value,
KeyEnd => {}
};
if let Some(meta) = &self.current_node.borrow_mut().meta {
meta.borrow_mut().code_in = code_in;
}
}
fn is_tag_code_in(&self, code_in: &TagCodeIn) -> bool {
let current_node = self.current_node.borrow();
let tag_code_in = ¤t_node
.meta
.as_ref()
.expect("Tag meta must set in parse_tag_name or parse_doctype_name")
.borrow()
.code_in;
tag_code_in == code_in
}
/// Starts a new attribute key on the current tag: calls `add_attr_key` on the
/// tag meta and marks the meta as being inside a key. Does nothing when the
/// current node has no meta.
fn add_tag_attr_key(&mut self) {
    if let Some(cell) = self.current_node.borrow_mut().meta.as_ref() {
        let mut meta = cell.borrow_mut();
        meta.add_attr_key();
        meta.code_in = TagCodeIn::Key;
    }
}
/// Takes the buffered characters as the key of the most recently added
/// attribute.
///
/// The ASCII-lowercased key is mapped to that attribute's index in
/// `lc_name_map` unless the same lowercased key is already present, so the
/// first occurrence wins. The buffer is drained even when the current node
/// has no meta.
fn set_tag_attr_key(&mut self) {
    let key = self.clean_chars_to_vec();
    if let Some(cell) = self.current_node.borrow_mut().meta.as_ref() {
        let mut meta = cell.borrow_mut();
        let lowered: String = key.iter().map(char::to_ascii_lowercase).collect();
        // Index of the attribute currently being filled in (the last one).
        let slot = meta.attrs.len() - 1;
        meta.lc_name_map.entry(lowered).or_insert(slot);
        meta.set_attr_key(key);
    }
}
/// Starts an attribute value on the current tag, forwarding the optional
/// `quote` character to `add_attr_value`, and marks the meta as being inside
/// a value.
///
/// # Panics
/// Panics if the current node has no tag meta.
fn add_tag_attr_value(&mut self, quote: Option<char>) {
    let node = self.current_node.borrow_mut();
    let mut meta = node
        .meta
        .as_ref()
        .expect("The tag meta must not be empty")
        .borrow_mut();
    meta.add_attr_value(quote);
    meta.code_in = TagCodeIn::Value;
}
/// Takes the buffered characters as the value of the current attribute.
///
/// When `check_if_id` on the resulting attribute yields a name, the current
/// node is stored under that name in `id_tags`, replacing any earlier entry.
/// The buffer is drained even when the current node has no meta.
fn set_tag_attr_value(&mut self, quote: Option<char>) {
    let value = self.clean_chars_to_vec();
    if let Some(cell) = self.current_node.borrow_mut().meta.as_ref() {
        let mut meta = cell.borrow_mut();
        let id_name = meta.set_attr_value(value, quote).check_if_id();
        if let Some(id_name) = id_name {
            let node = Rc::clone(&self.current_node);
            self.id_tags.borrow_mut().insert(id_name, node);
        }
    }
}
fn check_tag_type_do(&mut self) {
use CodeTypeIn::*;
let lc_tag_name = self
.current_node
.borrow()
.meta
.as_ref()
.expect("")
.borrow()
.name
.iter()
.map(|ch| ch.to_ascii_lowercase())
.collect::<Vec<char>>();
if is_content_tag(&lc_tag_name, &None) {
self.mem_position = self.position;
let code_in = if is_equal_chars(&lc_tag_name, &SCRIPT_TAG_NAME, &None) {
HTMLScript
} else if is_equal_chars(&lc_tag_name, &STYLE_TAG_NAME, &None) {
HTMLStyle
} else {
EscapeableRawText
};
self.set_code_in(code_in);
let mut next_chars = vec!['<', END_SLASH_CHAR];
next_chars.extend_from_slice(
&self
.current_node
.borrow()
.meta
.as_ref()
.expect("")
.borrow()
.name,
);
self.detect = Some(next_chars);
} else {
if self.in_special.is_none() {
self.in_special = if let Some(&special) = SPECIAL_TAG_MAP.get(&lc_tag_name) {
Some((
special,
self
.current_node
.borrow()
.meta
.as_ref()
.expect("")
.borrow()
.name
.clone(),
))
} else {
None
}
}
self.set_code_in(Unkown);
}
}
}
fn get_element(map: &StringNodeMap, query: &str) -> Option<RefNode> {
map.get(query).map(Rc::clone)
}
/// Handle around a shared document (`RefDoc`); `Doc::into_root` returns one
/// for a parsed document.
pub struct DocHolder {
// Shared, interior-mutable document.
doc: RefDoc,
}
impl DocHolder {
    /// Builds the root node with `options` and returns the result as a
    /// `String` (calls `build` with its second argument set to `false`).
    pub fn render(&self, options: &RenderOptions) -> String {
        let doc = self.borrow();
        let root = doc.root.borrow();
        let built = root.build(options, false);
        chars_to_string(&built)
    }
    /// Same as [`DocHolder::render`], but calls `build` with its second
    /// argument set to `true`.
    pub fn render_text(&self, options: &RenderOptions) -> String {
        let doc = self.borrow();
        let root = doc.root.borrow();
        let built = root.build(options, true);
        chars_to_string(&built)
    }
    /// Immutably borrows the underlying document.
    ///
    /// # Panics
    /// Panics if the document is currently mutably borrowed.
    pub fn borrow(&self) -> Ref<Doc> {
        self.doc.borrow()
    }
    /// Returns a new shared handle to the document's root node.
    pub fn get_root_node(&self) -> RefNode {
        let doc = self.borrow();
        Rc::clone(&doc.root)
    }
    /// Returns the node stored under `id` in the document's `id_tags` map, if
    /// any.
    pub fn get_element_by_id(&self, id: &str) -> Option<RefNode> {
        let doc = self.borrow();
        let ids = doc.id_tags.borrow();
        get_element(&ids, id)
    }
}
impl From<RefDoc> for DocHolder {
fn from(doc: RefDoc) -> Self {
DocHolder { doc }
}
}