mod mesdoc;
use mesdoc::interface::{
BoxDynElement, BoxDynNode, BoxDynText, BoxDynUncareNode, Elements, IDocumentTrait, IElementTrait,
IErrorHandle, INodeTrait, ITextTrait, IUncareNodeTrait, InsertPosition, MaybeDoc, MaybeElement,
Texts,
};
use mesdoc::{error::Error as IError, utils::retain_by_index};
use rphtml::{
config::RenderOptions,
entity::{encode, encode_chars, EncodeType, EntitySet},
parser::{
allow_insert, is_content_tag, Attr, AttrData, Doc, DocHolder, NameCase, Node, NodeType, RefNode,
},
};
use std::error::Error;
use std::rc::Rc;
use std::{any::Any, cell::RefCell};
/// Public re-exports of the `mesdoc` interface types and of the selector
/// `Combinator`, so users of this crate can name them via `types::…`.
pub mod types {
pub use crate::mesdoc::interface::{
BoxDynElement, BoxDynNode, BoxDynText, Elements, IAttrValue, IDocumentTrait, IEnumTyped,
INodeType, Texts,
};
pub use crate::mesdoc::selector::Combinator;
}
/// Public re-export of the `rphtml` parse options accepted by the `Vis::load_options*` functions.
pub mod html {
pub use rphtml::config::ParseOptions;
}
use crate::html::ParseOptions;
use crate::types::{IAttrValue, IEnumTyped, INodeType};
struct Dom;
impl Dom {
fn halt(dom: &Rc<RefCell<Node>>, method: &str, message: &str) {
if let Some(doc) = &dom.owner_document() {
doc.trigger_error(Box::new(IError::InvalidTraitMethodCall {
method: String::from(method),
message: String::from(message),
}));
}
}
fn validate_dom_change(dom: &Rc<RefCell<Node>>, node: &BoxDynElement, method: &str) -> bool {
let my_node_type = dom.borrow().node_type;
if my_node_type != NodeType::Tag {
Dom::halt(
dom,
method,
&format!("Can't {} for a {:?} type", method, my_node_type),
);
return false;
}
if let INodeType::Document = node.node_type() {
Dom::halt(dom, method, &format!("Can't {} of a document type", method));
return false;
}
if dom.is(&node) {
Dom::halt(dom, method, &format!("Can't {} of dom.", method));
return false;
}
let mut cur = dom.cloned();
while let Some(parent) = &cur.parent() {
if parent.is(&node) {
Dom::halt(dom, method, &format!("Can't {} of self's parent", method));
return false;
}
cur = parent.cloned();
}
true
}
}
/// Converts an owned `String` into a `&'static str` by leaking its heap allocation.
///
/// The leaked memory is never reclaimed, so every call permanently keeps the
/// bytes of `orig` alive for the rest of the program.
fn to_static_str(orig: String) -> &'static str {
    let boxed: Box<str> = orig.into_boxed_str();
    Box::leak(boxed)
}
/// Renumbers `childs` consecutively: the first node receives `start_index`,
/// the next one `start_index + 1`, and so on.
fn reset_next_siblings_index(start_index: usize, childs: &[RefNode]) {
    childs.iter().enumerate().for_each(|(offset, node)| {
        node.borrow_mut().index = start_index + offset;
    });
}
/// Filters out of `nodes` every node whose type `allow_insert` rejects for a
/// parent tag named `tag_name`.
///
/// Returns `true` only when some, but not all, of the nodes were rejected;
/// callers use this to know the surviving nodes need their indexes reset.
/// It returns `false` when nothing was rejected and also when everything was.
fn remove_not_allowed_nodes(tag_name: &[char], nodes: &mut Vec<RefNode>) -> bool {
    let orig_len = nodes.len();
    let mut not_allowed_indexs: Vec<usize> = Vec::with_capacity(orig_len);
    for (index, node) in nodes.iter().enumerate() {
        if !allow_insert(tag_name, node.borrow().node_type) {
            not_allowed_indexs.push(index);
        }
    }
    let not_allowed_len = not_allowed_indexs.len();
    if not_allowed_len > 0 {
        // NOTE(review): `retain_by_index` is presumably dropping the entries at
        // the listed positions — confirm against `mesdoc::utils`.
        retain_by_index(nodes, &not_allowed_indexs);
    }
    not_allowed_len > 0 && not_allowed_len != orig_len
}
/// Asks `is_content_tag` whether `name` is a content tag, matching with
/// `NameCase::Lower`.
fn check_if_content_tag(name: &[char]) -> bool {
    let case = Some(NameCase::Lower);
    is_content_tag(name, &case)
}
impl INodeTrait for Rc<RefCell<Node>> {
/// Converts this boxed node into a `Box<dyn Any>` so that callers can
/// downcast it back to the concrete `RefNode` (see `is` and `insert_adjacent`).
fn to_node(self: Box<Self>) -> Box<dyn Any> {
self
}
/// Returns the `index` value stored on this node.
fn index(&self) -> usize {
    let node = self.borrow();
    node.index
}
/// Returns a new boxed handle to the same underlying node (the `Rc` is
/// cloned, not the node data).
fn clone_node<'b>(&self) -> BoxDynNode<'b> {
    let handle = self.clone();
    Box::new(handle)
}
/// Wraps this node in the `IEnumTyped` variant matching its node type:
/// elements, fragments and documents become `Element`, text becomes `Text`,
/// and everything else becomes `UncareNode`.
fn typed<'b>(self: Box<Self>) -> IEnumTyped<'b> {
    let kind = self.node_type();
    if matches!(kind, INodeType::Text) {
        return IEnumTyped::Text(self as BoxDynText);
    }
    if matches!(
        kind,
        INodeType::Element | INodeType::DocumentFragement | INodeType::Document
    ) {
        return IEnumTyped::Element(self as BoxDynElement);
    }
    IEnumTyped::UncareNode(self as BoxDynUncareNode)
}
/// Maps the parser's `NodeType` onto the public `INodeType`.
///
/// An abstract root is reported as `Document` or `DocumentFragement`
/// depending on the first value returned by `Node::is_document`.
fn node_type(&self) -> INodeType {
    let node = self.borrow();
    let kind = node.node_type;
    match kind {
        NodeType::Tag => INodeType::Element,
        NodeType::Text | NodeType::SpacesBetweenTag => INodeType::Text,
        NodeType::Comment => INodeType::Comment,
        NodeType::XMLCDATA => INodeType::XMLCDATA,
        NodeType::HTMLDOCTYPE => INodeType::HTMLDOCTYPE,
        NodeType::AbstractRoot => {
            let (is_document, _) = node.is_document();
            if is_document {
                INodeType::Document
            } else {
                INodeType::DocumentFragement
            }
        }
        _ => INodeType::Other,
    }
}
fn parent<'b>(&self) -> MaybeElement<'b> {
if let Some(parent) = &self.borrow().parent {
if let Some(node) = parent.upgrade() {
return Some(Box::new(node));
}
}
None
}
fn owner_document(&self) -> MaybeDoc {
if let Some(root) = &self.borrow().root {
if let Some(root) = &root.upgrade() {
if let Some(doc) = &root.borrow().document {
return Some(Box::new(Document {
doc: Rc::clone(doc).into(),
}));
}
}
}
None
}
/// Renders this node with `decode_entity` enabled and returns the resulting
/// characters. The second argument passed to `build` is `true` only for
/// element nodes.
fn text_contents(&self) -> Vec<char> {
    let is_element = matches!(self.node_type(), INodeType::Element);
    let options = RenderOptions {
        decode_entity: true,
        ..Default::default()
    };
    let node = self.borrow();
    node.build(&options, is_element)
}
/// Replaces the text of this node.
///
/// * Element that is a content tag: `content` is stored raw in the node's
///   `content` field (cleared when `content` is empty).
/// * Any other element: the children are replaced by a single text node
///   holding the entity-encoded `content` (children are dropped when
///   `content` is empty).
/// * Text node: the text is replaced; an empty `content` is rejected and
///   reported through `Dom::halt`.
/// * Other node types: nothing happens.
fn set_text(&mut self, content: &str) {
    match self.node_type() {
        INodeType::Element => {
            let is_content_tag = check_if_content_tag(&self.tag_names());
            let mut node = self.borrow_mut();
            match (content.is_empty(), is_content_tag) {
                (true, true) => node.content = None,
                (true, false) => node.childs = None,
                (false, true) => {
                    node.content = Some(content.chars().collect::<Vec<char>>());
                }
                (false, false) => {
                    let encoded =
                        encode(content, EntitySet::SpecialChars, EncodeType::NamedOrDecimal);
                    let mut text_node = Node::create_text_node(encoded, None);
                    text_node.parent = Some(Rc::downgrade(&self));
                    node.childs = Some(vec![Rc::new(RefCell::new(text_node))]);
                }
            }
        }
        INodeType::Text => {
            if !content.is_empty() {
                self.borrow_mut().content = Some(content.chars().collect::<Vec<char>>());
            } else {
                Dom::halt(self,"set_text",
                    "the text parameter can't be empty, if you want to remove a text node, you can use 'remove' method instead."
                );
            }
        }
        _ => {}
    }
}
fn set_html(&mut self, content: &str) {
let mut is_element = true;
let target = match self.node_type() {
INodeType::Element => Some(Rc::clone(&self)),
INodeType::Text => {
if let Some(parent) = &self.borrow_mut().parent {
if let Some(parent) = &parent.upgrade() {
is_element = false;
Some(Rc::clone(parent))
} else {
None
}
} else {
None
}
}
_ => None,
};
if let Some(target) = &target {
if check_if_content_tag(
&target
.borrow()
.meta
.as_ref()
.expect("A tag use `set_html` must have a tag name.")
.borrow()
.name,
) {
target.borrow_mut().content = Some(content.chars().collect::<Vec<char>>());
} else {
let doc_holder = Doc::parse(
content,
ParseOptions {
auto_fix_unexpected_endtag: true,
auto_fix_unescaped_lt: true,
..Default::default()
},
)
.unwrap();
if let Some(nodes) = &mut doc_holder.get_root_node().borrow_mut().childs {
let mut nodes = nodes.split_off(0);
let has_not_allowed = remove_not_allowed_nodes(
&target
.borrow()
.meta
.as_ref()
.expect("A tag use `set_html` must have a tag name.")
.borrow()
.name,
&mut nodes,
);
let has_nodes = !nodes.is_empty();
if has_nodes {
for node in &nodes {
node.borrow_mut().parent = Some(Rc::downgrade(target));
}
}
if is_element {
if has_not_allowed {
reset_next_siblings_index(0, &nodes);
}
(*target.borrow_mut()).childs = if has_nodes { Some(nodes) } else { None };
} else if let Some(childs) = &mut target.borrow_mut().childs {
let index = self.index();
if index < childs.len() - 1 {
reset_next_siblings_index(index + nodes.len(), &childs[index + 1..]);
}
if has_nodes {
reset_next_siblings_index(index, &nodes);
childs.splice(index..index + 1, nodes);
} else {
childs.remove(index);
}
} else {
}
} else {
target.borrow_mut().childs = None;
}
}
}
}
}
/// Text-node operations for the reference-counted parser node.
impl ITextTrait for Rc<RefCell<Node>> {
    /// Detaches this node from its parent's child list and renumbers the
    /// siblings that followed it. Does nothing when there is no live parent.
    fn remove(self: Box<Self>) {
        let index = self.index();
        // Resolve the parent first so no borrow of this node is held while
        // the parent's child list is being modified.
        let parent = self
            .borrow()
            .parent
            .as_ref()
            .and_then(|parent| parent.upgrade());
        if let Some(parent) = parent {
            if let Some(childs) = &mut parent.borrow_mut().childs {
                childs.remove(index);
                reset_next_siblings_index(index, &childs[index..]);
            }
        }
    }

    /// Appends `content` to the node's text, creating the text when the node
    /// has none yet.
    fn append_text(&mut self, content: &str) {
        let chars = content.chars().collect::<Vec<char>>();
        // Borrow once: a second `borrow_mut()` inside an `if let … else`
        // would panic, because the scrutinee's guard is still alive in `else`.
        let mut node = self.borrow_mut();
        node.content.get_or_insert_with(Vec::new).extend(chars);
    }

    /// Inserts `content` in front of the node's text, creating the text when
    /// the node has none yet.
    fn prepend_text(&mut self, content: &str) {
        let chars = content.chars().collect::<Vec<char>>();
        // Single borrow, for the same reason as in `append_text`.
        let mut node = self.borrow_mut();
        node.content
            .get_or_insert_with(Vec::new)
            .splice(0..0, chars);
    }
}
// Empty implementation: nothing is overridden here. `typed` wraps nodes that
// are neither elements nor text in `IEnumTyped::UncareNode` via this trait.
impl IUncareNodeTrait for Rc<RefCell<Node>> {}
impl IElementTrait for Rc<RefCell<Node>> {
fn tag_names(&self) -> Vec<char> {
match self.node_type() {
INodeType::Element => {
if let Some(meta) = &self.borrow().meta {
return meta
.borrow()
.name
.iter()
.map(|ch| ch.to_ascii_lowercase())
.collect();
}
Dom::halt(self, "tag_name", "Html syntax error: not found a tag name.");
}
INodeType::Document | INodeType::DocumentFragement => {}
cur_type => Dom::halt(
self,
"tag_name",
&format!("The node type of '{:?}' doesn't have a tag name.", cur_type),
),
};
vec![]
}
/// Returns the number of child nodes, or `0` when the node has no child list.
fn child_nodes_length(&self) -> usize {
    let node = self.borrow();
    match &node.childs {
        Some(childs) => childs.len(),
        None => 0,
    }
}
/// Returns a boxed handle to the child at `index`, or `None` when the node
/// has no child list or `index` is out of range.
fn child_nodes_item<'b>(&self, index: usize) -> Option<BoxDynNode<'b>> {
    let node = self.borrow();
    let child = node.childs.as_ref()?.get(index)?;
    Some(Box::new(Rc::clone(child)) as BoxDynNode)
}
fn child_nodes_item_since_by<'a>(
&'a self,
node_index: usize,
reverse: bool,
mut handle: Box<dyn FnMut(&dyn IElementTrait) -> bool + 'a>,
) {
if let Some(childs) = &self.borrow().childs {
if reverse {
for child in childs[0..=node_index].iter().rev() {
if matches!(child.node_type(), INodeType::Element) {
if !handle(child) {
break;
}
}
}
} else {
for child in childs[node_index..].iter() {
if matches!(child.node_type(), INodeType::Element) {
if !handle(child) {
break;
}
}
}
}
}
}
/// Calls `matcher` once for every child that is an element node, in order.
fn children_by<'a>(&'a self, mut matcher: Box<dyn FnMut(&dyn IElementTrait) + 'a>) {
    let node = self.borrow();
    if let Some(childs) = node.childs.as_ref() {
        childs
            .iter()
            .filter(|child| matches!(child.node_type(), INodeType::Element))
            .for_each(|child| matcher(child));
    }
}
/// Looks up an attribute by name, ignoring ASCII case.
///
/// Returns `IAttrValue::Value` with the stored characters and quote when the
/// attribute has a value, `IAttrValue::True` when it has none, and `None`
/// when the attribute is absent.
///
/// A node without meta data now yields `None` instead of panicking, which
/// matches how `set_attribute` and `remove_attribute` treat such nodes.
fn get_attribute(&self, name: &str) -> Option<IAttrValue> {
    let node = self.borrow();
    let meta = node.meta.as_ref()?.borrow();
    let index = *meta.lc_name_map.get(&name.to_ascii_lowercase())?;
    // Checked access: a stale map entry yields `None` rather than a panic.
    let attr = meta.attrs.get(index)?;
    Some(match &attr.value {
        Some(value) => IAttrValue::Value(value.content.iter().collect(), attr.quote),
        None => IAttrValue::True,
    })
}
/// Sets attribute `name` to `value`, or to a value-less attribute when
/// `value` is `None`. The lookup of an existing attribute uses the
/// ASCII-lowercased name. Nothing happens for a node without meta data.
fn set_attribute(&mut self, name: &str, value: Option<&str>) {
// Both flags are updated from inside the `map` closure below.
let mut need_quote = false;
let mut quote: char = '"';
if let Some(meta) = &self.borrow().meta {
let value = value.map(|v| {
let mut find_quote: bool = false;
let mut content = Vec::with_capacity(v.len());
for ch in v.chars() {
// Once any char requires quoting, the flag stays set.
if !need_quote {
need_quote = Attr::need_quoted_char(&ch);
}
if ch == '"' || ch == '\'' {
if find_quote {
// A quote char was already seen: entity-encode further occurrences of
// the chosen wrapping quote, keep the other quote char raw.
if quote == ch {
let mut encoded_quote =
encode_chars(&[ch], EntitySet::SpecialChars, EncodeType::Named);
content.append(&mut encoded_quote);
} else {
content.push(ch);
}
} else {
// First quote char seen: keep it raw; if it is `"`, switch the wrapping
// quote to `'`.
find_quote = true;
if ch == '"' {
quote = '\'';
}
content.push(ch);
}
} else {
content.push(ch);
}
}
AttrData { content }
});
let lc_name = name.to_ascii_lowercase();
let find_index = if let Some(&index) = meta.borrow().lc_name_map.get(&lc_name) {
Some(index)
} else {
None
};
if let Some(index) = find_index {
// NOTE(review): only `value` is replaced on an existing attribute; its
// `quote` and `need_quote` are left as they were — confirm this is intended.
meta.borrow_mut().attrs[index].value = value;
return;
}
// New attribute: register its position under the lowercased name, then append it.
let index = meta.borrow().attrs.len();
meta.borrow_mut().lc_name_map.insert(lc_name, index);
let quote = if value.is_some() { Some(quote) } else { None };
meta.borrow_mut().attrs.push(Attr {
key: Some(AttrData {
content: name.chars().collect(),
}),
value,
quote,
need_quote,
});
}
}
/// Removes attribute `name`, ignoring ASCII case.
///
/// The attribute's slot in `attrs` is reset to `Attr::default()`, so the
/// positions of the other attributes stay valid, and its entry is dropped
/// from the lowercased-name map. Nothing happens when the node has no meta
/// data or the attribute is absent.
fn remove_attribute(&mut self, name: &str) {
    if let Some(meta) = &self.borrow().meta {
        // The map is keyed by the lowercased name, so removal must use it too;
        // removing by the raw `name` left a stale entry for mixed-case input.
        let lc_name = name.to_ascii_lowercase();
        let removed = meta.borrow_mut().lc_name_map.remove(&lc_name);
        if let Some(index) = removed {
            meta.borrow_mut().attrs[index] = Attr::default();
        }
    }
}
/// Renders this node with the `inner_html` and `encode_content` options
/// enabled and returns the result as a `String`.
fn inner_html(&self) -> String {
    let options = RenderOptions {
        inner_html: true,
        encode_content: true,
        ..Default::default()
    };
    let rendered = self.borrow().build(&options, false);
    rendered.into_iter().collect::<String>()
}
/// Renders this node with the `encode_content` option enabled and returns
/// the result as a `String`.
fn outer_html(&self) -> String {
    let options = RenderOptions {
        encode_content: true,
        ..Default::default()
    };
    let rendered = self.borrow().build(&options, false);
    rendered.into_iter().collect::<String>()
}
/// Removes `ele` from this node's child list and renumbers the siblings
/// that followed it. Does nothing unless `ele`'s parent is this node.
fn remove_child(&mut self, ele: BoxDynElement) {
    let is_own_child = match ele.parent() {
        Some(parent) => self.is(&parent),
        None => false,
    };
    if !is_own_child {
        return;
    }
    let mut node = self.borrow_mut();
    if let Some(childs) = node.childs.as_mut() {
        let index = ele.index();
        // Shift the indexes of the later siblings down before removing.
        if index != childs.len() - 1 {
            reset_next_siblings_index(index, &childs[index + 1..]);
        }
        childs.remove(index);
    }
}
/// Inserts `node` relative to this element.
///
/// * `BeforeBegin` / `AfterEnd`: `node` becomes a sibling placed just before
///   / just after this element in the parent's child list.
/// * `AfterBegin` / `BeforeEnd`: `node` becomes the first / last child of
///   this element.
///
/// A document fragment contributes its children instead of itself; any other
/// node is first detached from its current parent. Nodes rejected by
/// `allow_insert` for this element's tag are dropped. Invalid requests (see
/// `Dom::validate_dom_change`) and nodes that are not backed by a `RefNode`
/// are reported through `Dom::halt`.
fn insert_adjacent(&mut self, position: &InsertPosition, node: &BoxDynElement) {
    let action = position.action();
    if !Dom::validate_dom_change(self, &node, action) {
        return;
    }
    let node_type = node.node_type();
    let specified: Box<dyn Any> = node.cloned().to_node();
    if let Ok(dom) = specified.downcast::<RefNode>() {
        let mut nodes = match node_type {
            INodeType::DocumentFragement => {
                if let Some(childs) = &dom.borrow().childs {
                    childs
                        .iter()
                        .map(|v| Rc::clone(&v))
                        .collect::<Vec<RefNode>>()
                } else {
                    vec![]
                }
            }
            _ => {
                // Detach the node from wherever it currently lives.
                if let Some(parent) = &mut node.parent() {
                    parent.remove_child(node.cloned());
                }
                vec![*dom]
            }
        };
        let tag_name = self.tag_names();
        remove_not_allowed_nodes(&tag_name, &mut nodes);
        if nodes.is_empty() {
            return;
        }
        use InsertPosition::*;
        match position {
            BeforeBegin | AfterEnd => {
                // Read after the detach above, so this index is already up to date.
                let mut index = self.index();
                if *position == AfterEnd {
                    index += 1;
                }
                let insert_len = nodes.len();
                reset_next_siblings_index(index, &nodes);
                // Resolve the parent without keeping this node borrowed: for
                // `BeforeBegin` this node is among the siblings renumbered below.
                let parent = self
                    .borrow()
                    .parent
                    .as_ref()
                    .and_then(|parent| parent.upgrade());
                if let Some(parent) = parent {
                    if let Some(childs) = &mut parent.borrow_mut().childs {
                        let nexts = if index < childs.len() {
                            childs.split_off(index)
                        } else {
                            Vec::new()
                        };
                        for node in &nodes {
                            node.borrow_mut().parent = Some(Rc::downgrade(&parent));
                        }
                        childs.extend(nodes);
                        reset_next_siblings_index(index + insert_len, &nexts);
                        childs.extend(nexts);
                    }
                }
            }
            AfterBegin | BeforeEnd => {
                for node in &nodes {
                    node.borrow_mut().parent = Some(Rc::downgrade(&self));
                }
                // One mutable borrow covers both the "has children" and the
                // "no child list yet" cases; borrowing again in an `else`
                // branch would panic while the first guard is still alive.
                let mut me = self.borrow_mut();
                match me.childs.take() {
                    Some(mut childs) => {
                        if *position == BeforeEnd {
                            reset_next_siblings_index(childs.len(), &nodes);
                            childs.extend(nodes);
                            me.childs = Some(childs);
                        } else {
                            reset_next_siblings_index(0, &nodes);
                            reset_next_siblings_index(nodes.len(), &childs);
                            nodes.append(&mut childs);
                            me.childs = Some(nodes);
                        }
                    }
                    None => {
                        reset_next_siblings_index(0, &nodes);
                        me.childs = Some(nodes);
                    }
                }
            }
        }
    } else {
        Dom::halt(
            self,
            action,
            &format!("Can't {} that not implemented 'Dom'", action),
        );
    }
}
/// Collects the text nodes found below this element.
///
/// `limit_depth` bounds how many levels are descended: `1` looks only at
/// direct children, and `0` means no limit. An element that has no child
/// nodes but is a content tag is itself collected as a text node. Returns
/// `None` when nothing was collected.
fn texts<'b>(&self, limit_depth: u32) -> Option<Texts<'b>> {
    let limit_depth = if limit_depth == 0 {
        u32::MAX
    } else {
        limit_depth
    };
    let mut result: Texts = Texts::with_capacity(5);
    // Depth-first walk; `cur_depth` is the depth of `node` itself.
    fn loop_handle(node: BoxDynElement, result: &mut Texts, cur_depth: u32, limit_depth: u32) {
        let child_nodes = node.child_nodes();
        if !child_nodes.is_empty() {
            let next_depth = cur_depth + 1;
            let recursive = next_depth < limit_depth;
            // Reuse the list fetched above instead of building it a second time.
            for node in &child_nodes {
                match node.node_type() {
                    INodeType::Text => {
                        let node = node.clone_node();
                        let text = node.typed().into_text().expect("TextNode must true");
                        result.get_mut_ref().push(text);
                    }
                    INodeType::Element => {
                        if recursive {
                            let node = node.clone_node();
                            let ele = node.typed().into_element().expect("ElementNode must true");
                            loop_handle(ele, result, next_depth, limit_depth);
                        }
                    }
                    _ => {}
                }
            }
        } else if check_if_content_tag(&node.tag_names()) {
            result.get_mut_ref().push(
                node
                    .into_text()
                    .expect("Content tag must be able to translate into text node"),
            );
        }
    }
    let node = Box::new(Rc::clone(&self)) as BoxDynElement;
    loop_handle(node, &mut result, 0, limit_depth);
    if !result.is_empty() {
        return Some(result);
    }
    None
}
/// Reinterprets a content-tag element as a text node.
///
/// # Errors
///
/// Returns `InvalidTraitMethodCall` when the element is not a content tag.
fn into_text<'b>(self: Box<Self>) -> Result<BoxDynText<'b>, Box<dyn Error>> {
    if !check_if_content_tag(&self.tag_names()) {
        return Err(Box::new(IError::InvalidTraitMethodCall {
            method: "into_text".into(),
            message: "Can't call 'into_text' with tags those are not content tags.".into(),
        }));
    }
    Ok(self as BoxDynText)
}
/// Returns `true` when `ele` is backed by a `RefNode` that `Node::is_same`
/// reports as the same node as this one, and `false` otherwise.
fn is(&self, ele: &BoxDynElement) -> bool {
    let specified: Box<dyn Any> = ele.cloned().to_node();
    match specified.downcast::<RefNode>() {
        Ok(dom) => Node::is_same(&self, &dom),
        Err(_) => false,
    }
}
/// Returns `true` when this node is the parser's abstract root.
fn is_root_element(&self) -> bool {
    let kind = self.borrow().node_type;
    kind == NodeType::AbstractRoot
}
}
struct Document {
doc: DocHolder,
}
impl Document {
fn bind_error(&mut self, handle: IErrorHandle) {
*self.doc.borrow().onerror.borrow_mut() = Some(Rc::new(handle));
}
fn list<'b>(&self) -> Elements<'b> {
let root = Rc::clone(&self.doc.borrow().root);
Elements::with_nodes(vec![Box::new(root)])
}
}
/// Document-level queries, delegated to the wrapped `DocHolder`.
impl IDocumentTrait for Document {
    /// Returns the element the wrapped document reports for `id`, if any.
    fn get_element_by_id<'b>(&self, id: &str) -> Option<BoxDynElement<'b>> {
        let found = self.doc.get_element_by_id(id);
        found.map(|node| Box::new(Rc::clone(&node)) as BoxDynElement<'b>)
    }

    /// Renders the document with default options.
    ///
    /// The returned string is leaked through `to_static_str`, so every call
    /// allocates memory that is never freed.
    fn source_code(&self) -> &'static str {
        let rendered = self.doc.render(&Default::default());
        to_static_str(rendered)
    }

    /// Returns a boxed handle to the document's root node.
    fn get_root_node<'b>(&self) -> BoxDynNode<'b> {
        let root = Rc::clone(&self.doc.borrow().root);
        Box::new(root)
    }

    /// Returns the currently installed error callback, if any.
    fn onerror(&self) -> Option<Rc<IErrorHandle>> {
        let holder = self.doc.borrow();
        let handler = holder.onerror.borrow();
        handler.as_ref().map(|handle| Rc::clone(handle))
    }
}
/// Entry point of the library: parses HTML and hands back element sets.
pub struct Vis;

impl Vis {
    /// Parse options used by [`Vis::load`] and [`Vis::load_catch`]: the three
    /// `auto_fix_*` flags and `allow_self_closing` are enabled, everything
    /// else keeps its default.
    pub(crate) fn options() -> ParseOptions {
        ParseOptions {
            allow_self_closing: true,
            auto_fix_unclosed_tag: true,
            auto_fix_unescaped_lt: true,
            auto_fix_unexpected_endtag: true,
            ..Default::default()
        }
    }

    /// Runs `mesdoc::init()` and parses `html` with `options`.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `Doc::parse`.
    pub(crate) fn parse_doc_options(
        html: &str,
        options: ParseOptions,
    ) -> Result<Document, Box<dyn Error>> {
        mesdoc::init();
        let parsed = Doc::parse(html, options)?;
        Ok(Document { doc: parsed })
    }

    /// Parses `html` with `options` and returns a set holding the root node.
    ///
    /// # Errors
    ///
    /// Propagates the parse error.
    pub fn load_options(html: &str, options: ParseOptions) -> Result<Elements, Box<dyn Error>> {
        Vis::parse_doc_options(html, options).map(|document| document.list())
    }

    /// Like [`Vis::load_options`], but errors go to `handle`.
    ///
    /// On success `handle` is installed as the document's error callback. On
    /// a parse failure `handle` is called with the error and an empty set is
    /// returned.
    pub fn load_options_catch(html: &str, options: ParseOptions, handle: IErrorHandle) -> Elements {
        match Vis::parse_doc_options(html, options) {
            Ok(mut document) => {
                document.bind_error(handle);
                document.list()
            }
            Err(err) => {
                handle(err);
                Elements::new()
            }
        }
    }

    /// Parses `html` with the options from [`Vis::options`].
    ///
    /// # Errors
    ///
    /// Propagates the parse error.
    pub fn load(html: &str) -> Result<Elements, Box<dyn Error>> {
        let options = Vis::options();
        Vis::load_options(html, options)
    }

    /// Parses `html` with the options from [`Vis::options`], routing errors
    /// to `handle` as described on [`Vis::load_options_catch`].
    pub fn load_catch(html: &str, handle: IErrorHandle) -> Elements {
        let options = Vis::options();
        Vis::load_options_catch(html, options, handle)
    }

    /// Wraps a single element in an element set.
    pub fn dom<'b>(ele: &BoxDynElement) -> Elements<'b> {
        let only = ele.cloned();
        Elements::with_nodes(vec![only])
    }
}