pub mod comment;
pub mod tag;
pub mod text;
pub use comment::Comment;
pub use tag::Tag;
pub use text::Text;
use crate::parser;
use crate::searcher;
/// The kind of node a [`Dom`] value represents.
///
/// The enum carries no data, so it is cheap to copy and compare by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DomType {
    /// An element node; its payload is a `Tag`.
    Tag,
    /// A text node; its payload is a `Text`.
    Text,
    /// A comment node; its payload is a `Comment`.
    Comment,
}
/// A single node of a document tree.
///
/// `dom_type` states which kind of node this is. The three payload fields
/// start out as `None` and are filled in through `set_tag`, `set_text` and
/// `set_comment`, each of which panics when `dom_type` is not the matching
/// kind.
#[derive(Clone, Debug, PartialEq)]
pub struct Dom {
    /// Kind of node this value represents.
    pub dom_type: DomType,
    /// Tag payload, assigned through `set_tag`.
    tag: Option<Tag>,
    /// Text payload, assigned through `set_text`.
    text: Option<Text>,
    /// Comment payload, assigned through `set_comment`.
    comment: Option<Comment>,
    /// Child nodes in insertion order; stays `None` until `add_child` is first called.
    children: Option<Vec<Box<Dom>>>,
}
impl Dom {
pub fn new(dom_type: DomType) -> Dom {
Dom {
dom_type,
tag: None,
text: None,
comment: None,
children: None,
}
}
pub fn new_root() -> Dom {
let tag = Tag::new("root");
let mut dom = Dom::new(DomType::Tag);
dom.set_tag(tag);
dom
}
fn domtype_str(&self) -> String {
match self.dom_type {
DomType::Tag => return String::from("Tag"),
DomType::Text => return String::from("Text"),
DomType::Comment => return String::from("Comment"),
}
}
pub fn set_tag(&mut self, tag: Tag) {
match self.dom_type {
DomType::Tag => self.tag = Some(tag),
_ => panic!("invalid DomType. expect Tag but {}", self.domtype_str()),
}
}
pub fn get_tag(&self) -> Option<&Tag> {
self.tag.as_ref()
}
pub fn set_text(&mut self, text: Text) {
match self.dom_type {
DomType::Text => self.text = Some(text),
_ => panic!("invalid DomType. expect Text but {}", self.domtype_str()),
}
}
pub fn get_text(&self) -> Option<&Text> {
self.text.as_ref()
}
pub fn set_comment(&mut self, comment: Comment) {
match self.dom_type {
DomType::Comment => self.comment = Some(comment),
_ => panic!("invalid DomType. expect Comment but {}", self.domtype_str()),
}
}
pub fn get_comment(&self) -> Option<&Comment> {
self.comment.as_ref()
}
pub fn add_child(&mut self, dom: Dom) {
let dom = Box::new(dom);
match &mut self.children {
Some(children) => {
children.push(dom);
}
None => {
let mut children = Vec::new();
children.push(dom);
self.children = Some(children);
}
}
}
pub fn get_children(&self) -> Option<&Vec<Box<Dom>>> {
self.children.as_ref()
}
pub fn p_implies_q(p: &Dom, q: &Dom) -> bool {
if q.dom_type != p.dom_type {
return false;
}
match q.dom_type {
DomType::Tag => {
if let Some(q_tag) = q.get_tag() {
if let Some(p_tag) = p.get_tag() {
return Tag::p_implies_q(p_tag, q_tag);
}
}
}
DomType::Text => {
if let Some(q_text) = q.get_text() {
if let Some(p_text) = p.get_text() {
if q_text.get_text().contains(p_text.get_text()) {
return true;
}
}
}
}
DomType::Comment => {
if let Some(q_comment) = q.get_comment() {
if let Some(p_comment) = p.get_comment() {
if q_comment.get_comment().contains(p_comment.get_comment()) {
return true;
}
}
}
}
}
false
}
pub fn p_implies_q_tree(p: &Dom, q: &Dom) -> bool {
if !Dom::p_implies_q(p, q) {
return false;
}
if let None = p.get_children() {
return true;
}
if let None = q.get_children() {
return false;
}
let p_children = p.get_children().unwrap();
let q_children = q.get_children().unwrap();
for p_child in p_children.iter() {
let mut child_match = false;
for q_child in q_children.iter() {
if Dom::p_implies_q_tree(p_child, q_child) {
child_match = true;
break;
}
}
if !child_match {
return false;
}
}
true
}
pub fn search(&self, needle: &str) -> Result<Option<Vec<Box<Dom>>>, String> {
let needle = parser::parse(&needle)?;
let needle = needle.get_children().unwrap().get(0).unwrap();
let root_dom = match searcher::search_dom(&self, &needle) {
Some(root_dom) => root_dom,
None => return Ok(None),
};
match root_dom.get_children() {
Some(children) => Ok(Some(children.clone())),
None => Ok(None),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser;

    /// Document shared by the equality tests.
    const SAMPLE_HTML: &str = r#"
<head>
<title>sample</title>
</head>
<body>
<h1>section</h1>
<ul>
<li>list1</li>
<li>list2</li>
</ul>
</body>
"#;

    /// Same as `SAMPLE_HTML` except that the second list item reads `list3`.
    const CHANGED_HTML: &str = r#"
<head>
<title>sample</title>
</head>
<body>
<h1>section</h1>
<ul>
<li>list1</li>
<li>list3</li>
</ul>
</body>
"#;

    /// Builds a `Tag` node named `name` with `attrs` applied in the given order.
    fn tag_dom(name: &'static str, attrs: &[(&'static str, &'static str)]) -> Dom {
        let mut tag = Tag::new(name);
        for &(key, value) in attrs {
            tag.set_attr(key, value);
        }
        let mut dom = Dom::new(DomType::Tag);
        dom.set_tag(tag);
        dom
    }

    /// Builds a `Text` node holding `content`.
    fn text_dom(content: &'static str) -> Dom {
        let mut dom = Dom::new(DomType::Text);
        dom.set_text(Text::new(content));
        dom
    }

    // A candidate carrying every attribute of the pattern (plus extras) matches.
    #[test]
    fn sufficient_condition() {
        let p = tag_dom("h1", &[("class", "target")]);
        let q = tag_dom("h1", &[("id", "q"), ("class", "target")]);
        assert!(Dom::p_implies_q(&p, &q));
    }

    // A candidate lacking the pattern's attribute does not match.
    #[test]
    fn not_sufficient_condition() {
        let p = tag_dom("h1", &[("class", "target")]);
        let q = tag_dom("h1", &[("id", "q")]);
        assert!(!Dom::p_implies_q(&p, &q));
    }

    // Text nodes match when the candidate text contains the pattern text.
    #[test]
    fn text_sufficient_condition() {
        let p = text_dom("def");
        let q = text_dom("abcdefghi");
        assert!(Dom::p_implies_q(&p, &q));
    }

    // Tree matching: h1 > [div, ul > li] against h1 > [div#divid, ul > li].
    #[test]
    fn p_implies_q_tree_test() {
        let mut p_list = tag_dom("ul", &[]);
        p_list.add_child(tag_dom("li", &[]));
        let mut p = tag_dom("h1", &[]);
        p.add_child(tag_dom("div", &[]));
        p.add_child(p_list);

        let mut q_list = tag_dom("ul", &[]);
        q_list.add_child(tag_dom("li", &[]));
        let mut q = tag_dom("h1", &[]);
        q.add_child(tag_dom("div", &[("id", "divid")]));
        q.add_child(q_list);

        assert!(Dom::p_implies_q_tree(&p, &q));
    }

    // Parsing the same document twice yields equal trees under both `==` and `!=`.
    #[test]
    fn eq_test() {
        let a_dom = parser::parse(SAMPLE_HTML).unwrap();
        let b_dom = parser::parse(SAMPLE_HTML).unwrap();
        assert!(a_dom == b_dom);
        assert!(!(a_dom != b_dom));
    }

    // Documents differing in one text node yield unequal trees.
    #[test]
    fn ne_test() {
        let a_dom = parser::parse(SAMPLE_HTML).unwrap();
        let b_dom = parser::parse(CHANGED_HTML).unwrap();
        assert!(!(a_dom == b_dom));
        assert!(a_dom != b_dom);
    }
}