use crate::Error;
use std::{borrow::Cow, str::FromStr};
use tl::*;
/// Convenience accessors for reading attribute values from an HTML tag.
pub trait HTMLTagExtension {
/// Reads the attribute `attr` and parses its value into `T` using [`FromStr`].
///
/// In the `HTMLTag` implementation in this file, the absence of a value
/// yields `Ok(None)` and a value that fails to parse yields
/// `Err(Error::ParseError)`.
fn get_attr<T>(&self, attr: &str) -> Result<Option<T>, Error>
where
T: FromStr;
/// Returns the value of the attribute `attr` as an owned `String`, or `None`
/// when there is no value to return.
fn get_attr_str(&self, attr: &str) -> Option<String>;
}
impl<'a> HTMLTagExtension for HTMLTag<'a> {
fn get_attr<T>(&self, attr: &str) -> Result<Option<T>, Error>
where
T: FromStr,
{
let s = self.get_attr_str(attr);
if s.is_none() {
return Ok(None);
}
match s.unwrap().parse::<T>() {
Ok(t) => Ok(Some(t)),
Err(_) => Err(Error::ParseError),
}
}
fn get_attr_str(&self, attr: &str) -> Option<String> {
let result = self.attributes().get(attr).flatten()?;
Some(result.as_utf8_str().to_string())
}
}
/// Helpers layered on top of `NodeHandle`.
pub trait NodeHandleExtension {
/// Resolves this handle through `parser` and returns the inner text of the
/// node it refers to, or `None` when the handle does not resolve to a node.
fn inner_text<'b, 'p: 'b>(&self, parser: &'p tl::Parser<'b>) -> Option<Cow<'b, str>>;
/// Pairs this handle with the DOM `d`, producing a [`RichNode`].
fn to_rich<'a>(self, d: &'a VDom<'a>) -> RichNode<'a>;
}
impl NodeHandleExtension for NodeHandle {
    /// Looks the handle up in `parser` and returns the inner text of the
    /// resolved node; `None` when the lookup yields no node.
    fn inner_text<'b, 'p: 'b>(&self, parser: &'p tl::Parser<'b>) -> Option<Cow<'b, str>> {
        self.get(parser).map(|node| node.inner_text(parser))
    }

    /// Wraps this handle together with `d` in a [`RichNode`] whose `n` is
    /// always `Some`.
    fn to_rich<'a>(self, d: &'a VDom<'a>) -> RichNode<'a> {
        RichNode { d, n: Some(self) }
    }
}
/// A node handle bundled with the `VDom` it is resolved against.
///
/// `n` is `None` when the value stands for "no node", for example the result
/// of a search that matched nothing.
#[derive(Debug, Clone, Copy)]
pub struct RichNode<'a> {
/// DOM used to resolve `n`.
pub d: &'a VDom<'a>,
/// Handle of the wrapped node, if any.
pub n: Option<NodeHandle>,
}
/// Builds a predicate that accepts a node when it is a tag for which
/// `attributes().is_class_member(class)` holds.
///
/// The predicate returns `false` (instead of panicking) for a node that is
/// empty, cannot be resolved, or is not a tag.
fn cmp_class<'a>(class: &'a str) -> impl Fn(RichNode<'a>) -> bool {
    move |n: RichNode| {
        n.get()
            .and_then(|node| node.as_tag())
            .map_or(false, |tag| tag.attributes().is_class_member(class))
    }
}
/// Builds a predicate that accepts a node when it is a tag whose `name()`
/// equals `tag`.
///
/// The predicate returns `false` (instead of panicking) for a node that is
/// empty, cannot be resolved, or is not a tag.
fn cmp_tag<'a>(tag: &'a str) -> impl Fn(RichNode<'a>) -> bool {
    move |n: RichNode| {
        n.get()
            .and_then(|node| node.as_tag())
            .map_or(false, |element| element.name().eq(tag))
    }
}
impl<'a> RichNode<'a> {
pub fn find_where(self, f: impl Fn(RichNode<'a>) -> bool) -> RichNode<'a>{
if self.n.is_none() {
return RichNode {d: self.d, n: None};
}
let n = dfs_first_where(self, &f);
RichNode{d: self.d, n}
}
pub fn find(self, class: &'a str) -> RichNode<'a> {
self.find_where(&cmp_class(class))
}
pub fn find_tag(self, tag: &'a str) -> RichNode<'a> {
self.find_where(&cmp_tag(tag))
}
pub fn find_all(self, class: &str) -> Vec<RichNode<'a>> {
if self.n.is_none() {
return Vec::<RichNode<'a>>::new();
}
self.d
.select_nodes(self.n.unwrap(), class)
.iter()
.map(|n| n.to_rich(self.d))
.collect()
}
pub fn child(self, index: u32) -> Option<RichNode<'a>> {
let mut i = 0;
for x in self.get()?.children()?.top().iter() {
if x.get(self.d.parser()).unwrap().as_tag().is_some() {
if index == i {
return Some(x.to_rich(self.d));
}
i += 1;
}
}
None
}
pub fn get_attr_str(&self, attr: &str) -> Option<String> {
let tag = self.n.and_then(|n| n.get(self.d.parser()))?.as_tag()?;
let result = tag.attributes().get(attr).flatten()?;
Some(result.as_utf8_str().to_string())
}
pub fn get_attr_str_esc(&self, attr: &str) -> Option<String> {
let result = self.get_attr_str(attr)?;
let result = html_escape::decode_html_entities(&result);
Some(result.to_string())
}
pub fn get_attr<T>(&self, attr: &str) -> Result<Option<T>, Error>
where
T: FromStr,
{
let s = self.get_attr_str(attr);
if s.is_none() {
return Ok(None);
}
match s.unwrap().parse::<T>() {
Ok(t) => Ok(Some(t)),
Err(_) => Err(Error::ParseError),
}
}
pub fn has_class(self, class: &'static str) -> Option<bool> {
Some(self.n?
.get(self.d.parser())?
.as_tag()?
.attributes()
.is_class_member(class))
}
pub fn inner_parse<T>(self) -> Result<Option<T>, Error>
where
T: FromStr,
{
let t = self.inner_text();
if t.is_none() {
return Ok(None);
}
match t.unwrap().parse::<T>() {
Ok(x) => Ok(Some(x)),
Err(_) => Err(Error::ParseError),
}
}
pub fn inner_text(self) -> Option<String> {
self.n
.and_then(|n| n.inner_text(self.d.parser()))
.map(|c| c.to_string())
}
pub fn get(self) -> Option<&'a Node<'a>> {
self.n.and_then(|n| n.get(self.d.parser()))
}
}
/// Class-based node selection for a `VDom`.
pub trait VDomExtension<'a> {
/// Returns handles of the nodes found by searching from `h` for the given
/// `class`.
///
/// In the `VDom` implementation in this file the search is depth-first,
/// considers `h` itself first, and selects tags for which
/// `attributes().is_class_member(class)` holds.
fn select_nodes(&'a self, h: NodeHandle, class: &str) -> Vec<NodeHandle>;
}
impl<'a> VDomExtension<'a> for VDom<'a> {
    /// Runs `dfs` from `h` and returns every handle it collected for `class`,
    /// in the order `dfs` pushed them.
    fn select_nodes(&'a self, h: NodeHandle, class: &str) -> Vec<NodeHandle> {
        let mut matches = Vec::new();
        dfs(h, self, class, &mut matches);
        matches
    }
}
/// Depth-first traversal that appends to `result` every tag, starting with
/// `h` itself, for which `attributes().is_class_member(class)` holds.
///
/// A node is pushed before its children are visited, so parents precede
/// their descendants in `result`. Nodes that are not tags are ignored, and a
/// handle that does not resolve is skipped instead of causing a panic.
fn dfs<'a>(h: NodeHandle, dom: &'a VDom<'a>, class: &str, result: &mut Vec<NodeHandle>) {
    // Resolve the handle once and reuse the node for both checks below.
    let node = match h.get(dom.parser()) {
        Some(node) => node,
        None => return,
    };
    let tag = match node.as_tag() {
        Some(tag) => tag,
        None => return,
    };
    if tag.attributes().is_class_member(class) {
        result.push(h);
    }
    if let Some(children) = node.children() {
        for &child in children.top().iter() {
            dfs(child, dom, class, result);
        }
    }
}
/// Depth-first search returning the handle of the first node accepted by `f`.
///
/// `h` itself is tested before any of its children. Returns `None` when no
/// node matches, and also when `h` is empty or cannot be resolved (instead
/// of panicking).
fn dfs_first_where<'a>(h: RichNode<'a>, f: &dyn Fn(RichNode<'a>) -> bool) -> Option<NodeHandle> {
    if f(h) {
        return h.n;
    }
    let children = h.get()?.children()?;
    for &child in children.top().iter() {
        let found = dfs_first_where(child.to_rich(h.d), f);
        if found.is_some() {
            return found;
        }
    }
    None
}
// Tests for the extension helpers. They parse HTML fixtures pulled in with
// `include_str!`; the expected counts and strings below depend on those
// fixture files, which are not part of this source file.
#[cfg(test)]
mod tests {
use super::*;
// `select_nodes` starting from the first top-level node should report two
// nodes for class "abc", the first of which has inner text "dist1ll".
#[test]
pub fn dfs_test() {
let input = include_str!("./testdata/dfs.html");
let dom = tl::parse(input, tl::ParserOptions::default()).unwrap();
let nodes = dom.select_nodes(dom.children()[0], "abc");
assert_eq!(nodes.len(), 2);
assert_eq!(
nodes[0].get(dom.parser()).unwrap().inner_text(dom.parser()),
"dist1ll"
);
}
// Chained `find` calls: a successful chain ends in a non-empty node, an
// unknown class ends in an empty node, and `find_all` returns every match.
#[test]
pub fn rich() {
let input = include_str!("./testdata/rich.html");
let dom = tl::parse(input, tl::ParserOptions::default()).unwrap();
let root = dom.children()[0].to_rich(&dom);
let inner = root.find("b").find("x").find("y");
assert!(inner.n.is_some());
let inner = root.find("nsaedoi");
assert!(inner.n.is_none());
let inner = root.find("b").find("c").find_all("d");
assert_eq!(inner.len(), 2);
assert_eq!(inner[0].inner_text().unwrap(), "d1".to_string());
}
// `has_class` on the node found for "first": it reports "first" and
// "second" as present and "third" as absent.
#[test]
pub fn has_class() {
let input = include_str!("./testdata/rich.html");
let dom = tl::parse(input, tl::ParserOptions::default()).unwrap();
let root = dom.get_element_by_id("has-class").unwrap().to_rich(&dom);
let n = root.find("first");
assert!(n.has_class("first").unwrap());
assert!(n.has_class("second").unwrap());
assert!(!n.has_class("third").unwrap());
}
// `find_tag` locates elements by tag name below the "find-tag" element and
// yields an empty node for a tag name that is expected to be absent ("span").
#[test]
pub fn find_tag() {
let input = include_str!("./testdata/tl/ext1.html");
let dom = tl::parse(input, tl::ParserOptions::default()).unwrap();
let root = dom.get_element_by_id("find-tag").unwrap().to_rich(&dom);
assert!(root.find_tag("a").has_class("link").unwrap());
assert!(root.find_tag("img").has_class("image").unwrap());
assert!(root.find_tag("table").n.is_some());
assert!(root.find_tag("span").n.is_none());
}
// `child(i)` indexes the tag children of the "root-child" element; the
// assertions check which of the first two carry `href` / `class` values.
#[test]
pub fn child_index() {
let input = include_str!("./testdata/tl/ext1.html");
let dom = tl::parse(input, tl::ParserOptions::default()).unwrap();
let root = dom.get_element_by_id("root-child").unwrap().to_rich(&dom);
assert!(root.child(0).unwrap().get_attr_str("href").is_some());
assert!(root.child(0).unwrap().get_attr_str("class").is_none());
assert!(root.child(1).unwrap().get_attr_str("class").is_some());
}
}