use std::collections::HashMap;
use crate::value::Value;
use regex::{Captures, Regex};
use select::{
document::Document,
node::Node,
predicate::{Any, Attr, Name, Predicate},
};
/// Signature shared by every selector handler: it receives the regex captures of a
/// matched selector and returns the nodes that selector designates.
type CssHandler<'a> = Box<dyn Fn(Captures) -> Vec<Node<'a>> + 'a>;

/// Evaluates a small set of CSS-like selectors against a borrowed [`Document`].
pub struct Parser<'a> {
    /// Document that every selector is evaluated against.
    document: &'a Document,
    /// Selector handlers, keyed by the source text of the regex that recognises
    /// the selector form each handler implements.
    css_process: HashMap<&'a str, CssHandler<'a>>,
}
impl<'a> Parser<'a> {
pub fn new(document: &'a Document) -> Self {
Parser {
document,
css_process: HashMap::new(),
}
}
/// Registers the built-in selector handlers in the handler table.
///
/// Each handler is stored under the regex that recognises its selector form:
///
/// - `.class` — nodes whose class list contains `class`.
/// - `#id` — nodes whose `id` attribute equals `id`.
/// - `.a.b` — nodes whose class list contains both `a` and `b`.
/// - `tag` — nodes named `tag`.
/// - `[attr=value]` — nodes whose attribute `attr` equals `value` (the pattern is
///   unanchored, so it matches anywhere inside the selector).
/// - `tag.class` — nodes named `tag` whose class list contains `class` (only the
///   start of the selector is anchored).
/// - `outer inner` — `inner` nodes that descend from an `outer` node.
/// - `[attr~pattern]` — nodes whose attribute `attr` matches `pattern` as a regex
///   (unanchored, like `[attr=value]`).
/// - `tag.class inner` — for each `tag.class` node, its first `inner` descendant.
/// - `tag#id inner` — for each `tag` node with that `id`, its first `inner` descendant.
///
/// Class lists are split on any whitespace and compared token by token, so a class
/// name never matches as a mere substring of another one. In the two
/// "first descendant" forms, a parent without a matching descendant contributes
/// nothing to the result instead of causing a panic.
///
/// Calling this method again simply re-inserts the same entries.
pub fn defined_css_processes(&mut self) {
    // True when `class` is one of the whitespace-separated tokens of the node's
    // `class` attribute.
    fn has_class(node: &Node<'_>, class: &str) -> bool {
        node.attr("class")
            .map_or(false, |list| list.split_whitespace().any(|token| token == class))
    }

    // Copy the `&'a Document` out of `self` so that every `move` closure owns its
    // own reference and none of them keeps `self` borrowed.
    let document = self.document;

    // `.class`
    self.css_process.insert(
        r"^\.([\w-]+)$",
        Box::new(move |caps| {
            document
                .find(Any)
                .filter(|n| has_class(n, &caps[1]))
                .collect::<Vec<_>>()
        }),
    );

    // `#id`
    self.css_process.insert(
        r"^#([\w-]+)$",
        Box::new(move |caps| document.find(Attr("id", &caps[1])).collect::<Vec<_>>()),
    );

    // `.a.b` — both classes must be present as whole tokens.
    self.css_process.insert(
        r"^\.([\w-]+)\.([\w-]+)$",
        Box::new(move |caps| {
            document
                .find(Any)
                .filter(|n| has_class(n, &caps[1]) && has_class(n, &caps[2]))
                .collect::<Vec<_>>()
        }),
    );

    // `tag`
    self.css_process.insert(
        r"^([\w-]+)$",
        Box::new(move |caps| document.find(Name(&caps[1])).collect::<Vec<_>>()),
    );

    // `[attr=value]`
    self.css_process.insert(
        r"\[([\w-]+)=([\w-]+)\]",
        Box::new(move |caps| {
            document
                .find(Attr(&caps[1], &caps[2]))
                .collect::<Vec<_>>()
        }),
    );

    // `tag.class`
    self.css_process.insert(
        r"^([\w-]+)\.([\w-]+)",
        Box::new(move |caps| {
            document
                .find(Name(&caps[1]))
                .filter(|n| has_class(n, &caps[2]))
                .collect::<Vec<_>>()
        }),
    );

    // `outer inner`
    self.css_process.insert(
        r"^([\w-]+)\s+([\w-]+)$",
        Box::new(move |caps| {
            document
                .find(Name(&caps[1]).descendant(Name(&caps[2])))
                .collect::<Vec<_>>()
        }),
    );

    // `[attr~pattern]`
    self.css_process.insert(
        r"\[([\w-]+)~([\w-]+)\]",
        Box::new(move |caps| {
            // The capture group only admits `\w` and `-`, which are all literal
            // characters in a regex, so compilation is not expected to fail.
            let value_regex = Regex::new(&caps[2])
                .expect("a pattern made of word characters and '-' is a valid regex");
            document
                .find(Any)
                .filter(|n| n.attr(&caps[1]).map_or(false, |v| value_regex.is_match(v)))
                .collect::<Vec<_>>()
        }),
    );

    // `tag.class inner` — first `inner` descendant of every matching parent.
    self.css_process.insert(
        r"^([\w-]+)\.([\w-]+)\s+([\w-]+)",
        Box::new(move |caps| {
            document
                .find(Name(&caps[1]))
                .filter(|n| has_class(n, &caps[2]))
                // Parents without such a descendant are skipped.
                .filter_map(|n| n.find(Name(&caps[3])).next())
                .collect::<Vec<_>>()
        }),
    );

    // `tag#id inner` — first `inner` descendant of every matching parent.
    self.css_process.insert(
        r"^([\w-]+)#([\w-]+)\s+([\w-]+)",
        Box::new(move |caps| {
            document
                .find(Name(&caps[1]))
                .filter(|n| n.attr("id").map_or(false, |v| v == &caps[2]))
                // Parents without such a descendant are skipped.
                .filter_map(|n| n.find(Name(&caps[3])).next())
                .collect::<Vec<_>>()
        }),
    );
}
/// Resolves `selector` to the nodes it designates.
///
/// Every registered pattern is tried against `selector`. Because several patterns
/// can match the same selector, the handler is chosen deterministically: the
/// pattern whose match covers the most bytes of the selector wins, and a tie is
/// broken in favour of the lexicographically smaller pattern text. Only the chosen
/// handler is executed.
///
/// Returns an empty vector when no registered pattern matches.
///
/// # Panics
///
/// Panics if a registered pattern is not a valid regular expression.
pub fn _select(&self, selector: &str) -> Vec<Node> {
    let best = self
        .css_process
        .iter()
        .filter_map(|(pattern, handler)| {
            let regex = Regex::new(pattern)
                .expect("registered selector pattern must be a valid regex");
            // A single `captures` call both tests the selector and extracts the groups.
            let caps = regex.captures(selector)?;
            let matched_len = caps.get(0).map_or(0, |m| m.end() - m.start());
            Some((matched_len, *pattern, caps, handler))
        })
        // Longest match first; the reversed pattern comparison makes the smaller
        // pattern text win a tie, so the choice never depends on map iteration order.
        .max_by(|a, b| a.0.cmp(&b.0).then_with(|| b.1.cmp(a.1)));

    match best {
        Some((_, _, caps, handler)) => handler(caps),
        None => Vec::new(),
    }
}
/// Returns the first node designated by `selector`.
///
/// # Panics
///
/// Panics when `selector` designates no node. Use `select_all` to handle the
/// empty case without panicking.
pub fn select(&self, selector: &str) -> Node {
    self._select(selector)
        .into_iter()
        .next()
        // Name the offending selector instead of failing with a bare index panic.
        .unwrap_or_else(|| panic!("no node matches selector `{}`", selector))
}
pub fn select_all(&self, selector: &str) -> Vec<Node> {
self._select(selector)
}
/// Extracts one string per node designated by `selector` and accepted by `filter`,
/// and wraps the strings in `Value::LIST`.
///
/// When `attr` is `"text"`, the string is the node's text with all whitespace
/// removed (the whitespace-separated pieces are concatenated). Otherwise it is the
/// value of the attribute named `attr`, or an empty string when the node has no
/// such attribute.
fn _find<F>(&self, selector: &str, filter: F, attr: &str) -> Value
where
    F: FnMut(&select::node::Node<'_>) -> bool,
{
    // Decide once which extraction applies instead of comparing per node.
    let wants_text = attr == "text";
    Value::LIST(
        self._select(selector)
            .into_iter()
            .filter(filter)
            .map(|node| {
                if wants_text {
                    node.text().split_whitespace().collect::<String>()
                } else {
                    node.attr(attr).unwrap_or_default().to_string()
                }
            })
            .collect(),
    )
}
pub fn find_all<F>(&self, selector: &str, filter: F, attr: &str) -> Value
where
F: FnMut(&select::node::Node<'_>) -> bool,
{
self._find(selector, filter, attr)
}
/// Returns, as `Value::STR`, the first string that `_find` extracts for `selector`,
/// `filter` and `attr`.
///
/// When nothing was extracted (no node matched, or `filter` rejected every node),
/// the result is an empty `Value::STR` rather than a panic.
pub fn find<F>(&self, selector: &str, filter: F, attr: &str) -> Value
where
    F: FnMut(&select::node::Node<'_>) -> bool,
{
    if let Value::LIST(items) = self._find(selector, filter, attr) {
        // Take ownership of the first element; an empty list falls back to "".
        Value::STR(items.into_iter().next().unwrap_or_default())
    } else {
        Value::STR(String::new())
    }
}
}