use std::fmt::Debug;
use std::ops::Index;
use crate::html::{HtmlContent, HtmlQueryable};
use log::trace;
#[cfg(test)]
mod tests;
/// Positional pseudo-classes that can be attached to a [`CssSelector`].
///
/// The variants are named after the CSS structural pseudo-classes. This type
/// only carries the parsed information; how a node is tested against a
/// variant (including how the `usize` index is interpreted) is decided by the
/// matching code, which lives outside this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CssPseudoClass {
    /// Named after CSS `:first-child`.
    FirstChild,
    /// Named after CSS `:nth-child(n)`; carries the index `n`.
    NthChild(usize),
    /// Named after CSS `:first-of-type`.
    FirstOfType,
    /// Named after CSS `:nth-of-type(n)`; carries the index `n`.
    NthOfType(usize),
    /// Named after CSS `:last-child`.
    LastChild,
    /// Named after CSS `:nth-last-child(n)`; carries the index `n`.
    NthLastChild(usize),
    /// Named after CSS `:last-of-type`.
    LastOfType,
    /// Named after CSS `:nth-last-of-type(n)`; carries the index `n`.
    NthLastOfType(usize),
}
/// Comparison operators usable in a CSS attribute selector.
///
/// The semantics listed per variant are the ones implemented by
/// [`CssAttributeSelector::matches`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CssAttributeComparison {
    /// No value comparison at all (CSS `[attr]`); always matches.
    Exist,
    /// The value starts with the expected string (CSS `^=`).
    Starts,
    /// The value ends with the expected string (CSS `$=`).
    Ends,
    /// The value contains the expected string anywhere (CSS `*=`).
    CharacterContains,
    /// One of the whitespace-separated terms of the value equals the expected
    /// string (CSS `~=`).
    TermContains,
    /// The value equals the expected string (CSS `=`).
    EqualsExact,
    /// The value equals the expected string, or starts with the expected
    /// string immediately followed by `-` (CSS `|=`).
    EqualsTillHyphen,
}

/// A single attribute condition of a selector: attribute name, comparison
/// operator and, for every operator except
/// [`CssAttributeComparison::Exist`], the value to compare against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CssAttributeSelector<'a> {
    /// Name of the attribute the condition refers to.
    pub(crate) attribute: &'a str,
    /// How the attribute's value is compared with `value`.
    pub(crate) operator: CssAttributeComparison,
    /// Expected value; `matches` requires it to be `Some` unless the operator
    /// is `Exist`.
    pub(crate) value: Option<&'a str>,
}

impl<'a> CssAttributeSelector<'a> {
    /// Tests a string against this selector's operator and expected value.
    ///
    /// `attribute` is the text that gets compared with `self.value` (i.e. the
    /// attribute's *value*); the attribute name stored in `self.attribute` is
    /// not consulted here.
    ///
    /// Returns `true` when the comparison described by `self.operator` holds.
    /// For [`CssAttributeComparison::Exist`] the result is always `true`.
    ///
    /// # Panics
    ///
    /// Panics if the operator is not `Exist` and `self.value` is `None`.
    pub(crate) fn matches(&self, attribute: impl Into<String>) -> bool {
        // `Exist` has no expected value, so answer it before touching
        // `self.value` (and before paying for the `String` conversion).
        if self.operator == CssAttributeComparison::Exist {
            return true;
        }
        let given_value: String = attribute.into();
        let expected_value = self.value.expect(
            "If operator is not Exist a value must be given or the parser works incorrectly",
        );
        match self.operator {
            CssAttributeComparison::Exist => unreachable!(),
            CssAttributeComparison::Starts => given_value.starts_with(expected_value),
            CssAttributeComparison::Ends => given_value.ends_with(expected_value),
            CssAttributeComparison::CharacterContains => given_value.contains(expected_value),
            CssAttributeComparison::TermContains => given_value
                .split_whitespace()
                .any(|term| term == expected_value),
            CssAttributeComparison::EqualsExact => given_value == expected_value,
            CssAttributeComparison::EqualsTillHyphen => {
                Self::equals_till_hyphen(expected_value, &given_value)
            }
        }
    }

    /// Implements the `|=` comparison: `given_value` matches when it is
    /// exactly `expected_value` or begins with `expected_value` immediately
    /// followed by a hyphen.
    ///
    /// Comparing against the prefix (instead of cutting `given_value` at its
    /// first hyphen) keeps expected values that contain a hyphen themselves,
    /// such as `en-US`, matchable.
    fn equals_till_hyphen(expected_value: &str, given_value: &str) -> bool {
        match given_value.strip_prefix(expected_value) {
            Some(rest) => rest.is_empty() || rest.starts_with('-'),
            None => false,
        }
    }
}
/// One compound selector: an optional element name, an optional id, and any
/// number of classes, pseudo-classes and attribute conditions.
///
/// This type only stores the parsed parts. Whether a node satisfies them is
/// decided by `matches_selector`, which is implemented outside this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CssSelector<'a> {
    /// Element (tag) name part, if the selector has one.
    pub(crate) element: Option<&'a str>,
    /// Id part, if the selector has one.
    pub(crate) id: Option<&'a str>,
    /// Class parts of the selector.
    pub(crate) classes: Vec<&'a str>,
    /// Pseudo-class parts of the selector.
    pub(crate) pseudo_classes: Vec<CssPseudoClass>,
    /// Attribute conditions of the selector.
    pub(crate) attributes: Vec<CssAttributeSelector<'a>>,
}

impl<'a> CssSelector<'a> {
    /// Test helper: a selector that only has an element name.
    #[cfg(test)]
    pub(crate) fn for_element(element: &'a str) -> Self {
        Self {
            element: Some(element),
            id: None,
            classes: Vec::new(),
            pseudo_classes: Vec::new(),
            attributes: Vec::new(),
        }
    }

    /// Test helper: a selector that only has an id.
    #[cfg(test)]
    pub(crate) fn for_id(id: &'a str) -> Self {
        Self {
            element: None,
            id: Some(id),
            classes: Vec::new(),
            pseudo_classes: Vec::new(),
            attributes: Vec::new(),
        }
    }

    /// Test helper: a selector that only has a single class.
    #[cfg(test)]
    pub(crate) fn for_class(class: &'a str) -> Self {
        Self {
            element: None,
            id: None,
            classes: vec![class],
            pseudo_classes: Vec::new(),
            attributes: Vec::new(),
        }
    }

    /// Test helper: a selector that only has the given classes.
    #[cfg(test)]
    pub(crate) fn for_classes(classes: Vec<&'a str>) -> Self {
        Self {
            element: None,
            id: None,
            classes,
            pseudo_classes: Vec::new(),
            attributes: Vec::new(),
        }
    }

    /// Test helper: a selector that only has a single attribute condition.
    #[cfg(test)]
    pub(crate) fn for_attribute(attribute: CssAttributeSelector<'a>) -> Self {
        Self {
            element: None,
            id: None,
            classes: Vec::new(),
            pseudo_classes: Vec::new(),
            attributes: vec![attribute],
        }
    }

    /// Returns handles to those of `nodes` that match this selector.
    ///
    /// Only the nodes passed in are tested (no traversal happens here); the
    /// input order is preserved. Each result is a clone of the node handle.
    ///
    /// Takes a slice so callers are not forced to own a `Vec`; passing
    /// `&vec` keeps working through deref coercion.
    pub(crate) fn query(
        &self,
        nodes: &[rctree::Node<HtmlContent>],
    ) -> Vec<rctree::Node<HtmlContent>> {
        nodes
            .iter()
            .filter(|&node| node.matches_selector(self))
            .cloned()
            .collect()
    }
}
/// Describes how the candidate nodes of a selector step are derived from the
/// nodes matched by the previous step (see
/// `CssSelectorPath::resolve_combinator`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CssSelectorCombinator {
    /// First step of a path: candidates are the start nodes themselves plus
    /// everything below them.
    Start,
    /// Candidates are all descendants of the previous matches, excluding the
    /// matched nodes themselves.
    Descendent,
    /// Candidates are the children of the previous matches.
    DirectChild,
    /// Candidates are the siblings following the previous matches.
    GeneralSibling,
    /// Candidates are, for each previous match, the nearest following sibling
    /// that is a tag.
    AdjacentSibling,
}
/// One step of a selector path: a selector plus the combinator that says
/// where, relative to the previous step's matches, the selector is applied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CssSelectorStep<'a> {
    /// Selector that candidate nodes of this step must match.
    pub selector: CssSelector<'a>,
    /// Relation between the previous step's matches and this step's candidates.
    pub combinator: CssSelectorCombinator,
}

impl<'a> CssSelectorStep<'a> {
    /// Shared constructor behind the public, combinator-specific ones.
    fn with_combinator(selector: CssSelector<'a>, combinator: CssSelectorCombinator) -> Self {
        Self {
            combinator,
            selector,
        }
    }

    /// Creates a step using [`CssSelectorCombinator::Start`].
    pub fn start(selector: CssSelector<'a>) -> Self {
        Self::with_combinator(selector, CssSelectorCombinator::Start)
    }

    /// Creates a step using [`CssSelectorCombinator::DirectChild`].
    pub fn direct_child(selector: CssSelector<'a>) -> Self {
        Self::with_combinator(selector, CssSelectorCombinator::DirectChild)
    }

    /// Creates a step using [`CssSelectorCombinator::Descendent`].
    pub fn descendent(selector: CssSelector<'a>) -> Self {
        Self::with_combinator(selector, CssSelectorCombinator::Descendent)
    }

    /// Creates a step using [`CssSelectorCombinator::GeneralSibling`].
    pub fn general_sibling(selector: CssSelector<'a>) -> Self {
        Self::with_combinator(selector, CssSelectorCombinator::GeneralSibling)
    }

    /// Creates a step using [`CssSelectorCombinator::AdjacentSibling`].
    pub fn adjacent_sibling(selector: CssSelector<'a>) -> Self {
        Self::with_combinator(selector, CssSelectorCombinator::AdjacentSibling)
    }
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CssSelectorPath<'a>(Vec<CssSelectorStep<'a>>);
impl<'a> CssSelectorPath<'a> {
#[cfg(test)]
pub fn single(step: CssSelector<'a>) -> Self {
CssSelectorPath(vec![CssSelectorStep::start(step)])
}
pub fn new(start: CssSelector<'a>, rest: Vec<CssSelectorStep<'a>>) -> Self {
let mut list = vec![CssSelectorStep::start(start)];
list.extend_from_slice(&rest);
CssSelectorPath(list)
}
pub(crate) fn query(
&self,
start: &[rctree::Node<HtmlContent>],
) -> Vec<rctree::Node<HtmlContent>> {
let mut findings = start.iter().map(rctree::Node::clone).collect::<Vec<_>>();
for step in &self.0 {
let candidates = Self::resolve_combinator(&step.combinator, findings);
findings = step.selector.query(&candidates);
}
findings
}
fn resolve_combinator(
combinator: &CssSelectorCombinator,
source: Vec<rctree::Node<HtmlContent>>,
) -> Vec<rctree::Node<HtmlContent>> {
match combinator {
CssSelectorCombinator::Start => source
.iter()
.flat_map(|s| s.descendants())
.collect::<Vec<_>>(),
CssSelectorCombinator::Descendent => source
.iter()
.flat_map(|s| s.descendants().filter(move |d| s != d))
.collect::<Vec<_>>(),
CssSelectorCombinator::DirectChild => {
source.iter().flat_map(|s| s.children()).collect::<Vec<_>>()
}
CssSelectorCombinator::GeneralSibling => source
.iter()
.flat_map(|s| s.following_siblings().filter(move |d| s != d))
.collect::<Vec<_>>(),
CssSelectorCombinator::AdjacentSibling => source
.iter()
.flat_map(Self::find_tag_sibling)
.collect::<Vec<_>>(),
}
}
fn find_tag_sibling(start: &rctree::Node<HtmlContent>) -> Option<rctree::Node<HtmlContent>> {
let mut candidate = start.next_sibling();
while let Some(node) = candidate {
if node.borrow().is_tag() {
return Some(node);
} else {
candidate = node.next_sibling();
}
}
None
}
}
impl<'a> Index<usize> for CssSelectorPath<'a> {
type Output = CssSelectorStep<'a>;
fn index(&self, index: usize) -> &Self::Output {
&self.0[index]
}
}
/// A list of selector paths that are all evaluated against the same start
/// nodes; their results are concatenated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CssSelectorList<'a>(Vec<CssSelectorPath<'a>>);

impl<'a> CssSelectorList<'a> {
    /// Wraps the given paths into a selector list, keeping their order.
    pub fn new(content: Vec<CssSelectorPath<'a>>) -> Self {
        Self(content)
    }

    /// Runs every path of the list against `start` and returns all matches,
    /// path by path, in list order.
    ///
    /// The selector is written to the `trace` log before querying.
    pub(crate) fn query(
        &self,
        start: &[rctree::Node<HtmlContent>],
    ) -> Vec<rctree::Node<HtmlContent>> {
        trace!("Querying using Selector {:#?}", &self.0);
        let mut findings = Vec::new();
        for path in &self.0 {
            findings.extend(path.query(start));
        }
        findings
    }
}