use crate::error::Result;
use itertools::Itertools;
use scraper::ElementRef;
/// Parsed HTML content that CSS selectors can be run against.
///
/// Thin wrapper around [`scraper::Html`]; build one with [`Html::new`].
pub struct Html {
// The underlying `scraper` tree produced by parsing the input string.
value: scraper::Html,
}
impl Html {
    /// Parses `html_str` with [`scraper::Html::parse_fragment`] and wraps the
    /// resulting tree.
    pub fn new(html_str: &str) -> Self {
        Self {
            value: scraper::Html::parse_fragment(html_str),
        }
    }

    /// Compiles the CSS `selector` and binds it to this document.
    ///
    /// The returned [`Selectable`] borrows `self`; use its `iter`/`first`
    /// methods to obtain the matching elements.
    ///
    /// # Errors
    ///
    /// Returns an error when `selector` cannot be parsed by
    /// [`scraper::Selector::parse`].
    pub fn select(&self, selector: &str) -> Result<Selectable<'_, scraper::Html>> {
        // The explicit `'_` makes it visible that the result borrows `self`.
        Selectable::wrap(selector, &self.value)
    }
}
/// A compiled CSS selector paired with the node it will be evaluated against.
///
/// `T` is the kind of node being queried; this file provides query methods
/// for `scraper::Html` and `ElementRef`.
pub struct Selectable<'a, T> {
// Selector compiled from the user-supplied selector string.
selector: scraper::Selector,
// Borrowed node the selector is run against.
node: &'a T,
}
/// Iterator over the matches of a selector run against a whole document.
///
/// Wraps [`scraper::html::Select`] and yields [`SelectItem<'a>`] values.
pub struct HtmlSelectIterator<'a, 'b> {
// Underlying `scraper` iterator; each element it yields is wrapped in a `SelectItem`.
select: scraper::html::Select<'a, 'b>,
}
/// Iterator over the matches of a selector run against a single element.
///
/// Wraps [`scraper::element_ref::Select`] and yields [`SelectItem<'a>`] values.
pub struct ElementSelectIterator<'a, 'b> {
// Underlying `scraper` iterator; each element it yields is wrapped in a `SelectItem`.
select: scraper::element_ref::Select<'a, 'b>,
}
/// A single element produced by a selector query.
///
/// Wraps a [`scraper::ElementRef`] and exposes accessors for its name,
/// attributes, text and children, plus nested selection.
pub struct SelectItem<'a> {
// The wrapped element reference.
element: ElementRef<'a>,
}
impl<'a, T> Selectable<'a, T> {
fn wrap(selector: &str, html: &'a T) -> Result<Selectable<'a, T>> {
Ok(Self {
selector: scraper::Selector::parse(selector)?,
node: html,
})
}
}
/// A [`Selectable`] whose target is a whole parsed document.
pub type SelectableHtml<'a> = Selectable<'a, scraper::Html>;

impl<'a> Selectable<'a, scraper::Html> {
    /// Returns an iterator over every element in the document that matches
    /// the selector.
    ///
    /// The yielded items borrow from the document (`'a`), not from this
    /// `Selectable`, so they may outlive it; only the iterator itself is
    /// tied to the borrow of `self` (through the selector).
    pub fn iter(&self) -> HtmlSelectIterator<'a, '_> {
        // Copy the `&'a` reference out of `self` so the resulting iterator is
        // bound to the document lifetime rather than to the `&self` borrow.
        let document: &'a scraper::Html = self.node;
        HtmlSelectIterator {
            select: document.select(&self.selector),
        }
    }

    /// Returns the first matching element, or `None` when nothing matches.
    pub fn first(&self) -> Option<SelectItem<'a>> {
        self.iter().next()
    }
}
/// A [`Selectable`] whose target is a single element.
pub type SelectableElement<'a> = Selectable<'a, ElementRef<'a>>;

impl<'a> Selectable<'a, ElementRef<'a>> {
    /// Returns an iterator over the matches produced by running the selector
    /// against the wrapped element via `ElementRef::select`.
    ///
    /// The yielded items carry the lifetime `'a` of the wrapped element
    /// rather than the (possibly shorter) borrow of `self`; only the iterator
    /// itself is tied to `self` (through the selector).
    pub fn iter(&self) -> ElementSelectIterator<'a, '_> {
        // Copy the `&'a` reference out of `self` so the resulting iterator is
        // bound to the element lifetime rather than to the `&self` borrow.
        let element: &'a ElementRef<'a> = self.node;
        ElementSelectIterator {
            select: element.select(&self.selector),
        }
    }

    /// Returns the first matching element, or `None` when nothing matches.
    pub fn first(&self) -> Option<SelectItem<'a>> {
        self.iter().next()
    }
}
impl<'a, 'b> Iterator for HtmlSelectIterator<'a, 'b> {
type Item = SelectItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.select.next().map(Self::Item::new)
}
}
impl<'a, 'b> Iterator for ElementSelectIterator<'a, 'b> {
type Item = SelectItem<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.select.next().map(Self::Item::new)
}
}
/// Re-export of [`scraper::CaseSensitivity`], accepted by [`SelectItem::has_class`].
pub type CaseSensitivity = scraper::CaseSensitivity;
/// Re-export of [`scraper::node::Classes`], returned by [`SelectItem::classes`].
pub type Classes<'a> = scraper::node::Classes<'a>;
/// Re-export of [`scraper::node::Attrs`], returned by [`SelectItem::attrs`].
pub type Attrs<'a> = scraper::node::Attrs<'a>;
impl<'a> SelectItem<'a> {
pub fn new(element: ElementRef<'a>) -> Self {
Self { element }
}
pub fn name(&self) -> &str {
self.element.value().name()
}
pub fn id(&self) -> Option<&str> {
self.element.value().id()
}
pub fn has_class(&self, class: &str, case_sensitive: CaseSensitivity) -> bool {
self.element.value().has_class(class, case_sensitive)
}
pub fn classes(&self) -> Classes {
self.element.value().classes()
}
pub fn attrs(&self) -> Attrs {
self.element.value().attrs()
}
pub fn attr(&self, attr: &str) -> Option<&'a str> {
self.element.attr(attr)
}
pub fn text(&self) -> String {
self.element.text().join("").trim().into()
}
pub fn html(&self) -> String {
self.element.html()
}
pub fn inner_html(&self) -> String {
self.element.inner_html()
}
pub fn children(&self) -> impl Iterator<Item = SelectItem<'a>> {
self.element.child_elements().map(SelectItem::new)
}
pub fn select(&self, selector: &str) -> Result<Selectable<'a, ElementRef>> {
Selectable::wrap(selector, &self.element)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Selects an element by id, then selects its paragraph children through
    /// a nested query and checks their trimmed text.
    #[test]
    fn test_css_selector() {
        let html_str = r#"
<html>
<body>
<div id="content">
<p>Hello, World!</p>
<p>This is a test.</p>
</div>
</body>
</html>
"#;
        let html = Html::new(html_str);

        // `expect` keeps the underlying error in the panic message, unlike
        // `.ok().unwrap()`, which would discard it.
        let content = html
            .select("#content")
            .expect("`#content` should be a valid selector");
        let content = content
            .first()
            .expect("`#content` should match an element");
        assert_eq!(content.attr("id"), Some("content"));

        let p1 = content
            .select("p:nth-child(1)")
            .expect("`p:nth-child(1)` should be a valid selector");
        let p1 = p1.first().expect("first paragraph should be found");
        assert_eq!(p1.text(), "Hello, World!");

        let p2 = content
            .select("p:nth-child(2)")
            .expect("`p:nth-child(2)` should be a valid selector");
        let p2 = p2.first().expect("second paragraph should be found");
        assert_eq!(p2.text(), "This is a test.");
    }
}