web_parser/document/
node.rs1use crate::prelude::*;
2use super::Nodes;
3
4#[derive(Debug, Clone)]
6pub struct Node<'a> {
7 element: scraper::ElementRef<'a>,
8 selector: scraper::Selector
9}
10
11impl<'a> Node<'a> {
12 pub(crate) fn new(element: scraper::ElementRef<'a>) -> Self {
14 Self {
15 element,
16 selector: scraper::Selector::parse("*").unwrap()
17 }
18 }
19
20 pub fn select(&self, selector: &'static str) -> Result<Option<Node<'a>>> {
22 let sel = scraper::Selector::parse(selector).map_err(Error::from)?;
23
24 let node = self.element
25 .select(&sel)
26 .next()
27 .map(Node::new);
28
29 Ok(node)
30 }
31
32 pub fn select_all(&mut self, selector: &'static str) -> Result<Option<Nodes>> {
34 self.selector = scraper::Selector::parse(selector).map_err(Error::from)?;
35 let mut nodes = self.element.select(&self.selector).peekable();
36
37 if nodes.peek().is_some() {
38 Ok(Some(Nodes::new(None, Some(nodes))))
39 } else {
40 Ok(None)
41 }
42 }
43
44 pub fn parent(&self) -> Option<Node<'a>> {
46 self.element.parent()
47 .and_then(|node| scraper::ElementRef::wrap(node))
48 .map(Node::new)
49 }
50
51 pub fn attr(&self, name: &str) -> Option<&str> {
53 self.element.value().attr(name)
54 }
55
56 pub fn text(&self) -> String {
58 self.element.text().collect()
59 }
60
61 pub fn html(&self) -> String {
63 self.element.html()
64 }
65
66 pub fn filter_text(&self, black_list: &[&str]) -> String {
68 Self::filter_elem_text(self.element, black_list)
69 .split_whitespace()
70 .collect::<Vec<_>>()
71 .join(" ")
72 }
73
74 fn filter_elem_text(node: scraper::element_ref::ElementRef, black_list: &[&str]) -> String {
76 let tag_name = node.value().name();
77
78 if black_list.contains(&tag_name) {
80 return String::new();
81 }
82
83 let mut result = String::new();
85
86 for child in node.children() {
87 match child.value() {
88 scraper::node::Node::Text(text) => {
89 result.push(' ');
90 result.push_str(text);
91 }
92 scraper::node::Node::Element(_) => {
93 if let Some(child_element) = scraper::ElementRef::wrap(child) {
94 result.push(' ');
95 result.push_str(&Self::filter_elem_text(child_element, black_list));
96 }
97 }
98 _ => {}
99 }
100 }
101 result
102 }
103}