web_parser/document/
document.rs

1use crate::prelude::*;
2use super::{ User, Node, Nodes };
3
4/// The website HTML-document
5#[derive(Debug, Clone)]
6pub struct Document {
7    html: scraper::Html,
8    selector: scraper::Selector
9}
10
11impl Document {
12    /// Parse HTML document from text
13    pub fn parse(html: &str) -> Result<Self> {
14        Ok(Self {
15            html: scraper::Html::parse_document(&html),
16            selector: scraper::Selector::parse("html").unwrap()
17        })
18    }
19    
20    /// Reads website page as HTML document
21    pub async fn read(url: &str, user: User) -> Result<Self> {
22        let client = reqwest::Client::new();
23
24        let response = client
25            .get(url)
26            .header(reqwest::header::USER_AGENT, user.to_string())
27            .send().await?
28            .text().await?;
29
30        Self::parse(&response)
31    }
32
33    /// Reads website page as simple text
34    pub async fn text(url: &str, user: User) -> Result<String> {
35        let client = reqwest::Client::new();
36
37        let response = client
38            .get(url)
39            .header(reqwest::header::USER_AGENT, user.to_string())
40            .send()
41            .await?
42            .text()
43            .await?;
44
45        Ok(response)
46    }
47
48    /// Reads website page as json
49    pub async fn json<D: DeserializeOwned>(url: &str, user: User) -> Result<json::Result<D>> {
50        let client = reqwest::Client::new();
51
52        let response = client
53            .get(url)
54            .header(reqwest::header::USER_AGENT, user.to_string())
55            .send()
56            .await?
57            .text()
58            .await?;
59
60        Ok(serde_json::from_str(&response))
61    }
62
63    /// Select HTML node by CSS selector
64    pub fn select(&self, selector: &'static str) -> Result<Option<Node>> {
65        let selector = scraper::Selector::parse(selector).map_err(Error::from)?;
66        
67        let node = self.html
68            .select(&selector)
69            .next()
70            .map(Node::new);
71        
72        Ok(node)
73    }
74
75    /// Select HTML nodes by CSS selector
76    pub fn select_all(&mut self, selector: &'static str) -> Result<Option<Nodes>> {
77        self.selector = scraper::Selector::parse(selector).map_err(Error::from)?;
78        let mut nodes = self.html.select(&self.selector).peekable();
79
80        if nodes.peek().is_some() {
81            Ok(Some(Nodes::new(Some(nodes), None)))
82        } else {
83            Ok(None)
84        }
85    }
86}