web_parser/parser/
document.rs

1use crate::prelude::*;
2use super::{ User, Node, Nodes };
3
4/// The website HTML-document
5#[derive(Debug, Clone)]
6pub struct Document {
7    html: scraper::Html,
8    selector: scraper::Selector
9}
10
11impl Document {
12    /// Reads website page as HTML document
13    pub async fn read(url: &str, user: User) -> Result<Self> {
14        let client = reqwest::Client::new();
15
16        let response = client
17            .get(url)
18            .header(reqwest::header::USER_AGENT, user.to_string())
19            .send().await?
20            .text().await?;
21
22
23        Ok(Self {
24            html: scraper::Html::parse_document(&response),
25            selector: scraper::Selector::parse("*").unwrap()
26        })
27    }
28
29    /// Reads website page as simple text
30    pub async fn text(url: &str, user: User) -> Result<String> {
31        let client = reqwest::Client::new();
32
33        let response = client
34            .get(url)
35            .header(reqwest::header::USER_AGENT, user.to_string())
36            .send()
37            .await?
38            .text()
39            .await?;
40
41        Ok(response)
42    }
43
44    /// Reads website page as json
45    pub async fn json<T>(url: &str, user: User) -> Result<serde_json::Result<T>>
46    where
47        T: serde::de::DeserializeOwned
48    {
49        let client = reqwest::Client::new();
50
51        let response = client
52            .get(url)
53            .header(reqwest::header::USER_AGENT, user.to_string())
54            .send()
55            .await?
56            .text()
57            .await?;
58
59        Ok(serde_json::from_str(&response))
60    }
61
62    /// Select HTML node by CSS selector
63    pub fn select(&self, selector: &'static str) -> Result<Option<Node>> {
64        let sel = scraper::Selector::parse(selector)?;
65        
66        let node = self.html
67            .select(&sel)
68            .next()
69            .map(Node::new);
70        
71        Ok(node)
72    }
73
74    /// Select HTML nodes by CSS selector
75    pub fn select_all(&mut self, selector: &'static str) -> Result<Option<Nodes>> {
76        self.selector = scraper::Selector::parse(selector)?;
77        let mut nodes = self.html.select(&self.selector).peekable();
78
79        if nodes.peek().is_some() {
80            Ok(Some(Nodes::new(Some(nodes), None)))
81        } else {
82            Ok(None)
83        }
84    }
85}