web_parser/document/
document.rs1use crate::prelude::*;
2use super::{ User, Node, Nodes };
3
4#[derive(Debug, Clone)]
6pub struct Document {
7 html: scraper::Html,
8 selector: scraper::Selector
9}
10
11impl Document {
12 pub fn parse(html: &str) -> Result<Self> {
14 Ok(Self {
15 html: scraper::Html::parse_document(&html),
16 selector: scraper::Selector::parse("html").unwrap()
17 })
18 }
19
20 pub async fn read(url: &str, user: User) -> Result<Self> {
22 let client = reqwest::Client::new();
23
24 let response = client
25 .get(url)
26 .header(reqwest::header::USER_AGENT, user.to_string())
27 .send().await?
28 .text().await?;
29
30 Self::parse(&response)
31 }
32
33 pub async fn text(url: &str, user: User) -> Result<String> {
35 let client = reqwest::Client::new();
36
37 let response = client
38 .get(url)
39 .header(reqwest::header::USER_AGENT, user.to_string())
40 .send()
41 .await?
42 .text()
43 .await?;
44
45 Ok(response)
46 }
47
48 pub async fn json<D: DeserializeOwned>(url: &str, user: User) -> Result<json::Result<D>> {
50 let client = reqwest::Client::new();
51
52 let response = client
53 .get(url)
54 .header(reqwest::header::USER_AGENT, user.to_string())
55 .send()
56 .await?
57 .text()
58 .await?;
59
60 Ok(serde_json::from_str(&response))
61 }
62
63 pub fn select(&self, selector: &'static str) -> Result<Option<Node>> {
65 let selector = scraper::Selector::parse(selector).map_err(Error::from)?;
66
67 let node = self.html
68 .select(&selector)
69 .next()
70 .map(Node::new);
71
72 Ok(node)
73 }
74
75 pub fn select_all(&mut self, selector: &'static str) -> Result<Option<Nodes>> {
77 self.selector = scraper::Selector::parse(selector).map_err(Error::from)?;
78 let mut nodes = self.html.select(&self.selector).peekable();
79
80 if nodes.peek().is_some() {
81 Ok(Some(Nodes::new(Some(nodes), None)))
82 } else {
83 Ok(None)
84 }
85 }
86}