web_scraper/site.rs
/// Fetch the HTML content of a website so it can be scraped.
///
/// # Arguments
///
/// * `url` - The URL of the website to fetch
///
/// # Returns
///
/// * `Ok(String)` - The HTML content of the website
/// * `Err(Box<dyn std::error::Error>)` - An error if the request fails or the content type is not text/html
///
/// # Examples
///
/// ```no_run
/// use web_scraper::site::get_html;
/// use web_scraper::HtmlTag;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let url = "https://example.com";
///     let html = get_html(url).await?;
///     let tag = HtmlTag::DIV;
///     // Parse the <div> tags and collect the results into a vector of strings
///     let divs = tag.parse_tags(&html);
///     Ok(())
/// }
/// ```
pub async fn get_html(url: &str) -> Result<String, Box<dyn std::error::Error>> {
    let client = reqwest::Client::new();
    let response = match client.get(url).send().await {
        Ok(response) => response,
        Err(e) => {
            eprintln!("Error: {}\n", e);
            return Err(
                "Request failed, check if the url is valid\nFormat: https://example.com\nAlso check if the domain actually exists".into(),
            );
        }
    };

    // The docs promise an error for non-HTML responses, so check the
    // Content-Type header before reading the body.
    let content_type = response
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .unwrap_or("");
    if !content_type.starts_with("text/html") {
        return Err(format!("Expected a text/html response, got: {}", content_type).into());
    }

    let html = response.text().await?;

    Ok(html)
}
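
// A minimal smoke-test sketch for `get_html`, assuming `tokio` (with the
// "macros" and "rt" features) is available as a dev-dependency. The first
// test performs a real network request, so it is ignored by default and can
// be run explicitly with `cargo test -- --ignored`.
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    #[ignore = "requires network access"]
    async fn returns_html_for_a_valid_url() {
        let html = get_html("https://example.com").await.unwrap();
        assert!(html.contains("<html"), "expected an HTML document");
    }

    #[tokio::test]
    async fn rejects_a_malformed_url() {
        // reqwest fails to build the request for an invalid URL, so this
        // case needs no network access.
        assert!(get_html("not a url").await.is_err());
    }
}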