web_scraper/
site.rs

/// Fetch the HTML content of a website so its contents can be scraped.
///
/// # Arguments
///
/// * `url` - The URL of the website to fetch
///
/// # Returns
///
/// * `Ok(String)` - The HTML content of the website
/// * `Err(Box<dyn std::error::Error>)` - An error if the request fails or the content type is not `text/html`
///
/// # Examples
///
/// ```no_run
/// use web_scraper::site::get_html;
/// use web_scraper::HtmlTag;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let url = "https://example.com";
///     let html = get_html(url).await?;
///     let tag = HtmlTag::DIV;
///     // Parse the <div> tags and collect the results into a vector of strings
///     let new_vector = tag.parse_tags(&html);
///     Ok(())
/// }
/// ```
pub async fn get_html(url: &str) -> Result<String, Box<dyn std::error::Error>> {
    let client = reqwest::Client::new();

    // Send the GET request, turning a failed request into a descriptive error.
    let response = match client.get(url).send().await {
        Ok(response) => response,
        Err(e) => {
            eprintln!("Error: {}\n", e);
            return Err(
                "Request failed, check if the url is valid\nFormat: https://example.com\nAlso check if the domain actually exists".to_string().into(),
            );
        }
    };

    // Reject non-HTML responses, as documented above: there is nothing to scrape in them.
    let content_type = response
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .unwrap_or("");
    if !content_type.starts_with("text/html") {
        return Err(format!("Expected a text/html response, got `{}`", content_type).into());
    }

    let html = response.text().await?;

    Ok(html)
}
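
// A minimal test sketch (assuming `tokio` with the `rt` and `macros` features
// is available as a dev-dependency). It exercises only the offline error path:
// reqwest rejects a malformed URL before any request is sent, so no network
// access is needed to run it.
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn invalid_url_returns_err() {
        // A malformed URL fails inside reqwest's URL parsing at send time.
        let result = get_html("not a valid url").await;
        assert!(result.is_err());
    }
}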