bing_image_scrapper/
lib.rs1use regex::Regex;
2use reqwest::header::{HeaderMap, USER_AGENT};
3
4pub async fn scrape_bing_images(query: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
5 let url = format!("https://www.bing.com/images/search?q={}", query);
6 let client = create_client()?;
7 let html = client.get(url).send().await?.text().await?;
8 let image_urls = extract_image_urls(&html)?;
9 Ok(image_urls)
10}
11
12fn create_client() -> Result<reqwest::Client, Box<dyn std::error::Error>> {
13 let mut headers = HeaderMap::new();
14 headers.insert(
15 USER_AGENT,
16 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
17 .parse()?,
18 );
19
20 Ok(reqwest::Client::builder()
21 .default_headers(headers)
22 .build()?)
23}
24
25fn extract_image_urls(html: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
26 let re = Regex::new(r#"murl":"(https?://[^&]+?)""#)?;
27 let mut image_urls = Vec::new();
28
29 for cap in re.captures_iter(html) {
30 if let Some(murl) = cap.get(1) {
31 image_urls.push(murl.as_str().to_string());
32 }
33 }
34
35 Ok(image_urls)
36}