use regex::Regex;
use reqwest::header::{HeaderMap, USER_AGENT};
pub async fn scrape_bing_images(query: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
let url = format!("https://www.bing.com/images/search?q={}", query);
let client = create_client()?;
let html = client.get(url).send().await?.text().await?;
let image_urls = extract_image_urls(&html)?;
Ok(image_urls)
}
/// Creates a `reqwest::Client` whose default headers carry a desktop Chrome
/// `User-Agent` string.
///
/// # Errors
///
/// Returns an error if the user-agent string is not a valid header value or
/// if the client builder fails.
fn create_client() -> Result<reqwest::Client, Box<dyn std::error::Error>> {
    // NOTE(review): presumably a browser-like agent is sent so the server
    // responds with the regular results page — confirm.
    let browser_agent: reqwest::header::HeaderValue =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            .parse()?;

    let mut default_headers = HeaderMap::new();
    default_headers.insert(USER_AGENT, browser_agent);

    let client = reqwest::Client::builder()
        .default_headers(default_headers)
        .build()?;
    Ok(client)
}
/// Collects every image URL that appears in `html` as the value of a
/// `murl":"…"` field, in document order.
///
/// Only `http://` and `https://` URLs are captured. The captured part may not
/// contain `&`, so a URL with an `&` before its closing quote is not matched
/// at all.
///
/// # Errors
///
/// Returns an error if the extraction pattern fails to compile.
fn extract_image_urls(html: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
    let murl_pattern = Regex::new(r#"murl":"(https?://[^&]+?)""#)?;

    // Group 1 holds the URL itself, without the surrounding field syntax.
    let found = murl_pattern
        .captures_iter(html)
        .filter_map(|caps| caps.get(1))
        .map(|url| url.as_str().to_string())
        .collect();

    Ok(found)
}