#![allow(unused_imports)]
#![allow(unused_variables)]
use scraper::{self, Html, Selector};
use headless_chrome::Browser;
use reqwest;
use serde_json::Value;
use std::error::Error;
use polars::prelude::*;
use urlencoding;
pub fn search(search_query: &str,max_pages: usize,save_to_path: Option<&str>) -> Result<DataFrame, Box<dyn Error>> {
let browser = Browser::default().unwrap();
let tab = browser.new_tab()?;
let search_url = format!("https://www.google.com/search?q={}", urlencoding::encode(search_query));
tab.navigate_to(&search_url)?;
let result_selector = Selector::parse("div.g").unwrap();
let title_selector = Selector::parse("h3").unwrap();
let url_selector = Selector::parse("a").unwrap();
let snippet_selector = Selector::parse("div.VwiC3b").unwrap();
let mut titles = Vec::new();
let mut urls = Vec::new();
let mut snippets = Vec::new();
for page in 1..=max_pages {
tab.wait_for_element("div.g")?;
let page_html_value = tab.evaluate("document.documentElement.outerHTML", false)?.value.unwrap();
let page_html = match page_html_value {
Value::String(html) => html,
_ => return Err("Failed to extract HTML content as a string.".into()),
};
let document = Html::parse_document(&page_html);
for result in document.select(&result_selector) {
let title = result.select(&title_selector)
.next()
.map(|element| element.text().collect::<Vec<_>>().join(" "))
.unwrap_or_default();
let url = result.select(&url_selector)
.next()
.and_then(|element| element.value().attr("href"))
.map(|s| s.to_string())
.unwrap_or_default();
let snippet = result.select(&snippet_selector)
.next()
.map(|element| element.text().collect::<Vec<_>>().join(" "))
.unwrap_or_default();
titles.push(title);
urls.push(url);
snippets.push(snippet);
}
if page < max_pages {
if let Ok(next_button) = tab.wait_for_element("a#pnnext") {
next_button.click()?;
} else {
println!("No more pages to scrape.");
break;
}
}
}
let df = DataFrame::new(vec![
Series::new("Title".into(), titles),
Series::new("URL".into(), urls),
Series::new("Snippet".into(), snippets),
])?;
if let Some(path) = save_to_path {
let file = std::fs::File::create(path)?;
let mut writer = CsvWriter::new(file);
writer.finish(&mut df.clone())?;
println!("DataFrame saved to: {}", path);
} else {
println!("No path provided, DataFrame not saved.");
}
Ok(df)
}