use futures::future::join_all;
use regex::Regex;
use std::env;
/// Number of results assumed per search request when splitting a query into pages.
const GOOGLE_RESULT_SIZE: usize = 10;
/// URL that every search request is sent to.
// References in `const` items are implicitly `'static`, so the lifetime is not spelled out.
const GOOGLE_ENDPOINT: &str = "https://www.googleapis.com/customsearch/v1";
/// Reads the search credentials from the process environment.
///
/// Returns `(api_key, search_engine_id)`, taken from the
/// `CUSTOM_SEARCH_API_KEY` and `SEARCH_ENGINE_ID` variables respectively.
///
/// # Panics
///
/// Panics if either variable cannot be read.
fn get_creds() -> (String, String) {
    // Tuple fields are evaluated left to right, so the API key is looked up
    // (and reported missing) before the engine ID.
    (
        env::var("CUSTOM_SEARCH_API_KEY").expect("variable `CUSTOM_SEARCH_API_KEY` not set"),
        env::var("SEARCH_ENGINE_ID").expect("variable `SEARCH_ENGINE_ID` not set"),
    )
}
/// Kind of search a request asks for.
///
/// Image search is the only kind represented.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchType {
    /// Search for images.
    Image,
}
/// Query parameters for a single search request.
///
/// `Params::to_list` converts the fields into the name/value pairs that are
/// attached to the request.
// Suppresses the unused-field lint: a field such as `search_type` may be
// stored without every code path reading it.
#[allow(dead_code)]
pub struct Params {
/// Search engine ID, sent as `cx`.
cx: String,
/// API key, sent as `key`.
key: String,
/// Kind of search to perform.
search_type: SearchType,
/// Search text, sent as `q`.
q: String,
/// Sent as `num`; `Params::new` sets it to 10.
num: usize,
/// Sent as `start`; `Fetcher::query_api` overwrites it for every request.
start: usize,
}
impl Params {
    /// Builds parameters pre-filled with the credentials from the environment.
    ///
    /// The search text starts empty, `num` is 10 and `start` is 0; callers
    /// are expected to fill in `q` (and `start`) before sending.
    ///
    /// # Panics
    ///
    /// Panics if `get_creds` cannot read one of the credential variables.
    pub fn new() -> Self {
        // `get_creds` yields `(api_key, search_engine_id)`.
        let (key, cx) = get_creds();
        Params {
            cx,
            key,
            search_type: SearchType::Image,
            q: String::new(),
            num: 10,
            start: 0,
        }
    }

    /// Consumes the parameters and returns them as `(name, value)` pairs in
    /// the order `cx`, `key`, `searchType`, `q`, `num`, `start`.
    pub fn to_list(self) -> Vec<(String, String)> {
        // Derive the value from the stored field instead of hard-coding it.
        // The match is deliberately exhaustive: a new `SearchType` variant
        // must fail to compile here until its request value is chosen.
        let search_type = match self.search_type {
            SearchType::Image => "image",
        };
        vec![
            ("cx".to_string(), self.cx),
            ("key".to_string(), self.key),
            ("searchType".to_string(), search_type.to_string()),
            ("q".to_string(), self.q),
            ("num".to_string(), self.num.to_string()),
            ("start".to_string(), self.start.to_string()),
        ]
    }
}
/// Groups the search routines; the struct itself has no fields.
pub struct Fetcher {
}
impl Fetcher {
    /// Searches for `query` and returns the links found in the responses.
    ///
    /// Underscores in `query` are turned into spaces by `format_query`. One
    /// request is issued per page of `GOOGLE_RESULT_SIZE` results: `n` is
    /// rounded up to whole pages and capped at ten requests, so `n == 0`
    /// sends nothing and returns an empty vector. The requests run
    /// concurrently as Tokio tasks and their links are concatenated in page
    /// order. Because whole pages are fetched, more than `n` links may be
    /// returned.
    ///
    /// A page whose request fails is reported on stderr and skipped, so one
    /// bad response does not discard the pages that succeeded.
    ///
    /// # Panics
    ///
    /// Panics if `Params::new` panics. `tokio::spawn` also requires a running
    /// Tokio runtime.
    pub async fn query_api(query: &str, n: usize) -> Vec<String> {
        // Upper bound on requests per call (ten pages of ten results).
        const MAX_QUERIES: usize = 10;

        // Ceiling division: one request per (possibly partial) page.
        let pages_needed = n / GOOGLE_RESULT_SIZE + usize::from(n % GOOGLE_RESULT_SIZE > 0);
        let nqueries = usize::min(pages_needed, MAX_QUERIES);

        // The formatted query is identical for every page; build it once.
        let search_terms = format_query(query);

        let tasks: Vec<_> = (0..nqueries)
            .map(|page| {
                let mut params = Params::new();
                // The API's `start` index is 1-based, so page 0 begins at
                // result 1, page 1 at result 11, and so on.
                params.start = page * GOOGLE_RESULT_SIZE + 1;
                params.q = search_terms.clone();
                tokio::spawn(async move { Self::google_search(params).await })
            })
            .collect();

        // `join_all` yields results in spawn order, so the index is the page.
        let mut urls = Vec::new();
        for (page, outcome) in join_all(tasks).await.into_iter().enumerate() {
            match outcome {
                Ok(Ok(page_urls)) => urls.extend(page_urls),
                Ok(Err(err)) => eprintln!("search request for page {} failed: {}", page, err),
                Err(err) => eprintln!("search task for page {} did not complete: {}", page, err),
            }
        }
        urls
    }

    /// Sends one search request and returns the links found in the response.
    ///
    /// # Errors
    ///
    /// Returns the `reqwest::Error` if the request cannot be sent, the server
    /// answers with an error status, or the body cannot be read.
    async fn google_search(params: Params) -> Result<Vec<String>, reqwest::Error> {
        let params = params.to_list();
        let client = reqwest::Client::new();
        let res = client
            .get(GOOGLE_ENDPOINT)
            .query(&params)
            .send()
            .await?
            // Without this check an error body would be scanned for links
            // and silently produce an empty page.
            .error_for_status()?;
        let body = res.text().await?;
        Ok(extract_urls(&body))
    }
}
/// Turns an underscore-separated topic into a space-separated search string.
///
/// Every `_` becomes a single space; all other characters are kept as they are.
fn format_query(topic: &str) -> String {
    topic.replace('_', " ")
}
/// Collects every `"link"` value in `text` that starts with `http://` or
/// `https://`, in order of appearance.
///
/// The text is scanned with a regular expression rather than parsed, so only
/// the literal `"link": "<url>"` shape is recognised.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile.
fn extract_urls(text: &str) -> Vec<String> {
    // NOTE(review): the pattern is compiled on every call.
    let link_pattern = Regex::new(r#""link":\s*"(https?://[^"]*)""#).unwrap();
    link_pattern
        .captures_iter(text)
        // Group 1 is the URL between the quotes.
        .filter_map(|captures| captures.get(1))
        .map(|link| link.as_str().to_string())
        .collect()
}