1use futures::future::join_all;
2use regex::Regex;
3use std::env;
4
/// Page size used when splitting a request for `n` results into several queries.
const GOOGLE_RESULT_SIZE: usize = 10;
/// URL that every Custom Search request is sent to.
const GOOGLE_ENDPOINT: &str = "https://www.googleapis.com/customsearch/v1";
7
/// Reads the API credentials from the process environment.
///
/// Returns `(api_key, search_engine_id)`, taken from `CUSTOM_SEARCH_API_KEY`
/// and `SEARCH_ENGINE_ID` respectively.
///
/// # Panics
///
/// Panics if either variable is missing or is not valid Unicode. The API key
/// is looked up first, so it is the one reported when both are absent.
fn get_creds() -> (String, String) {
    // Tuple fields are evaluated left to right, which keeps the lookup order.
    (
        env::var("CUSTOM_SEARCH_API_KEY").expect("variable `CUSTOM_SEARCH_API_KEY` not set"),
        env::var("SEARCH_ENGINE_ID").expect("variable `SEARCH_ENGINE_ID` not set"),
    )
}
16
/// Kind of search a request asks for.
///
/// Image search is the only kind modelled here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchType {
    /// Search for images.
    Image,
}
20
/// Query-string parameters for one Custom Search request.
///
/// Built by `Params::new` and flattened into key/value pairs by
/// `Params::to_list`.
// Silences the dead-code lint for any field that is stored but never read back.
#[allow(dead_code)]
pub struct Params {
    /// Search engine id, sent as `cx` (read from `SEARCH_ENGINE_ID`).
    cx: String,
    /// API key, sent as `key` (read from `CUSTOM_SEARCH_API_KEY`).
    key: String,
    /// Kind of search being requested.
    search_type: SearchType,
    /// Search text, sent as `q`.
    q: String,
    /// Sent as `num`; presumably the number of results wanted per request
    /// (confirm against the API reference).
    num: usize,
    /// Sent as `start`; presumably the index of the first result to return
    /// (confirm against the API reference).
    start: usize,
}
30
31impl Params {
32 pub fn new() -> Self {
33 let (key, cx) = get_creds();
34 Params {
35 cx,
36 key,
37 search_type: SearchType::Image,
38 q: String::new(),
39 num: 10, start: 0,
41 }
42 }
43
44 pub fn to_list(self) -> Vec<(String, String)> {
45 vec![
46 ("cx".to_string(), self.cx),
47 ("key".to_string(), self.key),
48 ("searchType".to_string(), "image".to_string()), ("q".to_string(), self.q),
50 ("num".to_string(), self.num.to_string()),
51 ("start".to_string(), self.start.to_string()),
52 ]
53 }
54}
55
/// Stateless handle that groups the Custom Search request functions.
pub struct Fetcher {}
59
60impl Fetcher {
61 pub async fn query_api(query: &str, n: usize) -> Vec<String> {
62 let maybe_one = if n % GOOGLE_RESULT_SIZE > 0 { 1 } else { 0 };
64 let nqueries = n / GOOGLE_RESULT_SIZE + maybe_one + 1; let nqueries = usize::min(nqueries, 10);
66
67 let mut futures = vec![];
68 for i in 0..nqueries {
69 let mut offset = 0;
71 if i > 0 {
72 offset = 10 * i + 1;
73 }
74
75 let mut params = Params::new();
77 params.start = offset;
78 params.q = format_query(query);
79
80 futures.push(tokio::spawn(async move {
81 Self::google_search(params).await.unwrap()
82 }));
83 }
84
85 let urls: Vec<Vec<String>> = join_all(futures)
86 .await
87 .into_iter()
88 .map(|x| x.unwrap())
89 .collect();
90
91 let urls = urls.into_iter().flatten().collect();
92 urls
93 }
94
95 async fn google_search(params: Params) -> Result<Vec<String>, reqwest::Error> {
97 let params = params.to_list();
98
99 let client = reqwest::Client::new();
100 let res = client.get(GOOGLE_ENDPOINT).query(¶ms).send().await?;
101 let body = res.text().await?;
102
103 let urls = extract_urls(&body);
104 Ok(urls)
105 }
106}
107
/// Converts an underscore-separated topic into a space-separated search phrase.
///
/// Every `_` becomes a single space; all other characters, including
/// consecutive, leading or trailing separators, are preserved one-for-one.
fn format_query(topic: &str) -> String {
    topic.replace('_', " ")
}
113
114fn extract_urls(text: &str) -> Vec<String> {
116 let re = Regex::new(r#""link":\s*"(https?://[^"]*)""#).unwrap();
118
119 let mut urls = Vec::new();
121
122 for cap in re.captures_iter(text) {
124 if let Some(url) = cap.get(1) {
126 urls.push(url.as_str().to_string());
127 }
128 }
129
130 urls
131}
132
133