so/stackexchange/
search.rs

1use futures::stream::StreamExt;
2use rayon::prelude::*;
3use reqwest::header;
4use reqwest::Client;
5use std::sync::Arc;
6
7use crate::config::{Config, SearchEngine};
8use crate::error::{Error, Result};
9use crate::tui::markdown;
10use crate::tui::markdown::Markdown;
11
12use super::api::{Answer, Api, Question};
13use super::local_storage::SiteMap;
14use super::scraper::{DuckDuckGo, Google, ScrapedData, Scraper};
15
/// Limit on concurrent requests (gets passed to `buffer_unordered`);
/// caps how many site/question fetches are in flight at once.
const CONCURRENT_REQUESTS_LIMIT: usize = 8;
18
/// This structure provides methods to search queries and get StackExchange
/// questions/answers in return.
// TODO this really needs a better name...
#[derive(Debug, Clone)]
pub struct Search {
    /// Client for the StackExchange API endpoints
    pub api: Api,
    /// User configuration (search engine, sites, result limit, API key)
    pub config: Config,
    /// The raw search query string
    pub query: String,
    /// Shared mapping of StackExchange site codes/URLs
    pub site_map: Arc<SiteMap>,
}
29
/// The single top answer (plus its parent question) returned by a
/// "lucky" search (`Search::search_lucky`).
#[derive(Debug, Clone)]
pub struct LuckyAnswer {
    /// Preprocessed markdown content of the top answer
    pub answer: Answer<String>,
    /// Parent question the answer belongs to
    pub question: Question<String>,
}
37
38impl Search {
39    pub fn new(config: Config, site_map: Arc<SiteMap>, query: String) -> Self {
40        let api = Api::new(config.api_key.clone());
41        Search {
42            api,
43            config,
44            query,
45            site_map,
46        }
47    }
48
49    /// Search query and get the top answer body
50    ///
51    /// For StackExchange engine, use only the first configured site,
52    /// since, parodoxically, sites with the worst results will finish
53    /// executing first, because there's less data to retrieve.
54    ///
55    /// Needs mut because it temporarily changes self.config
56    pub async fn search_lucky(&mut self) -> Result<LuckyAnswer> {
57        let original_config = self.config.clone();
58        // Temp set lucky config
59        self.config.limit = 1;
60        if let SearchEngine::StackExchange = self.config.search_engine {
61            self.config.sites.truncate(1);
62        }
63        // Run search with temp config
64        let result = self.search().await;
65        // Reset config
66        self.config = original_config;
67
68        let question = result?.into_iter().next().ok_or(Error::NoResults)?;
69
70        let answer = question.answers.first().cloned().ok_or_else(|| {
71            Error::StackExchange(String::from("Received question with no answers"))
72        })?;
73
74        Ok(LuckyAnswer { answer, question })
75    }
76
77    /// Search and parse to Markdown for TUI
78    pub async fn search_md(&self) -> Result<Vec<Question<Markdown>>> {
79        Ok(parse_markdown(self.search().await?))
80    }
81
82    /// Search using the configured search engine
83    pub async fn search(&self) -> Result<Vec<Question<String>>> {
84        match self.config.search_engine {
85            SearchEngine::DuckDuckGo => self.search_by_scraper(DuckDuckGo).await,
86            SearchEngine::Google => self.search_by_scraper(Google).await,
87            SearchEngine::StackExchange => self.parallel_search_advanced().await,
88        }
89        .and_then(|qs| {
90            if qs.is_empty() {
91                Err(Error::NoResults)
92            } else {
93                Ok(qs)
94            }
95        })
96    }
97
98    /// Search query at duckduckgo and then fetch the resulting questions from SE.
99    async fn search_by_scraper(&self, scraper: impl Scraper) -> Result<Vec<Question<String>>> {
100        let url = scraper.get_url(&self.query, self.site_map.values());
101        let html = Client::new()
102            .get(url)
103            .header(header::USER_AGENT, super::USER_AGENT)
104            .send()
105            .await?
106            .text()
107            .await?;
108        let data = scraper.parse(&html, self.site_map.as_ref(), self.config.limit)?;
109        log::debug!("Scraped question IDs: {:#?}", &data.question_ids);
110        self.parallel_questions(data).await
111    }
112
113    /// Parallel requests against the SE question endpoint across all sites in data.
114    // TODO I'm sure there is a way to DRY the following two functions
115    async fn parallel_questions(&self, data: ScrapedData) -> Result<Vec<Question<String>>> {
116        let ScrapedData {
117            question_ids,
118            ordering,
119        } = data;
120        futures::stream::iter(question_ids)
121            .map(|(site, ids)| {
122                let api = self.api.clone();
123                tokio::spawn(async move { api.questions(&site, ids).await })
124            })
125            .buffer_unordered(CONCURRENT_REQUESTS_LIMIT)
126            .collect::<Vec<_>>()
127            .await
128            .into_iter()
129            .map(|r| r.map_err(Error::from).and_then(|x| x))
130            .collect::<Result<Vec<Vec<_>>>>()
131            .map(|v| {
132                let mut qs: Vec<Question<String>> = v.into_iter().flatten().collect();
133                qs.sort_unstable_by_key(|q| ordering.get(&q.id.to_string()).unwrap());
134                qs
135            })
136    }
137
138    /// Parallel requests against the SE search/advanced endpoint across all configured sites
139    async fn parallel_search_advanced(&self) -> Result<Vec<Question<String>>> {
140        futures::stream::iter(self.config.sites.clone())
141            .map(|site| {
142                let api = self.api.clone();
143                let limit = self.config.limit;
144                let query = self.query.clone();
145                tokio::spawn(async move { api.search_advanced(&query, &site, limit).await })
146            })
147            .buffer_unordered(CONCURRENT_REQUESTS_LIMIT)
148            .collect::<Vec<_>>()
149            .await
150            .into_iter()
151            .map(|r| r.map_err(Error::from).and_then(|x| x))
152            .collect::<Result<Vec<Vec<_>>>>()
153            .map(|v| {
154                let mut qs: Vec<Question<String>> = v.into_iter().flatten().collect();
155                if self.config.sites.len() > 1 {
156                    qs.sort_unstable_by_key(|q| -q.score);
157                }
158                qs
159            })
160    }
161}
162
163/// Parse all markdown fields
164/// This only happens for content going into the cursive TUI (not lucky prompt)
165fn parse_markdown(qs: Vec<Question<String>>) -> Vec<Question<Markdown>> {
166    qs.into_par_iter()
167        .map(|q| {
168            let body = markdown::parse(q.body);
169            let answers = q
170                .answers
171                .into_par_iter()
172                .map(|a| {
173                    let body = markdown::parse(a.body);
174                    Answer {
175                        body,
176                        id: a.id,
177                        score: a.score,
178                        is_accepted: a.is_accepted,
179                    }
180                })
181                .collect::<Vec<_>>();
182            Question {
183                body,
184                answers,
185                id: q.id,
186                score: q.score,
187                title: q.title,
188                site: q.site,
189            }
190        })
191        .collect::<Vec<_>>()
192}
193
194// TODO find a query that returns no results so that I can test it and
195// differentiate it from a blocked request
#[cfg(test)]
mod tests {

    // Placeholder: currently asserts nothing; kept as a reminder of the
    // intended coverage.
    #[test]
    fn test_duckduckgo_response() {
        // TODO make sure results are either 1) answers 2) failed connection 3) blocked
    }
}
203}