1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
mod bing; mod duckduckgo; mod google; mod stackoverflow; use crate::config::SearchEngine; use crate::error::{Error, Result}; use crate::utils::random_agent; use reqwest::{Client, ClientBuilder, RequestBuilder}; /// Search engine trait pub trait Engine { /// Get relative url to make search through query information. /// /// # Arguments /// /// * `query` - The user input query information. /// * `use_https` - Return query url which is https scheme or http scheme. /// /// # Returns /// /// Return the query url, which can be fired with HTTP GET request. fn get_query_url(&self, query: &str, use_https: bool) -> String; /// Extract stackoverflow links from given page. /// /// # Arguments /// /// * `page` - the search result page, which is mainly fetched from `http GET` method. /// /// Links to the relative question, or returns None if we can't find it. fn extract_links(&self, pages: &str) -> Option<Vec<String>>; } /// Search result links under the given search engine. /// /// This function will go through network to find out useful links. /// /// # Examples /// /// ```rust /// # async fn run() { /// use std::str::FromStr; /// use hors::{self, SearchEngine}; /// /// let search_engine: SearchEngine = SearchEngine::from_str("bing").unwrap(); /// let target_links: Vec<String> = hors::search_links( /// "how to parse json in rust", /// search_engine /// ) /// .await /// .unwrap(); /// assert_ne!(target_links.len(), 0); /// for link in target_links { /// assert!(link.contains("stackoverflow.com")) /// } /// # } /// ``` pub async fn search_links(query: &str, search_engine: SearchEngine) -> Result<Vec<String>> { let client: Client = ClientBuilder::new().cookie_store(true).build()?; search_links_with_client(query, search_engine, &client).await } /// Search result links under the given search engine. /// /// This function will go through network to find out useful links. /// /// # Examples /// /// ```rust /// use std::str::FromStr; /// use hors::{self, Config, OutputOption, Result, SearchEngine}; /// use reqwest::{Client, ClientBuilder}; /// /// # async fn run() { /// let search_engine: SearchEngine = SearchEngine::from_str("bing").unwrap(); /// // please make sure that `cookie_store` should set to `true` in client builder. /// let mut client: Client = ClientBuilder::new().cookie_store(true).build().unwrap(); /// let target_links: Vec<String> = hors::search_links_with_client( /// "how to parse json in rust", /// search_engine, /// &client /// ) /// .await /// .unwrap(); /// assert_ne!(target_links.len(), 0); /// for link in target_links { /// assert!(link.contains("stackoverflow.com")); /// } /// # } /// ``` /// /// # Returns /// /// If search links successfully, it will return a Vector of String, which indicate /// relative links to got answer. Else return an Error. pub async fn search_links_with_client( query: &str, search_engine: SearchEngine, client: &Client, ) -> Result<Vec<String>> { let https_opts: Vec<bool> = vec![true, false]; let engine: Box<dyn Engine> = match search_engine { SearchEngine::Bing => Box::new(bing::Bing), SearchEngine::Google => Box::new(google::Google), SearchEngine::DuckDuckGo => Box::new(duckduckgo::DuckDuckGo), SearchEngine::StackOverflow => Box::new(stackoverflow::StackOverflow::default()), }; for opt in https_opts { let fetch_url: String = get_query_url(query, &*engine, opt); let page: Result<String> = fetch(&fetch_url, client).await; match page { Ok(page) => { let extract_results = extract_links(&page, &*engine); if let Some(links) = extract_results { return Ok(links); } } Err(e) => warn!("Erorr for get url {}: {}", fetch_url, e), } } Err(Error::from_parse("Can't find search result...")) } fn get_query_url(query: &str, search_engine: &dyn Engine, use_https: bool) -> String { search_engine.get_query_url(query, use_https) } /// Fetch actual page according to given url. /// /// # Arguments /// /// * `search_url` - The url which should lead to search result page. /// * `client` - An instance of `request::Client` object which can use to fire http request, /// please ensure that it's build with cookie_store(true) option. /// /// # Returns /// /// If get search result page successfully, it will return the content of page, /// or returns error. async fn fetch(search_url: &str, client: &Client) -> Result<String> { let request: RequestBuilder = client .get(search_url) .header(reqwest::header::USER_AGENT, random_agent()); debug!("Request to bing information: {:?}", request); let res = request.send().await?; let page: String = res.text().await?; Ok(page) } /// Extract links from given page. /// /// # Arguments /// /// * `page` - the search result page, which is mainly got by `fetch` function. /// * `search_engine` - indicate which search engine we can use to extract links out. /// /// # Returns /// /// Links to the relative question, or returns None if we can't find it. fn extract_links(page: &str, search_engine: &dyn Engine) -> Option<Vec<String>> { search_engine.extract_links(page) }