// subscan/types/query.rs
use std::collections::BTreeSet;
use derive_more::From;
use itertools::Itertools;
use reqwest::Url;
use super::core::Subdomain;
/// Name of the URL query parameter that carries the search string
/// for search engines like `Google`, `Yahoo`, `Bing`, etc.
///
/// Wraps the parameter key (e.g. `q` or `s`) as an owned [`String`];
/// the examples in this module build it with `SearchQueryParam::from("q")`.
#[derive(Clone, Debug, From)]
#[from(&str)]
pub struct SearchQueryParam(String);
impl std::fmt::Display for SearchQueryParam {
    /// Writes the inner query parameter name exactly as stored
    ///
    /// Implementing [`std::fmt::Display`] instead of [`ToString`] directly
    /// keeps `to_string()` available through the standard blanket
    /// implementation and additionally allows the parameter to be used in
    /// `format!`-style macros. Formatting is delegated to the inner
    /// [`String`], so width and alignment flags are honored.
    ///
    /// # Examples
    ///
    /// ```
    /// use subscan::types::query::SearchQueryParam;
    ///
    /// let param = SearchQueryParam::from("q");
    ///
    /// assert_eq!(param.to_string(), "q");
    /// assert_eq!(format!("{}=foo", param), "q=foo");
    /// ```
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}
impl SearchQueryParam {
    /// Builds a complete [`SearchQuery`] that uses a copy of this
    /// [`SearchQueryParam`] together with the given `domain` and `prefix`
    ///
    /// Note the argument order: `domain` comes first here, while
    /// [`SearchQuery::new`] expects `prefix` before `domain`.
    ///
    /// # Example
    ///
    /// ```
    /// use subscan::types::query::SearchQueryParam;
    ///
    /// let domain = "foo.com";
    /// let prefix = "site:";
    ///
    /// let param = SearchQueryParam::from("q");
    /// let mut search_query = param.to_search_query(domain, prefix);
    ///
    /// assert_eq!(search_query.domain, domain);
    /// assert_eq!(search_query.prefix, prefix);
    /// assert_eq!(search_query.as_search_str(), "site:foo.com".to_string());
    /// ```
    pub fn to_search_query(&self, domain: &str, prefix: &str) -> SearchQuery {
        // The query owns its parameter, so hand it an independent copy
        let param = self.clone();

        SearchQuery::new(param, prefix, domain)
    }
}
/// Stores and manages the full search query string sent to
/// search engines while enumerating subdomains.
/// At the end of the day, the query looks like
/// `site:foo.com -www -api -app`
#[derive(Debug)]
pub struct SearchQuery {
/// URL query parameter under which the full query is sent
pub param: SearchQueryParam,
/// Query prefix, if available, like the google dorks
/// `site:`, `inurl:`, `intext:`, etc.
pub prefix: String,
/// Target domain to be included in the query
pub domain: String,
/// Query state: already found subdomains are kept here, each
/// stored with a dash (`-`) prefix. They are appended to the end
/// of the query so that search engines no longer list them
pub state: BTreeSet<String>,
}
impl SearchQuery {
/// Create a new [`SearchQuery`] instance with `prefix` and `domain` values
///
/// # Examples
///
/// ```no_run
/// use subscan::types::query::{SearchQuery, SearchQueryParam};
///
/// #[tokio::main]
/// async fn main() {
/// let param = SearchQueryParam::from("s");
/// let query = SearchQuery::new(param, "site:", "foo.com");
///
/// // do something with query
/// }
/// ```
pub fn new(param: SearchQueryParam, prefix: &str, domain: &str) -> Self {
Self {
param,
prefix: prefix.to_string(),
domain: domain.to_string(),
state: BTreeSet::new(),
}
}
/// Update query state with a single [`Subdomain`] value
///
/// # Examples
///
/// ```
/// use subscan::types::query::{SearchQuery, SearchQueryParam};
/// use subscan::types::core::Subdomain;
///
/// let param = SearchQueryParam::from("s");
/// let mut query = SearchQuery::new(param, "site:", "foo.com");
///
/// assert_eq!(query.as_search_str(), String::from("site:foo.com"));
/// assert_eq!(query.update(Subdomain::from("api.foo.com")), true);
/// assert_eq!(query.as_search_str(), String::from("site:foo.com -api"));
/// assert_eq!(query.update(Subdomain::from("api.foo.com")), false);
/// assert_eq!(query.update(Subdomain::from("bar")), false);
/// ```
pub fn update(&mut self, sub: Subdomain) -> bool {
let formatted = format!(".{}", self.domain);
if let Some(stripped) = sub.strip_suffix(&formatted) {
self.state.insert(format!("-{}", stripped.trim()))
} else {
false
}
}
/// Update query state with many [`Subdomain`] value
///
/// # Examples
///
/// ```
/// use std::collections::BTreeSet;
/// use subscan::types::query::{SearchQuery, SearchQueryParam};
/// use subscan::types::core::Subdomain;
///
/// let param = SearchQueryParam::from("s");
///
/// let news = BTreeSet::from_iter([
/// Subdomain::from("api.foo.com"),
/// Subdomain::from("app.foo.com"),
/// ]);
///
/// let mut query = SearchQuery::new(param, "site:", "foo.com");
///
/// assert_eq!(query.as_search_str(), String::from("site:foo.com"));
/// assert_eq!(query.update_many(news.clone()), true);
/// assert_eq!(query.as_search_str(), String::from("site:foo.com -api -app"));
/// assert_eq!(query.update_many(news), false);
/// ```
pub fn update_many(&mut self, subs: BTreeSet<Subdomain>) -> bool {
let filter_stmt = |item: &&String| self.update(item.to_string());
subs.iter().filter(filter_stmt).count() > 0
}
/// Returns fully query as a searchable on search engine
///
/// # Examples
///
/// ```
/// use subscan::types::query::{SearchQuery, SearchQueryParam};
/// use subscan::types::core::Subdomain;
///
/// let param = SearchQueryParam::from("s");
/// let mut query = SearchQuery::new(param, "site:", "foo.com");
///
/// assert_eq!(query.as_search_str(), "site:foo.com");
///
/// query.update("bar.foo.com".into());
/// query.update("baz.foo.com".into());
///
/// assert_eq!(query.as_search_str(), "site:foo.com -bar -baz")
/// ````
pub fn as_search_str(&mut self) -> String {
let suffix = self.state.iter().join(" ");
if suffix.is_empty() {
format!("{}{}", self.prefix, self.domain)
} else {
format!("{}{} {}", self.prefix, self.domain, suffix.trim())
}
}
/// According to given `base_url` returns searchable
/// [`reqwest::Url`] that includes fully search query
/// with current query state. Also extra URL query
/// parameters configurable with `extra_params` parameter
///
/// # Examples
///
/// ```
/// use subscan::types::query::{SearchQuery, SearchQueryParam};
/// use reqwest::Url;
///
/// let param = SearchQueryParam::from("s");
/// let base_url = Url::parse("https://bar.com").unwrap();
/// let extra_params = &[("bar".to_string(), "baz".to_string())];
///
/// let expected_url = Url::parse("https://bar.com/?bar=baz&s=site%3Afoo.com").unwrap();
///
/// let mut query = SearchQuery::new(param, "site:", "foo.com");
///
/// assert_eq!(query.as_url(base_url, extra_params), expected_url);
/// ````
pub fn as_url(&mut self, base_url: Url, extra_params: &[(String, String)]) -> Url {
let query_param = &[(self.param.to_string(), self.as_search_str())];
let params = [extra_params, query_param].concat();
Url::parse_with_params(base_url.as_ref(), params).expect("URL parse error!")
}
}