use crate::utils::parse_url_with_raw_path;
use crate::{event_handlers::Handles, statistics::StatError::UrlFormat, Command::AddError};
use anyhow::{anyhow, bail, Result};
use reqwest::Url;
use std::collections::HashSet;
use std::{fmt, sync::Arc};
/// Scope-related helper methods; this file implements them for [`Url`].
pub trait UrlExt {
/// Returns `true` when this url is considered to be within `scope`.
///
/// In the [`Url`] implementation in this file, a url is in scope when it shares a host with,
/// or is a subdomain of, at least one scope entry; an empty `scope` always yields `false`.
fn is_in_scope(&self, scope: &[Url]) -> bool;
/// Returns `true` when this url's domain is a strict subdomain of `parent_url`'s domain.
///
/// In the [`Url`] implementation in this file, identical domains are not subdomains of each
/// other, and the result is `false` when either url has no domain.
fn is_subdomain_of(&self, parent_url: &Url) -> bool;
}
impl UrlExt for Url {
    /// Determine whether this url falls within the given `scope`.
    ///
    /// The url is in scope when, for at least one entry of `scope`, any of the following holds:
    /// - both urls report the same host
    /// - both urls have the same domain name once letter case and a trailing root dot are
    ///   ignored (`example.com.` and `example.com` name the same host)
    /// - this url is a subdomain of the entry (see `is_subdomain_of`)
    ///
    /// An empty `scope` is logged as an error and never matches.
    fn is_in_scope(&self, scope: &[Url]) -> bool {
        log::trace!("enter: is_in_scope({}, scope: {:?})", self.as_str(), scope);

        if scope.is_empty() {
            log::error!("is_in_scope check failed (scope is empty, this should not happen)");
            log::trace!("exit: is_in_scope -> false");
            return false;
        }

        // normalized once up front and reused for every scope entry
        let own_domain = self.domain().map(normalized_domain);

        for url in scope {
            // The subdomain check ignores a trailing root dot; apply the same normalization to
            // the same-domain case so that `example.com.` is not rejected while
            // `sub.example.com.` is accepted. Urls without a domain never match this way.
            let same_domain =
                own_domain.is_some() && own_domain == url.domain().map(normalized_domain);

            // the exact host comparison is kept for hosts that are not domain names
            if same_domain || self.host() == url.host() {
                log::trace!("exit: is_in_scope -> true (same domain/host)");
                return true;
            }

            if self.is_subdomain_of(url) {
                log::trace!("exit: is_in_scope -> true (subdomain)");
                return true;
            }
        }

        log::trace!("exit: is_in_scope -> false");
        false
    }

    /// Determine whether this url's domain is a strict subdomain of `parent_url`'s domain.
    ///
    /// Domains are compared case-insensitively and without trailing root dots. A domain is not
    /// a subdomain of itself, and the match is made on whole labels: `sub.example.com` is a
    /// subdomain of `example.com`, while `notexample.com` is not.
    ///
    /// Returns `false` when either url has no domain.
    fn is_subdomain_of(&self, parent_url: &Url) -> bool {
        match (self.domain(), parent_url.domain()) {
            (Some(candidate), Some(parent)) => {
                let candidate = normalized_domain(candidate);
                let parent = normalized_domain(parent);

                // The candidate must read `<labels>.<parent>`: what remains after removing the
                // parent suffix has to end on a label separator. An identical domain leaves an
                // empty remainder, so it is rejected here as well.
                matches!(
                    candidate.strip_suffix(parent.as_str()),
                    Some(prefix) if prefix.ends_with('.')
                )
            }
            _ => false,
        }
    }
}

/// Lowercase `domain` and remove any trailing root dot(s), so that different spellings of the
/// same name (`Example.com.`, `example.com`) compare equal.
fn normalized_domain(domain: &str) -> String {
    domain.trim_end_matches('.').to_lowercase()
}
/// A target url, held as a string, paired with the shared [`Handles`] used when building
/// request urls from it.
#[derive(Debug)]
pub struct FeroxUrl {
/// The target url in string form.
pub target: String,
/// Shared handles; the methods in this file read `config` from it and report url formatting
/// errors through `stats`.
handles: Arc<Handles>,
}
impl FeroxUrl {
    /// Build a `FeroxUrl` from a string target.
    ///
    /// The string is copied and stored without any parsing or validation.
    pub fn from_string(target: &str, handles: Arc<Handles>) -> Self {
        let target = target.to_owned();
        Self { target, handles }
    }

    /// Build a `FeroxUrl` from an already-parsed [`Url`], storing its string form as the target.
    pub fn from_url(target: &Url, handles: Arc<Handles>) -> Self {
        Self::from_string(target.as_str(), handles)
    }

    /// Produce every url that should be requested for `word`.
    ///
    /// The first candidate is the word itself (with a trailing slash when `add_slash` is set in
    /// the configuration). It is followed by one candidate per configured extension and then one
    /// per entry of `collected_extensions`.
    ///
    /// A candidate that cannot be formatted is left out of the result and reported to the stats
    /// handler as a `UrlFormat` error.
    ///
    /// # Errors
    ///
    /// Returns an error only when reporting a formatting failure to the stats handler fails.
    pub fn formatted_urls(
        &self,
        word: &str,
        collected_extensions: HashSet<String>,
    ) -> Result<Vec<Url>> {
        log::trace!("enter: formatted_urls({word})");

        let config = &self.handles.config;

        // suffix applied to the bare word: a trailing slash is handled like any other extension
        let bare_suffix = if config.add_slash { Some("/") } else { None };

        // bare word first, then configured extensions, then extensions collected at runtime
        let suffixes = std::iter::once(bare_suffix).chain(
            config
                .extensions
                .iter()
                .chain(collected_extensions.iter())
                .map(|ext| Some(ext.as_str())),
        );

        let mut urls = Vec::new();

        for suffix in suffixes {
            match self.format(word, suffix) {
                Err(_) => self.handles.stats.send(AddError(UrlFormat))?,
                Ok(url) => urls.push(url),
            }
        }

        log::trace!("exit: formatted_urls -> {urls:?}");
        Ok(urls)
    }

    /// Join `word` (plus an optional `extension`) onto the target and append configured queries.
    ///
    /// An `extension` of `"/"` appends a trailing slash to the word; any other extension is
    /// appended as `.{extension}`.
    ///
    /// # Errors
    ///
    /// Returns an error when `word` is itself a parseable url, when the target cannot be parsed,
    /// or when the word cannot be joined onto the target.
    pub fn format(&self, word: &str, extension: Option<&str>) -> Result<Url> {
        log::trace!("enter: format({word}, {extension:?})");

        if Url::parse(word).is_ok() {
            let message = format!("word ({word}) from wordlist is a URL, skipping...");
            log::warn!("{message}");
            log::trace!("exit: format -> Err({message})");
            bail!(message);
        }

        // A trailing slash on the base is significant to `Url::join`: without it the last path
        // segment would be replaced instead of extended. An empty word leaves the target as-is.
        let mut base = self.target.clone();
        if !word.is_empty() && !base.ends_with('/') {
            base.push('/');
        }

        let suffixed = match extension {
            Some("/") => format!("{word}/"),
            Some(ext) => format!("{word}.{ext}"),
            None => word.to_owned(),
        };

        // A word beginning with `//` has all of its leading slashes removed before joining;
        // the tests in this file expect `//upload/img` to land under the target's own host.
        let relative = if suffixed.starts_with("//") {
            suffixed.trim_start_matches('/')
        } else {
            suffixed.as_str()
        };

        let mut joined = parse_url_with_raw_path(&base)?.join(relative)?;

        let queries = &self.handles.config.queries;
        if !queries.is_empty() {
            joined.query_pairs_mut().extend_pairs(queries.iter());
        }

        log::trace!("exit: format_url -> {joined}");
        Ok(joined)
    }

    /// Return the target as a string that is guaranteed to end with a `/`.
    pub fn normalize(&self) -> String {
        log::trace!("enter: normalize");

        let mut normalized = self.target.clone();
        if !normalized.ends_with('/') {
            normalized.push('/');
        }

        log::trace!("exit: normalize -> {normalized}");
        normalized
    }

    /// Count the path segments of the normalized target.
    ///
    /// Because the target is normalized to end with `/` first, `http://localhost` and
    /// `http://localhost/` both have a depth of 1 and `http://localhost/src` has a depth of 2.
    ///
    /// # Errors
    ///
    /// Returns an error when the target cannot be parsed or has no path segments.
    pub fn depth(&self) -> Result<usize> {
        log::trace!("enter: get_depth");

        let parsed = parse_url_with_raw_path(&self.normalize())?;

        let depth = parsed
            .path_segments()
            .ok_or_else(|| anyhow!("No path segments found"))?
            .count();

        log::trace!("exit: get_depth -> {depth}");
        Ok(depth)
    }
}
impl fmt::Display for FeroxUrl {
    /// Writes the target string verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.target)
    }
}
/// Unit tests covering `FeroxUrl` url formatting and depth calculation, and the `UrlExt`
/// scope/subdomain checks implemented for `Url`.
#[cfg(test)]
mod tests {
use super::*;
use crate::config::Configuration;
/// without a supplied configuration, a single url (target joined with the word) is expected
#[test]
fn formatted_urls_no_extension_returns_base_url_with_word() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let urls = url.formatted_urls("turbo", HashSet::new()).unwrap();
assert_eq!(urls, [Url::parse("http://localhost/turbo").unwrap()])
}
/// one configured extension yields the bare word followed by word.extension
#[test]
fn formatted_urls_one_extension_returns_two_urls() {
let config = Configuration {
extensions: vec![String::from("js")],
..Default::default()
};
let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let urls = url.formatted_urls("turbo", HashSet::new()).unwrap();
assert_eq!(
urls,
[
Url::parse("http://localhost/turbo").unwrap(),
Url::parse("http://localhost/turbo.js").unwrap()
]
)
}
/// n configured extensions yield n + 1 urls, in configuration order after the bare word
#[test]
fn formatted_urls_multiple_extensions_returns_n_plus_one_urls() {
let ext_vec = vec![
vec![String::from("js")],
vec![String::from("js"), String::from("php")],
vec![String::from("js"), String::from("php"), String::from("pdf")],
vec![
String::from("js"),
String::from("php"),
String::from("pdf"),
String::from("tar.gz"),
],
];
let base = Url::parse("http://localhost/turbo").unwrap();
let js = Url::parse("http://localhost/turbo.js").unwrap();
let php = Url::parse("http://localhost/turbo.php").unwrap();
let pdf = Url::parse("http://localhost/turbo.pdf").unwrap();
let tar = Url::parse("http://localhost/turbo.tar.gz").unwrap();
// expected[i] is the result for the extension set ext_vec[i]
let expected = [
vec![base.clone(), js.clone()],
vec![base.clone(), js.clone(), php.clone()],
vec![base.clone(), js.clone(), php.clone(), pdf.clone()],
vec![base, js, php, pdf, tar],
];
for (i, ext_set) in ext_vec.into_iter().enumerate() {
let config = Configuration {
extensions: ext_set,
..Default::default()
};
let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let urls = url.formatted_urls("turbo", HashSet::new()).unwrap();
assert_eq!(urls, expected[i]);
}
}
/// a target with no path has a depth of 1
#[test]
fn depth_base_url_returns_1() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let depth = url.depth().unwrap();
assert_eq!(depth, 1);
}
/// a trailing slash on the bare target does not change the depth
#[test]
fn depth_base_url_with_slash_returns_1() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost/", handles);
let depth = url.depth().unwrap();
assert_eq!(depth, 1);
}
/// a single directory in the path has a depth of 2
#[test]
fn depth_one_dir_returns_2() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost/src", handles);
let depth = url.depth().unwrap();
assert_eq!(depth, 2);
}
/// a trailing slash after a single directory does not change the depth
#[test]
fn depth_one_dir_with_slash_returns_2() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost/src/", handles);
let depth = url.depth().unwrap();
assert_eq!(depth, 2);
}
/// a plain word is joined onto the target
#[test]
fn format_url_normal() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("stuff", None).unwrap();
assert_eq!(
formatted,
reqwest::Url::parse("http://localhost/stuff").unwrap()
);
}
/// an empty word returns the target itself
#[test]
fn format_url_no_word() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("", None).unwrap();
assert_eq!(formatted, reqwest::Url::parse("http://localhost").unwrap());
}
/// configured queries are appended to the joined url
#[test]
fn format_url_joins_queries() {
let config = Configuration {
queries: vec![(String::from("stuff"), String::from("things"))],
..Default::default()
};
let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("lazer", None).unwrap();
assert_eq!(
formatted,
reqwest::Url::parse("http://localhost/lazer?stuff=things").unwrap()
);
}
/// configured queries are appended even when the word is empty
#[test]
fn format_url_without_word_joins_queries() {
let config = Configuration {
queries: vec![(String::from("stuff"), String::from("things"))],
..Default::default()
};
let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("", None).unwrap();
assert_eq!(
formatted,
reqwest::Url::parse("http://localhost/?stuff=things").unwrap()
);
}
/// an empty target is expected to make `format` fail, so the unwrap should panic
#[test]
#[should_panic]
fn format_url_no_url() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("", handles);
url.format("stuff", None).unwrap();
}
/// a word with one leading slash joins to the same url as the word without it
#[test]
fn format_url_word_with_preslash() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("/stuff", None).unwrap();
assert_eq!(
formatted,
reqwest::Url::parse("http://localhost/stuff").unwrap()
);
}
/// a trailing slash on the word is preserved in the result
#[test]
fn format_url_word_with_postslash() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("stuff/", None).unwrap();
assert_eq!(
formatted,
reqwest::Url::parse("http://localhost/stuff/").unwrap()
);
}
/// a word starting with two slashes must stay on the target's host
#[test]
fn format_url_word_with_two_prepended_slashes() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("//upload/img", None).unwrap();
assert_eq!(
formatted,
reqwest::Url::parse("http://localhost/upload/img").unwrap()
);
}
/// same as above, with an extension appended to the word
#[test]
fn format_url_word_with_two_prepended_slashes_and_extensions() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
for ext in ["rocks", "fun"] {
let to_check = format!("http://localhost/upload/ferox.{ext}");
assert_eq!(
url.format("//upload/ferox", Some(ext)).unwrap(),
reqwest::Url::parse(&to_check[..]).unwrap()
);
}
}
/// a word that is itself a url is rejected with an error
#[test]
fn format_url_word_that_is_a_url() {
let handles = Arc::new(Handles::for_testing(None, None).0);
let url = FeroxUrl::from_string("http://localhost", handles);
let formatted = url.format("http://schmocalhost", None);
assert!(formatted.is_err());
}
/// with add_slash set, only the bare word gets the trailing slash; extension urls do not
#[test]
fn formatted_urls_with_postslash_and_extensions() {
let config = Configuration {
add_slash: true,
extensions: vec!["rocks".to_string(), "fun".to_string()],
..Default::default()
};
let handles = Arc::new(Handles::for_testing(None, Some(Arc::new(config))).0);
let url = FeroxUrl::from_string("http://localhost", handles);
match url.formatted_urls("ferox", HashSet::new()) {
Ok(urls) => {
assert_eq!(urls.len(), 3);
assert_eq!(
urls,
[
Url::parse("http://localhost/ferox/").unwrap(),
Url::parse("http://localhost/ferox.rocks").unwrap(),
Url::parse("http://localhost/ferox.fun").unwrap(),
]
)
}
Err(err) => panic!("{}", err.to_string()),
}
}
/// a url whose host appears in the scope is in scope
#[test]
fn test_is_in_scope() {
let url = Url::parse("http://localhost").unwrap();
let scope = vec![
Url::parse("http://localhost").unwrap(),
Url::parse("http://example.com").unwrap(),
];
assert!(url.is_in_scope(&scope));
}
/// a subdomain of a scope entry is in scope
#[test]
fn test_is_in_scope_subdomain() {
let url = Url::parse("http://sub.localhost").unwrap();
let scope = vec![
Url::parse("http://localhost").unwrap(),
Url::parse("http://example.com").unwrap(),
];
assert!(url.is_in_scope(&scope));
}
/// a host unrelated to every scope entry is not in scope
#[test]
fn test_is_in_scope_not_in_scope() {
let url = Url::parse("http://notinscope.com").unwrap();
let scope = vec![
Url::parse("http://localhost").unwrap(),
Url::parse("http://example.com").unwrap(),
];
assert!(!url.is_in_scope(&scope));
}
/// nothing is in scope when the scope is empty
#[test]
fn test_is_in_scope_empty_scope() {
let url = Url::parse("http://localhost").unwrap();
let scope: Vec<Url> = vec![];
assert!(!url.is_in_scope(&scope));
}
/// exact domain match against a single-entry scope
#[test]
fn test_is_in_scope_domain_only_scope() {
let url = Url::parse("http://example.com").unwrap();
let scope = vec![Url::parse("http://example.com").unwrap()];
assert!(url.is_in_scope(&scope));
}
/// subdomain match against a single-entry scope
#[test]
fn test_is_in_scope_subdomain_domain_only_scope() {
let url = Url::parse("http://sub.example.com").unwrap();
let scope = vec![Url::parse("http://example.com").unwrap()];
assert!(url.is_in_scope(&scope));
}
/// a file url is not in scope of an http scope entry
#[test]
fn test_is_in_scope_no_domain() {
let url = Url::parse("file:///path/to/file").unwrap();
let scope = vec![Url::parse("http://example.com").unwrap()];
assert!(!url.is_in_scope(&scope));
}
/// a single extra leading label makes a subdomain
#[test]
fn test_is_subdomain_of_true() {
let subdomain_url = Url::parse("http://sub.example.com").unwrap();
let parent_url = Url::parse("http://example.com").unwrap();
assert!(subdomain_url.is_subdomain_of(&parent_url));
}
/// a domain is not a subdomain of itself
#[test]
fn test_is_subdomain_of_same_domain() {
let url = Url::parse("http://example.com").unwrap();
let parent_url = Url::parse("http://example.com").unwrap();
assert!(!url.is_subdomain_of(&parent_url));
}
/// unrelated domains are not subdomains of each other
#[test]
fn test_is_subdomain_of_different_domain() {
let url = Url::parse("http://other.com").unwrap();
let parent_url = Url::parse("http://example.com").unwrap();
assert!(!url.is_subdomain_of(&parent_url));
}
/// more than one extra leading label still counts as a subdomain
#[test]
fn test_is_subdomain_of_multi_level() {
let subdomain_url = Url::parse("http://deep.sub.example.com").unwrap();
let parent_url = Url::parse("http://example.com").unwrap();
assert!(subdomain_url.is_subdomain_of(&parent_url));
}
/// a file url as the candidate is never a subdomain
#[test]
fn test_is_subdomain_of_no_domain() {
let url = Url::parse("file:///path/to/file").unwrap();
let parent_url = Url::parse("http://example.com").unwrap();
assert!(!url.is_subdomain_of(&parent_url));
}
/// a file url as the parent never has subdomains
#[test]
fn test_is_subdomain_of_parent_no_domain() {
let url = Url::parse("http://example.com").unwrap();
let parent_url = Url::parse("file:///path/to/file").unwrap();
assert!(!url.is_subdomain_of(&parent_url));
}
/// empty scope with a url that has a path component
#[test]
fn test_is_not_in_empty_scope() {
let url = Url::parse("http://example.com/path").unwrap();
let scope: Vec<Url> = Vec::new();
assert!(!url.is_in_scope(&scope));
}
/// empty scope with a subdomain url
#[test]
fn test_is_in_scope_subdomain_with_empty_scope() {
let url = Url::parse("http://sub.example.com").unwrap();
let scope: Vec<Url> = vec![];
assert!(!url.is_in_scope(&scope));
}
/// exact match against the only scope entry
#[test]
fn test_is_in_scope_scope_match() {
let url = Url::parse("http://other.com").unwrap();
let scope = vec![Url::parse("http://other.com").unwrap()];
assert!(url.is_in_scope(&scope));
}
/// a different domain than the only scope entry is rejected
#[test]
fn test_is_in_scope_not_allowed() {
let url = Url::parse("http://notallowed.com").unwrap();
let scope = vec![Url::parse("http://other.com").unwrap()];
assert!(!url.is_in_scope(&scope));
}
/// empty scope with yet another domain
#[test]
fn test_is_in_scope_empty_scope_different_domain() {
let url = Url::parse("http://other.com").unwrap();
let scope: Vec<Url> = vec![];
assert!(!url.is_in_scope(&scope));
}
/// subdomain of the only scope entry is accepted
#[test]
fn test_is_in_scope_subdomain_in_scope() {
let url = Url::parse("http://sub.allowed.com").unwrap();
let scope = vec![Url::parse("http://allowed.com").unwrap()];
assert!(url.is_in_scope(&scope));
}
}