use std::{
collections::{HashMap, HashSet},
fmt::Display,
io,
path::PathBuf,
};
use crate::{
cli::StatusRangeRule,
handler::Secret,
urlparser::{ResponseStatus, URLNode},
};
use addr::parse_domain_name;
use anyhow::Result;
use csv::Writer;
use owo_colors::OwoColorize;
/// Placeholder host name returned by `Formatter::url_to_domain` when a URL
/// has no host component.
pub static UNKNOWN_HOST: &str = "UNKNOWN_HOST";
/// Label category for a group of URLs in formatted output.
///
/// The variant is rendered as `"URL"` or `"JS"` through the `AsRef<str>` and
/// `Display` implementations in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum URLType {
    /// Rendered as `"URL"`.
    Url,
    /// Rendered as `"JS"`.
    JS,
}
impl AsRef<str> for URLType {
fn as_ref(&self) -> &str {
match self {
URLType::Url => "URL",
URLType::JS => "JS",
}
}
}
impl Display for URLType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
URLType::Url => write!(f, "URL"),
URLType::JS => write!(f, "JS"),
}
}
}
/// Renders scan results (URLs, domains, JS files, secrets) as colorized text.
pub struct Formatter {
/// Optional status-code rule consulted by `filter`; `None` adds no restriction
/// beyond the checks `filter` always applies.
allowed_status: Option<StatusRangeRule>,
}
impl Formatter {
pub fn new(allowed_status: Option<StatusRangeRule>) -> Self {
Self { allowed_status }
}
/// Renders `status` as a colorized string.
///
/// Color mapping:
/// - `Valid(200)`: green background
/// - `Valid(300..400)`: yellow background
/// - `Valid(400..500)`: magenta background
/// - any other `Valid` code (including 2xx codes other than 200): red background
/// - `Ignore`: purple text
/// - `Failed` / `Unknown`: red text
pub fn format_status(&self, status: &ResponseStatus) -> String {
    match status {
        ResponseStatus::Valid(200) => status.on_green().to_string(),
        ResponseStatus::Valid(300..400) => status.on_yellow().to_string(),
        ResponseStatus::Valid(400..500) => status.on_magenta().to_string(),
        ResponseStatus::Valid(_) => status.on_red().to_string(),
        ResponseStatus::Ignore => status.purple().to_string(),
        ResponseStatus::Failed(_) | ResponseStatus::Unknown => status.red().to_string(),
    }
}
/// Colors `content` bright blue; an empty input yields an empty string with
/// no color escape sequences.
pub fn format_normal_result(&self, content: &str) -> String {
    match content {
        "" => String::new(),
        text => text.bright_blue().to_string(),
    }
}
/// Formats one URL as a single line:
/// `<url> [<status>] [Content-Length: ..] [Content-Type: ..] [Title: ..]`.
///
/// Missing content length, content type, or title are rendered as empty
/// fields.
pub fn format_single_url(&self, url: &URLNode) -> String {
    let content_length = url
        .content_length
        .map(|len| len.to_string())
        .unwrap_or_default();
    // Borrow the optional strings instead of cloning them: the values are only
    // read while building the line.
    let content_type = url.content_type.as_deref().unwrap_or_default();
    let title = url.title.as_deref().unwrap_or_default();
    format!(
        "{url} [{status}] [Content-Length: {cl}] [Content-Type: {ct}] [Title: {title}]",
        url = self.format_normal_result(&url.url),
        status = self.format_status(&url.response_status),
        cl = self.format_normal_result(&content_length),
        ct = self.format_normal_result(content_type),
        title = self.format_normal_result(title),
    )
}
/// Returns the host of `node`'s parsed URL, or `UNKNOWN_HOST` when the URL
/// has no host.
pub fn url_to_domain(&self, node: &URLNode) -> String {
    let host = match node.url_obj.host_str() {
        Some(host) => host,
        None => UNKNOWN_HOST,
    };
    host.to_owned()
}
/// Collects the distinct hosts of `found_urls` (as produced by
/// `url_to_domain`) into a set.
pub fn found_domains(&self, found_urls: Vec<&URLNode>) -> HashSet<String> {
    let mut hosts: HashSet<String> = HashSet::new();
    for node in found_urls {
        hosts.insert(self.url_to_domain(node));
    }
    hosts
}
/// Formats the set of domains as `"<count> Domains:\n<one domain per line>\n"`.
///
/// Domains are listed in sorted order so that the output is stable between
/// runs (hash-set iteration order is not).
pub fn format_found_domains(&self, domains: HashSet<String>) -> String {
    let num = domains.len();
    let mut sorted: Vec<String> = domains.into_iter().collect();
    sorted.sort_unstable();
    format!(
        "{num} Domains:\n{urls}\n",
        urls = self.format_normal_result(&sorted.join("\n"))
    )
}
/// Formats URLs grouped by the base URL they were found on.
///
/// For every base URL, the children accepted by `filter` are listed one per
/// line under a heading with the child count, the base URL, its status and
/// its depth. Base URLs with no accepted children are omitted. Sections are
/// joined with a newline.
pub fn format_url_hierarchy(&self, urls: &HashMap<URLNode, HashSet<URLNode>>) -> String {
    let mut sections: Vec<String> = Vec::new();
    for (base_url, child_urls) in urls {
        let mut lines: Vec<String> = Vec::new();
        for child in child_urls {
            if self.filter(child) {
                lines.push(self.format_single_url(child));
            }
        }
        if lines.is_empty() {
            continue;
        }
        sections.push(format!(
            "{num} URLs from {base} [{base_status}] (depth:{base_depth}): \n{urls_str}",
            num = lines.len(),
            base = base_url.url,
            base_status = self.format_status(&base_url.response_status),
            base_depth = base_url.depth,
            urls_str = lines.join("\n")
        ));
    }
    sections.join("\n")
}
/// Formats URLs grouped by root domain.
///
/// Every base URL and child URL accepted by `filter` is assigned to the root
/// domain of its host when that root is among the roots of `domains`;
/// everything else (including URLs without a host) goes into the `"Other"`
/// group. Groups are emitted in sorted order with `"Other"` last, each under
/// a heading `"<count> <url_type> from <domain>:"`.
pub fn format_url_per_domain(
    &self,
    domains: &HashSet<String>,
    urls: &HashMap<URLNode, HashSet<URLNode>>,
    url_type: URLType,
) -> String {
    let known_roots: HashSet<String> = domains.iter().map(|d| get_root_domain(d)).collect();

    // Bucket every accepted node (base first, then its children) by root domain.
    let mut grouped: HashMap<String, Vec<&URLNode>> = HashMap::new();
    for (base_url, child_urls) in urls {
        for node in std::iter::once(base_url).chain(child_urls.iter()) {
            if !self.filter(node) {
                continue;
            }
            let bucket = match node.url_obj.host_str().map(get_root_domain) {
                Some(root) if known_roots.contains(&root) => root,
                _ => "Other".to_string(),
            };
            grouped.entry(bucket).or_default().push(node);
        }
    }

    // Alphabetical order, except that the "Other" bucket always sorts last.
    let mut sections: Vec<(&String, &Vec<&URLNode>)> = grouped.iter().collect();
    sections.sort_by(|(left, _), (right, _)| {
        let left_is_other = left.as_str() == "Other";
        let right_is_other = right.as_str() == "Other";
        left_is_other
            .cmp(&right_is_other)
            .then_with(|| left.cmp(right))
    });

    let mut rendered: Vec<String> = Vec::new();
    for (domain, nodes) in sections {
        if nodes.is_empty() {
            continue;
        }
        let urls_str = nodes
            .iter()
            .map(|node| self.format_single_url(node))
            .collect::<Vec<String>>()
            .join("\n");
        rendered.push(format!(
            "{num} {url_type} from {domain}:\n{urls_str}",
            num = nodes.len(),
            domain = domain
        ));
    }
    rendered.join("\n")
}
/// Formats JS URLs grouped by the base URL they were found on.
///
/// For every base URL, the children accepted by `filter` are listed as
/// `"<url> [<status>]"`, one per line, under the heading
/// `"<count> JS from <base>:"`. The count is the number of listed (accepted)
/// children, so the heading always matches the lines below it. Base URLs with
/// no accepted children are omitted.
pub fn format_js(&self, js_urls: &HashMap<URLNode, HashSet<URLNode>>) -> String {
    js_urls
        .iter()
        .filter_map(|(base_url, child_urls)| {
            let lines = child_urls
                .iter()
                .filter(|u| self.filter(u))
                .map(|u| {
                    format!(
                        "{url} [{res}]",
                        url = u.url,
                        res = self.format_status(&u.response_status)
                    )
                })
                .collect::<Vec<String>>();
            if lines.is_empty() {
                return None;
            }
            Some(format!(
                "{num} JS from {base}:\n{urls}",
                // Count what is actually printed, not the unfiltered set.
                num = lines.len(),
                base = base_url.url,
                urls = lines.join("\n"),
            ))
        })
        .collect::<Vec<String>>()
        .join("\n")
}
/// Formats the secrets found per URL.
///
/// Each URL with at least one secret gets a heading
/// `"<count> secrets found in <url> [<status>]:"` followed by one
/// `"<type>: <data>"` line per secret. Returns `"No secrets found\n"` when no
/// URL has any secret.
pub fn format_secrets(&self, url_secrets: &HashMap<URLNode, HashSet<Secret>>) -> String {
    let mut sections: Vec<String> = Vec::new();
    for (node, found) in url_secrets {
        if found.is_empty() {
            continue;
        }
        let listing = found
            .iter()
            .map(|s| format!("{}: {}", s.secret_type, s.data))
            .collect::<Vec<String>>()
            .join("\n");
        sections.push(format!(
            "{num} secrets found in {url} [{res}]:\n{secrets}",
            num = found.len(),
            url = node.url,
            res = self.format_status(&node.response_status),
            secrets = self.format_normal_result(&listing)
        ));
    }
    if sections.is_empty() {
        return "No secrets found\n".to_string();
    }
    sections.join("\n")
}
/// Formats the secrets found per local file.
///
/// Each path with at least one secret gets a cyan heading
/// `"<count> secrets found in <path>:"` followed by one `"<type>: <data>"`
/// line per secret. Returns `"No secrets found\n"` when no path has any
/// secret.
///
/// Paths are rendered with `Path::display`, so a path that is not valid UTF-8
/// is still reported (lossily) instead of being dropped from the output.
pub fn format_local_secrets(
    &self,
    path_secrets: &HashMap<&PathBuf, HashSet<Secret>>,
) -> String {
    let res = path_secrets
        .iter()
        .filter(|(_, secrets)| !secrets.is_empty())
        .map(|(path, secrets)| {
            let mut section = format!(
                "{num} secrets found in {path}:\n",
                num = secrets.len(),
                path = path.display(),
            )
            .cyan()
            .to_string();
            section.push_str(
                &secrets
                    .iter()
                    .map(|s| format!("{}: {}", s.secret_type, s.data))
                    .collect::<Vec<String>>()
                    .join("\n"),
            );
            section
        })
        .collect::<Vec<String>>();
    if res.is_empty() {
        "No secrets found\n".to_string()
    } else {
        res.join("\n")
    }
}
/// Decides whether `url` should appear in formatted output.
///
/// - `Unknown` status: kept.
/// - `Ignore` / `Failed`: dropped.
/// - `Valid(404)`: always dropped.
/// - any other `Valid(code)`: kept unless an `allowed_status` rule is set and
///   rejects the code.
pub fn filter(&self, url: &URLNode) -> bool {
    // Everything that is not a concrete status code is decided right here.
    let ResponseStatus::Valid(code) = url.response_status else {
        return matches!(url.response_status, ResponseStatus::Unknown);
    };
    if code == 404_u16 {
        return false;
    }
    match self.allowed_status.as_ref() {
        Some(rule) => rule.is_allowed(code),
        None => true,
    }
}
}
/// Returns the root of `host` as reported by `parse_domain_name(host)` and
/// the parsed name's `root()`.
///
/// Falls back to `host` unchanged when parsing fails or the parsed name has
/// no root. The fallback string is only allocated when it is needed.
fn get_root_domain(host: &str) -> String {
    parse_domain_name(host)
        .ok()
        .and_then(|domain| domain.root().map(str::to_string))
        .unwrap_or_else(|| host.to_string())
}
/// Writes all known URLs and their secrets to `outfile` as CSV.
///
/// The first record is the header
/// `URL, Title, Response Code, Content Length, Content Type, Secrets`.
/// One row follows for every distinct URL node that appears as a key or child
/// in `urls`, or as a key in `url_secrets`. Missing title / content type are
/// written as empty fields; a missing content length is written as the
/// length type's default value. Secrets are written as `"<type>: <data>"`
/// lines joined with newlines inside the last field.
///
/// Rows are sorted by URL and the secrets within a row are sorted too, so the
/// file content is reproducible between runs (hash iteration order is not).
///
/// # Returns
/// The number of data rows written (the header is not counted).
///
/// # Errors
/// Propagates any error from writing a record or flushing the writer.
pub fn output_csv(
    outfile: Box<dyn io::Write>,
    urls: &HashMap<URLNode, HashSet<URLNode>>,
    url_secrets: &HashMap<URLNode, HashSet<Secret>>,
) -> Result<u32> {
    let mut writer = Writer::from_writer(outfile);
    writer.write_record([
        "URL",
        "Title",
        "Response Code",
        "Content Length",
        "Content Type",
        "Secrets",
    ])?;

    // De-duplicate first (a node may be both a base and a child), then sort
    // to get a stable row order.
    let unique: HashSet<&URLNode> = urls
        .iter()
        .flat_map(|(base, children)| std::iter::once(base).chain(children))
        .chain(url_secrets.keys())
        .collect();
    let mut rows: Vec<&URLNode> = unique.into_iter().collect();
    rows.sort_by(|a, b| a.url.cmp(&b.url));

    let mut count: u32 = 0;
    for url in rows {
        let secrets = url_secrets
            .get(url)
            .map(|found| {
                let mut lines = found
                    .iter()
                    .map(|s| format!("{}: {}", s.secret_type, s.data))
                    .collect::<Vec<String>>();
                lines.sort_unstable();
                lines.join("\n")
            })
            .unwrap_or_default();
        writer.write_record([
            url.url.to_owned(),
            url.title.clone().unwrap_or_default(),
            url.response_status.to_string(),
            url.content_length.unwrap_or_default().to_string(),
            url.content_type.clone().unwrap_or_default(),
            secrets,
        ])?;
        count += 1;
    }
    writer.flush()?;
    Ok(count)
}