use std::{
fs,
io::{self, BufRead},
path::PathBuf,
};
use anyhow::{Context, Result};
use clap::{ArgGroup, Parser, ValueEnum};
use url::Url;
use crate::reports::{ReportFormat, Severity};
// Command-line options, parsed with clap's derive API.
//
// Plain `//` comments are used throughout this struct on purpose: clap's
// derive turns `///` doc comments on a `Parser` struct and its fields into
// `--help` text, so adding them would change the program's output.
#[derive(Debug, Parser)]
#[command(
author,
version,
about,
long_about = None,
// Require exactly one of --urls, --stdin, or --har
// (the group is required, and an ArgGroup accepts a single member by default).
group(
ArgGroup::new("input")
.required(true)
.args(["urls", "stdin", "har"])
)
)]
pub struct Cli {
// ---- Input source: members of the required "input" group ----
// -u / --urls FILE. `load_urls` reads the file as text, one entry per line;
// blank lines and lines starting with `#` are skipped.
#[arg(short = 'u', long, value_name = "FILE", group = "input")]
pub urls: Option<PathBuf>,
// --stdin. `load_urls` does not test this flag directly: it falls back to
// standard input whenever neither `urls` nor `har` is set.
#[arg(long, group = "input")]
pub stdin: bool,
// --har FILE. `load_urls` parses the file as HAR JSON and keeps only the
// request URLs accepted by `is_likely_api_url`.
#[arg(long, value_name = "FILE", group = "input")]
pub har: Option<PathBuf>,
// ---- Pre-scan filtering / discovery ----
// NOTE(review): these options are consumed outside this file; the meanings
// suggested by their names should be confirmed at the use site.
#[arg(long)]
pub no_filter: bool,
// Defaults to 3; the value name indicates seconds.
#[arg(long, default_value_t = 3, value_name = "SECS")]
pub filter_timeout: u64,
#[arg(long)]
pub no_discovery: bool,
// ---- Output / reporting ----
// -o / --output FILE; `None` when the flag is not given.
#[arg(short = 'o', long, value_name = "FILE")]
pub output: Option<PathBuf>,
// -f / --format FORMAT; defaults to "pretty". Converts to `ReportFormat`
// through the `From<CliFormat>` impl in this file.
#[arg(short = 'f', long, default_value = "pretty", value_name = "FORMAT")]
pub format: CliFormat,
#[arg(long)]
pub stream: bool,
#[arg(long, value_name = "FILE")]
pub baseline: Option<PathBuf>,
#[arg(short = 'q', long)]
pub quiet: bool,
#[arg(long)]
pub summary: bool,
#[arg(long)]
pub no_auto_report: bool,
// ---- Request volume and pacing ----
// NOTE(review): units are taken from the value names / field names only;
// how each limit is enforced is not visible in this file.
// -c / --concurrency N; defaults to 20.
#[arg(short = 'c', long, default_value_t = 20, value_name = "N")]
pub concurrency: usize,
// -n / --max-endpoints N; defaults to 50.
#[arg(short = 'n', long, default_value_t = 50, value_name = "N")]
pub max_endpoints: usize,
// Defaults to 150 (milliseconds, per the value name).
#[arg(long, default_value_t = 150, value_name = "MS")]
pub delay_ms: u64,
// Defaults to 1.
#[arg(long, default_value_t = 1, value_name = "N")]
pub retries: u32,
// Defaults to 8 (seconds, per the value name).
#[arg(long, default_value_t = 8, value_name = "SECS")]
pub timeout_secs: u64,
// ---- HTTP client behaviour ----
#[arg(long)]
pub waf_evasion: bool,
// The next three options are comma-separated lists: clap splits the raw
// value on ',' so a single entry cannot itself contain a comma. Each Vec is
// empty when its flag is absent.
#[arg(long, value_name = "UA,...", value_delimiter = ',')]
pub user_agents: Vec<String>,
// Entries are expected in NAME:VALUE form (per the value name); they are
// stored unparsed here.
#[arg(long, value_name = "NAME:VALUE", value_delimiter = ',')]
pub headers: Vec<String>,
// Entries are expected in NAME=VALUE form (per the value name); they are
// stored unparsed here.
#[arg(long, value_name = "NAME=VALUE", value_delimiter = ',')]
pub cookies: Vec<String>,
// Kept as a raw string; no URL validation happens at parse time.
#[arg(long, value_name = "URL")]
pub proxy: Option<String>,
#[arg(long)]
pub danger_accept_invalid_certs: bool,
#[arg(long)]
pub active_checks: bool,
#[arg(long)]
pub dry_run: bool,
#[arg(long)]
pub per_host_clients: bool,
#[arg(long)]
pub adaptive_concurrency: bool,
// ---- Authentication ----
// NOTE(review): credentials are stored as given; how they are applied to
// requests is decided outside this file.
#[arg(long, value_name = "TOKEN")]
pub auth_bearer: Option<String>,
#[arg(long, value_name = "USER:PASS")]
pub auth_basic: Option<String>,
#[arg(long, value_name = "FILE")]
pub auth_flow: Option<PathBuf>,
#[arg(long, value_name = "FILE")]
pub auth_flow_b: Option<PathBuf>,
// Comma-separated list; `None` (rather than an empty Vec) when the flag is
// not given, so callers can tell "unset" apart from "set".
#[arg(long, value_name = "NAME", value_delimiter = ',')]
pub unauth_strip_headers: Option<Vec<String>>,
#[arg(long, value_name = "FILE")]
pub session_file: Option<PathBuf>,
// ---- Opt-out switches ----
// Each flag below is false unless passed. NOTE(review): presumably each one
// disables the scanner named after it — confirm where the flags are read.
#[arg(long)]
pub no_cors: bool,
#[arg(long)]
pub no_csp: bool,
#[arg(long)]
pub no_graphql: bool,
#[arg(long)]
pub no_api_security: bool,
#[arg(long)]
pub no_jwt: bool,
#[arg(long)]
pub no_openapi: bool,
#[arg(long)]
pub no_mass_assignment: bool,
#[arg(long)]
pub no_oauth_oidc: bool,
#[arg(long)]
pub no_rate_limit: bool,
#[arg(long)]
pub no_cve_templates: bool,
#[arg(long)]
pub no_websocket: bool,
// ---- Severity thresholds ----
// Both convert to `Severity` through the `From<CliSeverity>` impl in this file.
// Optional; `None` when the flag is not given.
#[arg(long, value_name = "LEVEL")]
pub min_severity: Option<CliSeverity>,
// Defaults to "medium".
#[arg(long, default_value = "medium", value_name = "LEVEL")]
pub fail_on: CliSeverity,
}
// Report format accepted by `--format`; the default given on `Cli::format`
// is "pretty". Converted to `ReportFormat` by the `From<CliFormat>` impl in
// this file.
//
// Plain `//` comments are deliberate: clap's `ValueEnum` derive uses `///`
// on variants as help text for the possible values.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum CliFormat {
Pretty,
Ndjson,
Sarif,
}
// Severity level accepted by `--min-severity` and `--fail-on`; converted to
// `Severity` by the `From<CliSeverity>` impl in this file.
//
// Plain `//` comments are deliberate: clap's `ValueEnum` derive uses `///`
// on variants as help text for the possible values.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum CliSeverity {
Critical,
High,
Medium,
Low,
Info,
}
/// Converts the CLI-facing severity into the report-side [`Severity`].
impl From<CliSeverity> for Severity {
    /// Maps each CLI level onto the `Severity` variant of the same name.
    ///
    /// The match has no catch-all arm, so adding a `CliSeverity` variant
    /// without extending this mapping is a compile error.
    fn from(level: CliSeverity) -> Self {
        match level {
            CliSeverity::Info => Self::Info,
            CliSeverity::Low => Self::Low,
            CliSeverity::Medium => Self::Medium,
            CliSeverity::High => Self::High,
            CliSeverity::Critical => Self::Critical,
        }
    }
}
/// Converts the CLI-facing format selector into the report-side [`ReportFormat`].
impl From<CliFormat> for ReportFormat {
    /// Maps each CLI format onto the `ReportFormat` variant of the same name.
    ///
    /// The match has no catch-all arm, so adding a `CliFormat` variant
    /// without extending this mapping is a compile error.
    fn from(choice: CliFormat) -> Self {
        match choice {
            CliFormat::Sarif => Self::Sarif,
            CliFormat::Ndjson => Self::Ndjson,
            CliFormat::Pretty => Self::Pretty,
        }
    }
}
/// Top-level object of a HAR file as read by `load_urls_from_har`.
///
/// Only the `log` member is modelled; any other keys in the JSON are ignored
/// during deserialization.
#[derive(Debug, serde::Deserialize)]
struct HarFile {
/// The `log` object holding the recorded entries.
log: HarLog,
}
/// The `log` object of a HAR file; only its `entries` array is modelled.
#[derive(Debug, serde::Deserialize)]
struct HarLog {
/// Recorded entries, in file order. The field is required: a `log` without
/// `entries` fails to deserialize.
entries: Vec<HarEntry>,
}
/// One element of `log.entries`; only the `request` side is modelled.
#[derive(Debug, serde::Deserialize)]
struct HarEntry {
/// The request that was recorded for this entry.
request: HarRequest,
}
/// The `request` object of a HAR entry, reduced to the fields this file reads.
#[derive(Debug, serde::Deserialize)]
struct HarRequest {
/// Request URL exactly as stored in the file (trimmed later by the loader).
url: String,
/// HTTP method. Thanks to `#[serde(default)]` a missing field deserializes
/// to an empty string, which `is_likely_api_url` treats like a read-only
/// method.
#[serde(default)]
method: String,
}
pub fn load_urls(cli: &Cli) -> Result<Vec<String>> {
let lines: Vec<String> = if let Some(ref path) = cli.urls {
let content = fs::read_to_string(path)
.with_context(|| format!("Cannot read URL file: {}", path.display()))?;
content.lines().map(str::to_owned).collect()
} else if let Some(ref path) = cli.har {
load_urls_from_har(path)?
} else {
let stdin = io::stdin();
stdin
.lock()
.lines()
.collect::<Result<_, _>>()
.context("Failed to read URLs from stdin")?
};
let urls = lines
.into_iter()
.map(|l| l.trim().to_owned())
.filter(|l| !l.is_empty() && !l.starts_with('#'))
.collect();
Ok(urls)
}
/// Extracts the request URLs worth scanning from a HAR capture.
///
/// The file is read fully into memory and deserialized into [`HarFile`].
/// An entry's URL is kept only when, after trimming, it starts with
/// `http://` or `https://` (case-sensitive) and `is_likely_api_url` accepts
/// it together with the entry's method. File order is preserved and
/// duplicates are not removed.
///
/// The parameter is a `&Path` rather than `&PathBuf`, so any path-like
/// borrow can be passed; existing `&PathBuf` call sites still compile through
/// deref coercion.
///
/// # Errors
///
/// Fails when the file cannot be read or is not valid HAR JSON for the
/// modelled fields.
fn load_urls_from_har(path: &std::path::Path) -> Result<Vec<String>> {
    let content = fs::read_to_string(path)
        .with_context(|| format!("Cannot read HAR file: {}", path.display()))?;
    let har: HarFile = serde_json::from_str(&content)
        .with_context(|| format!("Cannot parse HAR file: {}", path.display()))?;

    let urls = har
        .log
        .entries
        .into_iter()
        .filter_map(|entry| {
            let request = entry.request;
            // Work on a borrowed slice; an owned String is only allocated for
            // URLs that survive both checks.
            let url = request.url.trim();
            let is_http = url.starts_with("http://") || url.starts_with("https://");
            (is_http && is_likely_api_url(url, &request.method)).then(|| url.to_owned())
        })
        .collect();
    Ok(urls)
}
/// Heuristically decides whether a captured request looks like an API call.
///
/// Checks run in this order, and the first decisive one wins:
/// 1. a URL that does not parse is rejected;
/// 2. a static-looking host or a static-asset path is rejected;
/// 3. any method other than empty / `GET` / `HEAD` / `OPTIONS`
///    (compared ASCII case-insensitively) is accepted;
/// 4. a host that starts with `api.` or contains `.api.` is accepted;
/// 5. otherwise the lowercased `path?query` text must contain one of the
///    hint substrings below.
///
/// Matching is plain substring search, so for example `auth` also matches
/// `author`.
fn is_likely_api_url(raw_url: &str, method: &str) -> bool {
    const READ_ONLY_METHODS: &[&str] = &["", "GET", "HEAD", "OPTIONS"];
    const API_HINTS: &[&str] = &[
        "/api", "graphql", "openapi", "swagger", "oauth", "oidc", "auth", "token", "session",
        "login", "logout", "signin", "identity", "/v1", "/v2", "/v3", "/rpc",
    ];

    let url = if let Ok(url) = Url::parse(raw_url) {
        url
    } else {
        return false;
    };

    let host = url
        .host_str()
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();
    let path = url.path().to_ascii_lowercase();
    if is_likely_static_host(&host) || is_static_asset_path(&path) {
        return false;
    }

    // Requests with any other method are treated as API traffic outright.
    let is_read_only = READ_ONLY_METHODS
        .iter()
        .any(|&known| method.eq_ignore_ascii_case(known));
    if !is_read_only {
        return true;
    }

    if host.starts_with("api.") || host.contains(".api.") {
        return true;
    }

    let query = url
        .query()
        .map(str::to_ascii_lowercase)
        .unwrap_or_default();
    let haystack = [path.as_str(), query.as_str()].join("?");
    API_HINTS.iter().any(|&hint| haystack.contains(hint))
}
/// Reports whether a host name looks like it serves static content, fonts or
/// analytics rather than an API.
///
/// All checks are plain, case-sensitive string tests on `host` as given:
/// - ends with one of `HOST_SUFFIXES` (no label boundary is required, so
///   `notawsstatic.com` matches too);
/// - contains one of `HOST_FRAGMENTS` anywhere;
/// - starts with one of `LEADING_LABELS`, or contains one of `INNER_LABELS`.
fn is_likely_static_host(host: &str) -> bool {
    const HOST_SUFFIXES: &[&str] = &["awsstatic.com", "cloudfront.net"];
    const HOST_FRAGMENTS: &[&str] = &["fonts.", "analytics"];
    const LEADING_LABELS: &[&str] = &["cdn.", "static.", "assets."];
    const INNER_LABELS: &[&str] = &[".cdn.", ".static.", ".assets."];

    HOST_SUFFIXES.iter().any(|&suffix| host.ends_with(suffix))
        || HOST_FRAGMENTS.iter().any(|&fragment| host.contains(fragment))
        || LEADING_LABELS.iter().any(|&label| host.starts_with(label))
        || INNER_LABELS.iter().any(|&label| host.contains(label))
}
/// Reports whether a URL path ends in a well-known static-asset extension
/// (scripts, styles, source maps, images, fonts, media, PDF, ZIP).
///
/// The comparison is case-sensitive, so callers that want case-insensitive
/// matching must lowercase `path` first. Only the text from the last `.` to
/// the end of the path is considered, which is the same as testing each
/// extension as a suffix because every listed extension has a single leading
/// dot.
fn is_static_asset_path(path: &str) -> bool {
    match path.rfind('.') {
        Some(dot) => matches!(
            &path[dot..],
            ".js" | ".css"
                | ".map"
                | ".png"
                | ".jpg"
                | ".jpeg"
                | ".gif"
                | ".svg"
                | ".ico"
                | ".woff"
                | ".woff2"
                | ".ttf"
                | ".eot"
                | ".webp"
                | ".avif"
                | ".mp4"
                | ".webm"
                | ".mp3"
                | ".wav"
                | ".pdf"
                | ".zip"
        ),
        None => false,
    }
}
/// Returns the default list of User-Agent strings.
///
/// This is a thin wrapper that forwards to
/// `crate::waf::WafEvasion::user_agent_pool()`; the contents of the list are
/// defined there, not in this file.
// NOTE(review): presumably used when `--user-agents` is left empty — confirm at the call site.
pub fn default_user_agents() -> Vec<String> {
crate::waf::WafEvasion::user_agent_pool()
}