1use std::{
6 fs,
7 io::{self, BufRead},
8 path::PathBuf,
9};
10
11use anyhow::{Context, Result};
12use clap::{ArgGroup, Parser, ValueEnum};
13use url::Url;
14
15use crate::reports::{ReportFormat, Severity};
16
// Command-line interface definition, parsed with clap's derive API.
//
// NOTE: plain `//` comments are used deliberately in this struct. clap's derive
// macros turn `///` doc comments into `--help` text, so adding those here would
// change the program's user-facing output.
//
// Every field tagged `long` (without a value) gets a flag derived from the field
// name, e.g. `no_filter` becomes `--no-filter`.
//
// The `input` argument group is marked required, so an invocation must name an
// input source (`--urls`, `--stdin` or `--har`). clap argument groups are
// exclusive unless `multiple(true)` is set, so combining sources should be
// rejected — confirm against the pinned clap version.
//
// Only `urls` and `har` are read in this part of the file (by `load_urls`). The
// notes on all other fields are inferred from their names, defaults and value
// placeholders — confirm against the code that consumes them.
#[derive(Debug, Parser)]
#[command(
    author,
    version,
    about,
    long_about = None,
    group(
        ArgGroup::new("input")
            .required(true)
            .args(["urls", "stdin", "har"])
    )
)]
pub struct Cli {
    // ---- Input sources (group "input") ----

    // `-u` / `--urls <FILE>`: file that `load_urls` reads line by line.
    #[arg(short = 'u', long, value_name = "FILE", group = "input")]
    pub urls: Option<PathBuf>,

    // `--stdin`: selects standard input. `load_urls` never reads this flag; it
    // falls back to stdin whenever neither `urls` nor `har` is set.
    #[arg(long, group = "input")]
    pub stdin: bool,

    // `--har <FILE>`: HAR capture whose request URLs are extracted and filtered
    // by `load_urls_from_har`.
    #[arg(long, value_name = "FILE", group = "input")]
    pub har: Option<PathBuf>,

    // ---- Pre-filtering / discovery (semantics presumed from names) ----

    // `--no-filter`: presumably disables a URL pre-filter step.
    #[arg(long)]
    pub no_filter: bool,

    // `--filter-timeout <SECS>`, default 3.
    #[arg(long, default_value_t = 3, value_name = "SECS")]
    pub filter_timeout: u64,

    // `--no-discovery`: presumably disables endpoint discovery.
    #[arg(long)]
    pub no_discovery: bool,

    // ---- Reporting ----

    // `-o` / `--output <FILE>`: optional report destination.
    #[arg(short = 'o', long, value_name = "FILE")]
    pub output: Option<PathBuf>,

    // `-f` / `--format <FORMAT>`, default "pretty"; see `CliFormat`.
    #[arg(short = 'f', long, default_value = "pretty", value_name = "FORMAT")]
    pub format: CliFormat,

    // `--stream`: presumably emits results as they are produced.
    #[arg(long)]
    pub stream: bool,

    // `--baseline <FILE>`: presumably an earlier report to compare against.
    #[arg(long, value_name = "FILE")]
    pub baseline: Option<PathBuf>,

    // `-q` / `--quiet`.
    #[arg(short = 'q', long)]
    pub quiet: bool,

    // `--summary`.
    #[arg(long)]
    pub summary: bool,

    // `--no-auto-report`.
    #[arg(long)]
    pub no_auto_report: bool,

    // ---- Request pacing and limits ----

    // `-c` / `--concurrency <N>`, default 20.
    #[arg(short = 'c', long, default_value_t = 20, value_name = "N")]
    pub concurrency: usize,

    // `-n` / `--max-endpoints <N>`, default 50.
    #[arg(short = 'n', long, default_value_t = 50, value_name = "N")]
    pub max_endpoints: usize,

    // `--delay-ms <MS>`, default 150.
    #[arg(long, default_value_t = 150, value_name = "MS")]
    pub delay_ms: u64,

    // `--retries <N>`, default 1.
    #[arg(long, default_value_t = 1, value_name = "N")]
    pub retries: u32,

    // `--timeout-secs <SECS>`, default 8.
    #[arg(long, default_value_t = 8, value_name = "SECS")]
    pub timeout_secs: u64,

    // ---- HTTP client behaviour ----

    // `--waf-evasion`.
    #[arg(long)]
    pub waf_evasion: bool,

    // `--user-agents <UA,...>`: comma-separated list, split by clap.
    // NOTE(review): real User-Agent strings often contain commas
    // (e.g. "(KHTML, like Gecko)") and would be split apart — confirm intended.
    #[arg(long, value_name = "UA,...", value_delimiter = ',')]
    pub user_agents: Vec<String>,

    // `--headers <NAME:VALUE>`: comma-separated list, split by clap.
    // NOTE(review): header values containing commas cannot be expressed.
    #[arg(long, value_name = "NAME:VALUE", value_delimiter = ',')]
    pub headers: Vec<String>,

    // `--cookies <NAME=VALUE>`: comma-separated list, split by clap.
    #[arg(long, value_name = "NAME=VALUE", value_delimiter = ',')]
    pub cookies: Vec<String>,

    // `--proxy <URL>`.
    #[arg(long, value_name = "URL")]
    pub proxy: Option<String>,

    // `--danger-accept-invalid-certs`: presumably disables TLS certificate
    // validation — confirm against the HTTP client setup.
    #[arg(long)]
    pub danger_accept_invalid_certs: bool,

    // `--active-checks`.
    #[arg(long)]
    pub active_checks: bool,

    // `--dry-run`.
    #[arg(long)]
    pub dry_run: bool,

    // `--per-host-clients`.
    #[arg(long)]
    pub per_host_clients: bool,

    // `--adaptive-concurrency`.
    #[arg(long)]
    pub adaptive_concurrency: bool,

    // ---- Authentication ----

    // `--auth-bearer <TOKEN>`.
    #[arg(long, value_name = "TOKEN")]
    pub auth_bearer: Option<String>,

    // `--auth-basic <USER:PASS>`.
    #[arg(long, value_name = "USER:PASS")]
    pub auth_basic: Option<String>,

    // `--auth-flow <FILE>`.
    #[arg(long, value_name = "FILE")]
    pub auth_flow: Option<PathBuf>,

    // `--auth-flow-b <FILE>`: presumably a second identity's flow — TODO confirm.
    #[arg(long, value_name = "FILE")]
    pub auth_flow_b: Option<PathBuf>,

    // `--unauth-strip-headers <NAME>`: comma-separated list. `Option<Vec<_>>`
    // lets consumers tell "flag absent" (`None`) from "flag given".
    #[arg(long, value_name = "NAME", value_delimiter = ',')]
    pub unauth_strip_headers: Option<Vec<String>>,

    // `--session-file <FILE>`.
    #[arg(long, value_name = "FILE")]
    pub session_file: Option<PathBuf>,

    // ---- Per-check opt-outs (each `--no-*` presumably disables one scanner) ----

    #[arg(long)]
    pub no_cors: bool,

    #[arg(long)]
    pub no_csp: bool,

    #[arg(long)]
    pub no_graphql: bool,

    #[arg(long)]
    pub no_api_security: bool,

    #[arg(long)]
    pub no_jwt: bool,

    #[arg(long)]
    pub no_openapi: bool,

    #[arg(long)]
    pub no_mass_assignment: bool,

    #[arg(long)]
    pub no_oauth_oidc: bool,

    #[arg(long)]
    pub no_rate_limit: bool,

    #[arg(long)]
    pub no_cve_templates: bool,

    #[arg(long)]
    pub no_websocket: bool,

    // ---- Severity thresholds ----

    // `--min-severity <LEVEL>`: optional; see `CliSeverity`.
    #[arg(long, value_name = "LEVEL")]
    pub min_severity: Option<CliSeverity>,

    // `--fail-on <LEVEL>`, default "medium"; see `CliSeverity`.
    #[arg(long, default_value = "medium", value_name = "LEVEL")]
    pub fail_on: CliSeverity,
}
235
// Report format as accepted on the command line (`-f` / `--format`).
//
// Kept separate from `ReportFormat` and converted through the `From` impl in this
// file. Plain `//` comments are used because clap's `ValueEnum` derive can surface
// `///` doc comments as help text for the possible values.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum CliFormat {
    // Maps to `ReportFormat::Pretty`; the CLI default ("pretty").
    Pretty,
    // Maps to `ReportFormat::Ndjson`.
    Ndjson,
    // Maps to `ReportFormat::Sarif`.
    Sarif,
}
244
// Severity level as accepted on the command line (`--min-severity`, `--fail-on`).
//
// Kept separate from `Severity` and converted through the `From` impl in this
// file. Plain `//` comments are used because clap's `ValueEnum` derive can surface
// `///` doc comments as help text for the possible values.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum CliSeverity {
    // Maps to `Severity::Critical`.
    Critical,
    // Maps to `Severity::High`.
    High,
    // Maps to `Severity::Medium`; the default for `--fail-on` ("medium").
    Medium,
    // Maps to `Severity::Low`.
    Low,
    // Maps to `Severity::Info`.
    Info,
}
253
254impl From<CliSeverity> for Severity {
255 fn from(c: CliSeverity) -> Self {
256 match c {
257 CliSeverity::Critical => Severity::Critical,
258 CliSeverity::High => Severity::High,
259 CliSeverity::Medium => Severity::Medium,
260 CliSeverity::Low => Severity::Low,
261 CliSeverity::Info => Severity::Info,
262 }
263 }
264}
265
266impl From<CliFormat> for ReportFormat {
267 fn from(c: CliFormat) -> Self {
268 match c {
269 CliFormat::Pretty => ReportFormat::Pretty,
270 CliFormat::Ndjson => ReportFormat::Ndjson,
271 CliFormat::Sarif => ReportFormat::Sarif,
272 }
273 }
274}
275
/// Top level of a HAR document as deserialized by `load_urls_from_har`.
///
/// Only the fields this module needs are declared; since `deny_unknown_fields`
/// is not set, serde skips everything else in the document.
#[derive(Debug, serde::Deserialize)]
struct HarFile {
    /// The mandatory `log` object wrapping the recorded entries.
    log: HarLog,
}
282
/// The `log` object of a HAR document.
#[derive(Debug, serde::Deserialize)]
struct HarLog {
    /// Recorded request/response pairs; the field is required, so a document
    /// without `entries` fails to deserialize.
    entries: Vec<HarEntry>,
}
287
/// One element of the HAR `entries` array; only its request half is kept.
#[derive(Debug, serde::Deserialize)]
struct HarEntry {
    /// The request that was recorded for this entry.
    request: HarRequest,
}
292
/// The subset of a HAR request that the URL filter looks at.
#[derive(Debug, serde::Deserialize)]
struct HarRequest {
    /// Request URL as recorded in the capture (required).
    url: String,
    /// HTTP method; when the key is missing it deserializes to an empty string,
    /// which `is_likely_api_url` treats like a read-only method.
    #[serde(default)]
    method: String,
}
299
300pub fn load_urls(cli: &Cli) -> Result<Vec<String>> {
303 let lines: Vec<String> = if let Some(ref path) = cli.urls {
304 let content = fs::read_to_string(path)
305 .with_context(|| format!("Cannot read URL file: {}", path.display()))?;
306 content.lines().map(str::to_owned).collect()
307 } else if let Some(ref path) = cli.har {
308 load_urls_from_har(path)?
309 } else {
310 let stdin = io::stdin();
312 stdin
313 .lock()
314 .lines()
315 .collect::<Result<_, _>>()
316 .context("Failed to read URLs from stdin")?
317 };
318
319 let urls = lines
320 .into_iter()
321 .map(|l| l.trim().to_owned())
322 .filter(|l| !l.is_empty() && !l.starts_with('#'))
323 .collect();
324
325 Ok(urls)
326}
327
328fn load_urls_from_har(path: &PathBuf) -> Result<Vec<String>> {
329 let content = fs::read_to_string(path)
330 .with_context(|| format!("Cannot read HAR file: {}", path.display()))?;
331 let har: HarFile = serde_json::from_str(&content)
332 .with_context(|| format!("Cannot parse HAR file: {}", path.display()))?;
333
334 Ok(har
335 .log
336 .entries
337 .into_iter()
338 .filter_map(|entry| {
339 let url = entry.request.url.trim().to_string();
340 if !(url.starts_with("http://") || url.starts_with("https://")) {
341 return None;
342 }
343 if !is_likely_api_url(&url, &entry.request.method) {
344 return None;
345 }
346 Some(url)
347 })
348 .collect())
349}
350
351fn is_likely_api_url(raw_url: &str, method: &str) -> bool {
352 let parsed = match Url::parse(raw_url) {
353 Ok(u) => u,
354 Err(_) => return false,
355 };
356
357 let host = parsed.host_str().unwrap_or("").to_ascii_lowercase();
358 let path = parsed.path().to_ascii_lowercase();
359 let query = parsed.query().unwrap_or("").to_ascii_lowercase();
360 let method = method.to_ascii_uppercase();
361
362 if is_likely_static_host(&host) || is_static_asset_path(&path) {
363 return false;
364 }
365
366 if !matches!(method.as_str(), "" | "GET" | "HEAD" | "OPTIONS") {
368 return true;
369 }
370
371 if host.starts_with("api.") || host.contains(".api.") {
372 return true;
373 }
374
375 let needle_haystack = format!("{path}?{query}");
376 const KEYWORDS: &[&str] = &[
377 "/api", "graphql", "openapi", "swagger", "oauth", "oidc", "auth", "token", "session",
378 "login", "logout", "signin", "identity", "/v1", "/v2", "/v3", "/rpc",
379 ];
380
381 KEYWORDS.iter().any(|k| needle_haystack.contains(k))
382}
383
/// Heuristically decides whether `host` serves static or third-party content
/// (CDN, fonts, analytics) rather than an API.
///
/// `host` is expected to be lower-cased already; matching is case-sensitive.
///
/// A host counts as static when any of these holds:
/// - it is `awsstatic.com` / `cloudfront.net` or a subdomain of either;
/// - it contains `fonts.` or `analytics` anywhere;
/// - any label other than the last one is exactly `cdn`, `static` or `assets`
///   (e.g. `cdn.example.com`, `img.static.example.com`).
fn is_likely_static_host(host: &str) -> bool {
    const STATIC_DOMAINS: &[&str] = &["awsstatic.com", "cloudfront.net"];
    const STATIC_LABELS: &[&str] = &["cdn", "static", "assets"];

    // Match on a label boundary: a bare `ends_with` would also classify unrelated
    // registrations such as `notcloudfront.net` as static and silently drop them.
    let on_static_domain = STATIC_DOMAINS.iter().any(|&domain| {
        host.strip_suffix(domain)
            .map_or(false, |rest| rest.is_empty() || rest.ends_with('.'))
    });

    if on_static_domain || host.contains("fonts.") || host.contains("analytics") {
        return true;
    }

    // Same as `starts_with("cdn.") || contains(".cdn.")` for each label: the label
    // must be followed by a dot, so the final label never counts.
    let mut labels = host.split('.');
    labels.next_back();
    labels.any(|label| STATIC_LABELS.contains(&label))
}
400
/// Reports whether `path` ends in a file extension that marks a static asset
/// (scripts, styles, images, fonts, media, archives).
///
/// The comparison is case-sensitive; the caller in this file passes a
/// lower-cased path.
fn is_static_asset_path(path: &str) -> bool {
    const STATIC_SUFFIXES: &[&str] = &[
        ".js", ".css", ".map", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".woff", ".woff2",
        ".ttf", ".eot", ".webp", ".avif", ".mp4", ".webm", ".mp3", ".wav", ".pdf", ".zip",
    ];

    for suffix in STATIC_SUFFIXES {
        if path.ends_with(suffix) {
            return true;
        }
    }
    false
}
408
/// Returns the default User-Agent list.
///
/// This simply delegates to `crate::waf::WafEvasion::user_agent_pool()`; the
/// contents and ordering of the list are defined there.
// NOTE(review): presumably the fallback used when `--user-agents` is not given —
// confirm at the call site.
pub fn default_user_agents() -> Vec<String> {
    crate::waf::WafEvasion::user_agent_pool()
}