api_scanner/scanner/
api_security.rs

1// src/scanner/api_security.rs
2//
3// Checks: secrets in responses, error disclosure, HTTP methods,
4// debug/admin endpoints, directory listing, security.txt, response headers.
5//
6// False-positive mitigation:
7//   • SPA catch-all detection: fetches a known-404 "canary" path before
8//     probing debug endpoints.  If the canary returns 200 + HTML, the site
9//     uses client-side routing and all 200-HTML responses are ignored.
10//   • Content-Type guard: real config / debug endpoints respond with JSON,
11//     plain text, YAML, or XML — not text/html.
12//   • Body-content validation: specific endpoints must contain expected
13//     patterns (e.g. actuator returns JSON, .env contains KEY=VAL).
14
15use async_trait::async_trait;
16use dashmap::DashSet;
17use once_cell::sync::Lazy;
18use rand::seq::SliceRandom;
19use regex::Regex;
20use std::{collections::HashMap, sync::Arc};
21use tracing::debug;
22use url::Url;
23
24use crate::{
25    config::Config,
26    error::CapturedError,
27    http_client::HttpClient,
28    reports::{Finding, Severity},
29};
30
31use super::{
32    common::http_utils::is_html_content_type as common_is_html_content_type,
33    common::string_utils::{redact_secret, slugify, snippet as shared_snippet},
34    Scanner,
35};
36
/// Scanner that runs the API-security checks implemented in this module.
pub struct ApiSecurityScanner {
    /// Optional second HTTP client; `scan` forwards it to the IDOR/BOLA
    /// check when `config.active_checks` is set.
    client_b: Option<Arc<HttpClient>>,
    /// Lower-cased hosts for which the security.txt check has already run,
    /// so that check executes at most once per host.
    checked_hosts: Arc<DashSet<String>>,
}
41
42impl ApiSecurityScanner {
43    pub fn new(_config: &Config, client_b: Option<Arc<HttpClient>>) -> Self {
44        Self {
45            client_b,
46            checked_hosts: Arc::new(DashSet::new()),
47        }
48    }
49}
50
51// ── Secret / credential patterns ──────────────────────────────────────────────
52
53static RE_AWS_ACCESS: Lazy<Regex> = Lazy::new(|| Regex::new(r"AKIA[0-9A-Z]{16}").unwrap());
54static RE_AWS_SECRET: Lazy<Regex> =
55    Lazy::new(|| Regex::new(r#"(?i)aws.{0,20}secret.{0,20}['"][0-9a-zA-Z/+]{40}['"]"#).unwrap());
56static RE_API_KEY: Lazy<Regex> = Lazy::new(|| {
57    // Match api_key/apikey patterns with or without quotes
58    // Minimum 16 chars to balance false positives vs recall
59    Regex::new(r#"(?i)(api[_\-]?key|apikey)\s*[:=]\s*['"]?([A-Za-z0-9\-_]{16,64})['"]?"#)
60        .expect("Invalid API_KEY regex")
61});
62
63static RE_BEARER: Lazy<Regex> =
64    Lazy::new(|| Regex::new(r"(?i)bearer\s+[A-Za-z0-9\-_\.=]{20,}").expect("Invalid BEARER regex"));
65static RE_GENERIC_SEC: Lazy<Regex> = Lazy::new(|| {
66    // Keep minimum at 12 chars with quotes to catch real secrets
67    // Require at least one alphanumeric to avoid matching empty/whitespace values
68    Regex::new(r#"(?i)(secret|passwd|password)\s*[:=]\s*['"]([^'"]{12,})['"]"#)
69        .expect("Invalid GENERIC_SEC regex")
70});
71static RE_PRIVATE_KEY: Lazy<Regex> =
72    Lazy::new(|| Regex::new(r"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----").unwrap());
73static RE_GITHUB: Lazy<Regex> = Lazy::new(|| Regex::new(r"ghp_[0-9a-zA-Z]{36}").unwrap());
74static RE_SLACK: Lazy<Regex> = Lazy::new(|| Regex::new(r"xox[baprs]-[0-9a-zA-Z\-]{10,}").unwrap());
75static RE_STRIPE: Lazy<Regex> = Lazy::new(|| Regex::new(r"sk_live_[0-9a-zA-Z]{24,}").unwrap());
76static RE_SENDGRID: Lazy<Regex> = Lazy::new(|| Regex::new(r"SG\.[A-Za-z0-9\-_\.]{20,}").unwrap());
77static RE_GOOGLE: Lazy<Regex> = Lazy::new(|| Regex::new(r"AIza[0-9A-Za-z\-_]{35}").unwrap());
78static RE_DB_URL: Lazy<Regex> = Lazy::new(|| {
79    Regex::new(r"(?i)(mysql|postgres|mongodb|redis|amqp)://[^@\s]+:[^@\s]+@[^\s]+").unwrap()
80});
81
/// One named credential pattern searched for in response bodies.
struct SecretCheck {
    /// Human-readable label; also used to build the finding identifier.
    name: &'static str,
    /// Lazily compiled pattern that identifies the secret.
    re: &'static Lazy<Regex>,
}
86
87static SECRET_CHECKS: &[SecretCheck] = &[
88    SecretCheck {
89        name: "AWS Access Key",
90        re: &RE_AWS_ACCESS,
91    },
92    SecretCheck {
93        name: "AWS Secret Key",
94        re: &RE_AWS_SECRET,
95    },
96    SecretCheck {
97        name: "Generic API Key",
98        re: &RE_API_KEY,
99    },
100    SecretCheck {
101        name: "Bearer Token",
102        re: &RE_BEARER,
103    },
104    SecretCheck {
105        name: "Generic Secret",
106        re: &RE_GENERIC_SEC,
107    },
108    SecretCheck {
109        name: "Private Key Header",
110        re: &RE_PRIVATE_KEY,
111    },
112    SecretCheck {
113        name: "GitHub Token",
114        re: &RE_GITHUB,
115    },
116    SecretCheck {
117        name: "Slack Token",
118        re: &RE_SLACK,
119    },
120    SecretCheck {
121        name: "Stripe Secret Key",
122        re: &RE_STRIPE,
123    },
124    SecretCheck {
125        name: "Sendgrid API Key",
126        re: &RE_SENDGRID,
127    },
128    SecretCheck {
129        name: "Google API Key",
130        re: &RE_GOOGLE,
131    },
132    SecretCheck {
133        name: "Database URL",
134        re: &RE_DB_URL,
135    },
136];
137
138// ── Error-disclosure patterns ─────────────────────────────────────────────────
139
140static RE_ERR_JAVA: Lazy<Regex> =
141    Lazy::new(|| Regex::new(r"at [A-Za-z0-9\.$_]+\(.*\.java:\d+\)").unwrap());
142static RE_ERR_PYTHON: Lazy<Regex> =
143    Lazy::new(|| Regex::new(r"Traceback \(most recent call last\)").unwrap());
144static RE_ERR_RUBY: Lazy<Regex> = Lazy::new(|| Regex::new(r"\.rb:\d+:in `").unwrap());
145static RE_ERR_SQL: Lazy<Regex> = Lazy::new(|| {
146    Regex::new(r"(?i)(SQL syntax.*MySQL|mysql_fetch_|ORA-\d{4,5}|pg_query\(\)|SQLite3::Exception|Unclosed quotation mark)").unwrap()
147});
148static RE_ERR_PHP: Lazy<Regex> = Lazy::new(|| {
149    Regex::new(r"(?i)(Parse error|Fatal error|Warning:|Notice:)\s+.+in\s+/.+\.php on line").unwrap()
150});
151static RE_ERR_ASP: Lazy<Regex> =
152    Lazy::new(|| Regex::new(r"(?i)Server Error in '.*' Application\.").unwrap());
153static RE_ERR_DJANGO: Lazy<Regex> =
154    Lazy::new(|| Regex::new(r"(?i)django\.core\.exceptions|<title>Django.*Error</title>").unwrap());
155static RE_ERR_WERKZEUG: Lazy<Regex> =
156    Lazy::new(|| Regex::new(r"(?i)Werkzeug Debugger|The Werkzeug interactive debugger").unwrap());
157static RE_ERR_LARAVEL: Lazy<Regex> =
158    Lazy::new(|| Regex::new(r"(?i)laravel\.log|Whoops[,!].*Laravel").unwrap());
159static RE_ERR_PATH: Lazy<Regex> = Lazy::new(|| {
160    Regex::new(r"(?i)(/home/[a-z_][a-z0-9_]*/|/var/www/|/usr/local/|C:\\Users\\|C:\\inetpub\\)")
161        .unwrap()
162});
163
/// One named error-disclosure pattern.
struct ErrorCheck {
    /// Human-readable label for the kind of disclosure.
    name: &'static str,
    /// Lazily compiled pattern that identifies the disclosure.
    re: &'static Lazy<Regex>,
}
168
169static ERROR_CHECKS: &[ErrorCheck] = &[
170    ErrorCheck {
171        name: "Stack trace (Java)",
172        re: &RE_ERR_JAVA,
173    },
174    ErrorCheck {
175        name: "Stack trace (Python)",
176        re: &RE_ERR_PYTHON,
177    },
178    ErrorCheck {
179        name: "Stack trace (Ruby)",
180        re: &RE_ERR_RUBY,
181    },
182    ErrorCheck {
183        name: "SQL error",
184        re: &RE_ERR_SQL,
185    },
186    ErrorCheck {
187        name: "PHP error",
188        re: &RE_ERR_PHP,
189    },
190    ErrorCheck {
191        name: "ASP.NET error page",
192        re: &RE_ERR_ASP,
193    },
194    ErrorCheck {
195        name: "Django debug page",
196        re: &RE_ERR_DJANGO,
197    },
198    ErrorCheck {
199        name: "Werkzeug debugger",
200        re: &RE_ERR_WERKZEUG,
201    },
202    ErrorCheck {
203        name: "Laravel debug",
204        re: &RE_ERR_LARAVEL,
205    },
206    ErrorCheck {
207        name: "Internal path disclosure",
208        re: &RE_ERR_PATH,
209    },
210];
211
212// ── Dangerous HTTP methods ────────────────────────────────────────────────────
213
/// HTTP verbs this module classifies as dangerous.
// NOTE(review): presumably consumed by `check_http_methods`, which is outside
// this excerpt — confirm how each verb is probed.
static DANGEROUS_METHODS: &[&str] = &["PUT", "DELETE", "PATCH", "TRACE", "CONNECT"];
215
216// ── Directory-listing markers ─────────────────────────────────────────────────
217
/// Body substrings treated as signs of an auto-generated directory index.
// NOTE(review): presumably consumed by `check_directory_listing`, which is
// outside this excerpt — confirm whether matching is case-sensitive.
static DIR_LISTING_MARKERS: &[&str] = &[
    "Index of /",
    "Directory listing for",
    "Parent Directory</a>",
    "[To Parent Directory]",
];
224
225// ── Common debug / admin endpoints ────────────────────────────────────────────
226
/// A well-known debug/admin path together with the evidence required before
/// a response from it is reported.
struct DebugEndpoint {
    /// Path of the endpoint, starting with `/`.
    path: &'static str,
    /// Expected content-types; if empty, any non-HTML is accepted.
    expected_ct: &'static [&'static str],
    /// Body must match at least one of these patterns to be considered genuine.
    body_validators: &'static [fn(&str) -> bool],
}
234
235/// Returns `true` when the body looks like a dotenv file (`KEY=VALUE` lines).
236fn is_dotenv(body: &str) -> bool {
237    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^[A-Z_][A-Z0-9_]*=.+").unwrap());
238    RE.is_match(body)
239}
240
241/// Returns `true` when the body parses as JSON and is not wrapped in HTML.
242/// Also checks that it's not just an error response.
243fn is_json_body(body: &str) -> bool {
244    let trimmed = body.trim();
245    if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
246        return false;
247    }
248
249    match serde_json::from_str::<serde_json::Value>(trimmed) {
250        Ok(v) => {
251            // Check if this is an array with error objects
252            if let Some(arr) = v.as_array() {
253                if let Some(first) = arr.first() {
254                    if let Some(obj) = first.as_object() {
255                        // Pattern: [{"Status":"404","Message":"..."}]
256                        if let Some(status) = obj.get("Status").and_then(|s| s.as_str()) {
257                            if status == "404"
258                                || status == "403"
259                                || status.parse::<u16>().map(|c| c >= 400).unwrap_or(false)
260                            {
261                                return false;
262                            }
263                        }
264                    }
265                }
266            }
267
268            // Check if this is just an error response
269            if let Some(obj) = v.as_object() {
270                // Common error response patterns
271                let has_error = obj.contains_key("error")
272                    || obj.contains_key("errors")
273                    || obj.contains_key("message")
274                        && obj
275                            .get("message")
276                            .and_then(|m| m.as_str())
277                            .map(|s| s.to_lowercase().contains("error"))
278                            .unwrap_or(false);
279
280                let has_status = obj
281                    .get("status")
282                    .and_then(|s| s.as_u64())
283                    .map(|code| code >= 400)
284                    .unwrap_or(false)
285                    || obj
286                        .get("statusCode")
287                        .and_then(|s| s.as_u64())
288                        .map(|code| code >= 400)
289                        .unwrap_or(false);
290                // If it's just an error response with no other meaningful data, reject it
291                if has_error && (has_status || obj.len() <= 3) {
292                    return false;
293                }
294            }
295            true
296        }
297        Err(_) => false,
298    }
299}
300
301/// Returns `true` when the body looks like YAML config (has key: value lines).
302fn is_yaml_body(body: &str) -> bool {
303    static RE: Lazy<Regex> =
304        Lazy::new(|| Regex::new(r"(?m)^[a-zA-Z][a-zA-Z0-9_.-]*:\s*.+").unwrap());
305    let matches = RE.find_iter(body).count();
306    matches >= 2
307}
308
309/// Returns `true` when the body contains Java properties (key=value or key: value).
310fn is_properties_body(body: &str) -> bool {
311    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^[a-z][a-z0-9._-]+=.+").unwrap());
312    RE.find_iter(body).count() >= 3
313}
314
/// Reports whether `body` carries any of the markers this module associates
/// with Symfony profiler / web-debug-toolbar output.
fn is_profiler_page(body: &str) -> bool {
    const PROFILER_MARKERS: [&str; 6] = [
        "sf-toolbar",
        "symfony-profiler",
        "Symfony Profiler",
        "data-symfony-profiler",
        "class=\"sf-",
        "id=\"sfwdt",
    ];

    PROFILER_MARKERS.iter().any(|marker| body.contains(*marker))
}
325
/// Reports whether `body` looks like `phpinfo()` output: either the literal
/// text `phpinfo()`, or both "PHP Version" and "Configure Command".
fn is_phpinfo(body: &str) -> bool {
    if body.contains("phpinfo()") {
        return true;
    }
    body.contains("PHP Version") && body.contains("Configure Command")
}
330
/// Reports whether `body` contains any of the server-status style markers
/// ("Apache Server Status", "Server Version:", "Current Time:").
fn is_server_status(body: &str) -> bool {
    const STATUS_MARKERS: [&str; 3] = [
        "Apache Server Status",
        "Server Version:",
        "Current Time:",
    ];

    STATUS_MARKERS.iter().any(|marker| body.contains(*marker))
}
337
338/// Returns `true` when body looks like an actuator endpoint (JSON with expected keys).
339fn is_actuator(body: &str) -> bool {
340    if let Ok(v) = serde_json::from_str::<serde_json::Value>(body.trim()) {
341        // /actuator returns {"_links": ...}  or individual endpoints return objects
342        v.get("_links").is_some()
343            || v.get("status").is_some()
344            || v.get("beans").is_some()
345            || v.get("propertySources").is_some()
346            || v.get("activeProfiles").is_some()
347            || v.get("contexts").is_some()
348            || v.get("traces").is_some()
349            || v.get("names").is_some()
350    } else {
351        false
352    }
353}
354
355/// Returns `true` for prometheus-style metrics output.
356fn is_metrics(body: &str) -> bool {
357    static RE: Lazy<Regex> =
358        Lazy::new(|| Regex::new(r"(?m)^(# (HELP|TYPE) |[a-z_]+\{|[a-z_]+ [0-9])").unwrap());
359    RE.find_iter(body).count() >= 3
360}
361
362/// Returns `true` for debug/pprof style output.
363fn is_debug_output(body: &str) -> bool {
364    body.contains("goroutine")
365        || body.contains("heap profile")
366        || body.contains("contention")
367        || is_json_body(body)
368}
369
/// Reports whether the trimmed `body` begins with an XML declaration or a
/// `<configuration` root element (as in `web.config`).
fn is_xml_config(body: &str) -> bool {
    const XML_OPENERS: [&str; 2] = ["<?xml", "<configuration"];

    let document = body.trim();
    XML_OPENERS.iter().any(|opener| document.starts_with(*opener))
}
375
376/// Returns `true` when JSON body looks like actual config data (not just any JSON).
377/// Rejects Android assetlinks, OAuth metadata, and other non-config JSON.
378fn is_config_json(body: &str) -> bool {
379    let trimmed = body.trim();
380    if !(trimmed.starts_with('{') || trimmed.starts_with('[')) {
381        return false;
382    }
383    match serde_json::from_str::<serde_json::Value>(trimmed) {
384        Ok(v) => {
385            // Reject Android assetlinks arrays
386            if let Some(arr) = v.as_array() {
387                if let Some(first) = arr.first().and_then(|f| f.as_object()) {
388                    if first.contains_key("relation") || first.contains_key("target") {
389                        return false;
390                    }
391                    if let Some(status) = first.get("Status").and_then(|s| s.as_str()) {
392                        if status.parse::<u16>().map(|c| c >= 400).unwrap_or(false) {
393                            return false;
394                        }
395                    }
396                }
397            }
398            if let Some(obj) = v.as_object() {
399                // Must contain config-like keys
400                let config_keys = [
401                    "database",
402                    "host",
403                    "port",
404                    "password",
405                    "secret",
406                    "key",
407                    "token",
408                    "url",
409                    "endpoint",
410                    "debug",
411                    "environment",
412                    "version",
413                    "config",
414                    "setting",
415                ];
416                let has_config = obj.keys().any(|k| {
417                    let kl = k.to_ascii_lowercase();
418                    config_keys.iter().any(|ck| kl.contains(ck))
419                });
420                // Reject pure error responses
421                let has_error = obj.contains_key("errors")
422                    && obj.get("data").map(|d| d.is_null()).unwrap_or(false);
423                if has_error {
424                    return false;
425                }
426                return has_config;
427            }
428            false
429        }
430        Err(_) => false,
431    }
432}
433
/// Reports whether `body` does NOT look like a markup document.
///
/// A body counts as markup when, case-insensitively and after trimming, it
/// starts with `<!doctype`, `<html` or `<?xml`, or contains `<head>` or
/// `<body` anywhere. An empty body is therefore reported as non-HTML.
fn any_non_html(body: &str) -> bool {
    const DOCUMENT_PREFIXES: [&str; 3] = ["<!doctype", "<html", "<?xml"];
    const DOCUMENT_TAGS: [&str; 2] = ["<head>", "<body"];

    let normalized = body.trim().to_ascii_lowercase();
    let opens_like_markup = DOCUMENT_PREFIXES
        .iter()
        .any(|prefix| normalized.starts_with(*prefix));
    let embeds_markup = DOCUMENT_TAGS.iter().any(|tag| normalized.contains(*tag));

    !(opens_like_markup || embeds_markup)
}
443
444static DEBUG_ENDPOINTS: &[DebugEndpoint] = &[
445    DebugEndpoint {
446        path: "/debug",
447        expected_ct: &[],
448        body_validators: &[is_debug_output],
449    },
450    DebugEndpoint {
451        path: "/debug/vars",
452        expected_ct: &["application/json"],
453        body_validators: &[is_json_body],
454    },
455    DebugEndpoint {
456        path: "/debug/pprof",
457        expected_ct: &[],
458        body_validators: &[is_debug_output],
459    },
460    DebugEndpoint {
461        path: "/.env",
462        expected_ct: &["text/plain", "application/octet-stream"],
463        body_validators: &[is_dotenv],
464    },
465    DebugEndpoint {
466        path: "/.env.local",
467        expected_ct: &["text/plain", "application/octet-stream"],
468        body_validators: &[is_dotenv],
469    },
470    DebugEndpoint {
471        path: "/.env.production",
472        expected_ct: &["text/plain", "application/octet-stream"],
473        body_validators: &[is_dotenv],
474    },
475    DebugEndpoint {
476        path: "/config.json",
477        expected_ct: &["application/json"],
478        body_validators: &[is_config_json],
479    },
480    DebugEndpoint {
481        path: "/config.yaml",
482        expected_ct: &[
483            "text/yaml",
484            "application/yaml",
485            "text/plain",
486            "application/x-yaml",
487        ],
488        body_validators: &[is_yaml_body],
489    },
490    DebugEndpoint {
491        path: "/config.yml",
492        expected_ct: &[
493            "text/yaml",
494            "application/yaml",
495            "text/plain",
496            "application/x-yaml",
497        ],
498        body_validators: &[is_yaml_body],
499    },
500    DebugEndpoint {
501        path: "/settings.json",
502        expected_ct: &["application/json"],
503        body_validators: &[is_json_body],
504    },
505    DebugEndpoint {
506        path: "/application.properties",
507        expected_ct: &["text/plain"],
508        body_validators: &[is_properties_body],
509    },
510    DebugEndpoint {
511        path: "/application.yml",
512        expected_ct: &[
513            "text/yaml",
514            "application/yaml",
515            "text/plain",
516            "application/x-yaml",
517        ],
518        body_validators: &[is_yaml_body],
519    },
520    DebugEndpoint {
521        path: "/web.config",
522        expected_ct: &["text/xml", "application/xml"],
523        body_validators: &[is_xml_config],
524    },
525    DebugEndpoint {
526        path: "/phpinfo.php",
527        expected_ct: &[],
528        body_validators: &[is_phpinfo],
529    },
530    DebugEndpoint {
531        path: "/info.php",
532        expected_ct: &[],
533        body_validators: &[is_phpinfo],
534    },
535    DebugEndpoint {
536        path: "/server-status",
537        expected_ct: &[],
538        body_validators: &[is_server_status],
539    },
540    DebugEndpoint {
541        path: "/server-info",
542        expected_ct: &[],
543        body_validators: &[is_server_status],
544    },
545    DebugEndpoint {
546        path: "/_profiler",
547        expected_ct: &["text/html"],
548        body_validators: &[is_profiler_page],
549    },
550    DebugEndpoint {
551        path: "/__clockwork",
552        expected_ct: &["application/json"],
553        body_validators: &[is_json_body],
554    },
555    DebugEndpoint {
556        path: "/actuator",
557        expected_ct: &["application/json", "application/vnd.spring-boot.actuator"],
558        body_validators: &[is_actuator],
559    },
560    DebugEndpoint {
561        path: "/actuator/env",
562        expected_ct: &["application/json", "application/vnd.spring-boot.actuator"],
563        body_validators: &[is_actuator],
564    },
565    DebugEndpoint {
566        path: "/actuator/health",
567        expected_ct: &["application/json", "application/vnd.spring-boot.actuator"],
568        body_validators: &[is_actuator, is_json_body],
569    },
570    DebugEndpoint {
571        path: "/actuator/mappings",
572        expected_ct: &["application/json", "application/vnd.spring-boot.actuator"],
573        body_validators: &[is_actuator],
574    },
575    DebugEndpoint {
576        path: "/actuator/beans",
577        expected_ct: &["application/json", "application/vnd.spring-boot.actuator"],
578        body_validators: &[is_actuator],
579    },
580    DebugEndpoint {
581        path: "/actuator/httptrace",
582        expected_ct: &["application/json", "application/vnd.spring-boot.actuator"],
583        body_validators: &[is_actuator],
584    },
585    DebugEndpoint {
586        path: "/metrics",
587        expected_ct: &[
588            "text/plain",
589            "application/json",
590            "application/openmetrics-text",
591        ],
592        body_validators: &[is_metrics, is_json_body],
593    },
594    DebugEndpoint {
595        path: "/health",
596        expected_ct: &["application/json"],
597        body_validators: &[is_json_body],
598    },
599    DebugEndpoint {
600        path: "/healthz",
601        expected_ct: &["application/json", "text/plain"],
602        body_validators: &[is_json_body, any_non_html],
603    },
604    DebugEndpoint {
605        path: "/readyz",
606        expected_ct: &["application/json", "text/plain"],
607        body_validators: &[is_json_body, any_non_html],
608    },
609    DebugEndpoint {
610        path: "/status",
611        expected_ct: &["application/json"],
612        body_validators: &[is_json_body],
613    },
614    DebugEndpoint {
615        path: "/admin",
616        expected_ct: &[],
617        body_validators: &[any_non_html],
618    },
619    DebugEndpoint {
620        path: "/admin/config",
621        expected_ct: &["application/json"],
622        body_validators: &[is_config_json],
623    },
624];
625
626// ── SECURITY.TXT paths ────────────────────────────────────────────────────────
627
/// Candidate locations of a site's `security.txt`, the `.well-known` path first.
// NOTE(review): presumably consumed by `check_security_txt`, which is outside
// this excerpt — confirm the lookup order matters.
static SECURITY_TXT_PATHS: &[&str] = &["/.well-known/security.txt", "/security.txt"];
629
630// ─────────────────────────────────────────────────────────────────────────────
631
632#[async_trait]
633impl Scanner for ApiSecurityScanner {
634    fn name(&self) -> &'static str {
635        "api_security"
636    }
637
638    async fn scan(
639        &self,
640        url: &str,
641        client: &HttpClient,
642        config: &Config,
643    ) -> (Vec<Finding>, Vec<CapturedError>) {
644        let mut findings = Vec::new();
645        let mut errors = Vec::new();
646        let base = url.trim_end_matches('/');
647        let spa_fingerprint = detect_spa_catchall(base, client, &mut errors).await;
648        let spa_catchall = spa_fingerprint.is_some();
649        let security_txt_host = Url::parse(url)
650            .ok()
651            .and_then(|parsed| parsed.host_str().map(|h| h.to_ascii_lowercase()));
652
653        // Run all checks; failures are captured rather than propagated.
654        check_secrets_in_response(url, client, &mut findings, &mut errors).await;
655        check_error_disclosure(url, client, &mut findings, &mut errors).await;
656        check_http_methods(url, client, &mut findings, &mut errors, spa_catchall).await;
657        check_debug_endpoints(url, client, &mut findings, &mut errors, spa_fingerprint).await;
658        check_directory_listing(url, client, &mut findings, &mut errors).await;
659        if security_txt_host
660            .as_ref()
661            .map(|host| self.checked_hosts.insert(host.clone()))
662            .unwrap_or(true)
663        {
664            check_security_txt(url, client, &mut findings).await;
665        }
666        check_response_headers(url, client, &mut findings, &mut errors).await;
667
668        if config.active_checks {
669            check_idor_bola(
670                url,
671                client,
672                self.client_b.as_ref().map(|c| c.as_ref()),
673                &mut findings,
674                &mut errors,
675            )
676            .await;
677        }
678
679        (findings, errors)
680    }
681}
682
683// ── Helpers: SPA / catch-all detection ─────────────────────────────────────────
684
/// Returns `true` if the Content-Type looks like HTML.
///
/// Thin local alias for the shared helper imported as
/// `common_is_html_content_type`; the matching rules live there.
fn is_html_content_type(ct: &str) -> bool {
    common_is_html_content_type(ct)
}
689
/// Reports whether `ct` satisfies the `expected` content-type list.
///
/// An empty list means "no constraint" and always matches. Otherwise `ct`
/// is lower-cased and must contain at least one entry as a substring, so
/// parameters such as `; charset=utf-8` do not prevent a match. Entries in
/// `expected` are compared as given and should therefore be lower-case.
fn content_type_matches(ct: &str, expected: &[&str]) -> bool {
    match expected {
        [] => true,
        candidates => {
            let normalized = ct.to_ascii_lowercase();
            candidates
                .iter()
                .any(|candidate| normalized.contains(*candidate))
        }
    }
}
698
/// Quick body fingerprint — used to detect an SPA catch-all that serves the
/// same shell for every route.
///
/// Returns `(body length in bytes, hash of the first 256 characters)`. The
/// prefix is measured in `char`s, not bytes, so it never splits a multi-byte
/// character. The hash comes from `DefaultHasher`, whose algorithm is not
/// guaranteed to stay the same across Rust releases, so fingerprints should
/// only be compared within one run.
fn body_fingerprint(body: &str) -> (usize, u64) {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Byte offset just past the 256th character (or the whole body if it is
    // shorter). Hashing the borrowed slice yields the same value as hashing
    // an owned `String` of those characters, without the allocation.
    let prefix_end = body
        .char_indices()
        .nth(256)
        .map_or(body.len(), |(offset, _)| offset);

    let mut hasher = DefaultHasher::new();
    body[..prefix_end].hash(&mut hasher);
    (body.len(), hasher.finish())
}
709
710/// Detect SPA catch-all: send a request to a random path that should not exist.
711/// If the server returns 200 with HTML (by Content-Type or body inspection),
712/// it's very likely a SPA with catch-all routing.
713///
714/// We test multiple canary paths to handle SPAs that treat paths differently
715/// based on prefix (e.g., paths starting with _ vs __ vs random).
716async fn detect_spa_catchall(
717    base: &str,
718    client: &HttpClient,
719    errors: &mut Vec<CapturedError>,
720) -> Option<(usize, u64)> {
721    // Test multiple canary patterns to catch different SPA routing behaviors
722    let canaries = [
723        format!("{base}/__canary_404_check_xz9q7"),
724        format!("{base}/_canary_test_404"),
725        format!("{base}/xyzabc123notfound"),
726    ];
727
728    for canary in &canaries {
729        match client.get(canary).await {
730            Ok(resp) if resp.status == 200 => {
731                let ct = resp
732                    .headers
733                    .get("content-type")
734                    .map(|s| s.as_str())
735                    .unwrap_or("");
736                // Detect SPA: either HTML Content-Type OR body actually contains HTML
737                let body_is_html = !any_non_html(&resp.body);
738                if is_html_content_type(ct) || body_is_html {
739                    debug!(
740                        url = base,
741                        canary = %canary,
742                        "SPA catch-all detected (canary returned 200+HTML)"
743                    );
744                    return Some(body_fingerprint(&resp.body));
745                }
746            }
747            Ok(_) => continue,
748            Err(mut e) => {
749                e.message = format!("spa_canary_probe: {}", e.message);
750                errors.push(e);
751                continue;
752            }
753        }
754    }
755    None
756}
757
758// ── 1. Secrets in response body ───────────────────────────────────────────────
759
760async fn check_secrets_in_response(
761    url: &str,
762    client: &HttpClient,
763    findings: &mut Vec<Finding>,
764    errors: &mut Vec<CapturedError>,
765) {
766    let resp = match client.get(url).await {
767        Ok(r) => r,
768        Err(e) => {
769            errors.push(e);
770            return;
771        }
772    };
773
774    // Guard 1: Skip non-200 responses
775    if resp.status != 200 {
776        return;
777    }
778
779    // Guard 2: Skip error responses (403, 404 messages in body)
780    let body_lower = resp.body.to_ascii_lowercase();
781    if body_lower.contains("403 forbidden")
782        || body_lower.contains("404 not found")
783        || body_lower.contains("the requested resource is not found")
784        || (body_lower.contains("error") && body_lower.contains("status") && resp.body.len() < 500)
785    {
786        return;
787    }
788
789    let ct = resp
790        .headers
791        .get("content-type")
792        .map(|s| s.as_str())
793        .unwrap_or("");
794
795    // Guard 3: Distinguish frontend HTML from backend API responses
796    let is_html = is_html_content_type(ct);
797    let is_js = ct.contains("javascript") || ct.contains("ecmascript");
798    let looks_minified = is_js && resp.body.len() > 50000 && !resp.body.contains("\n\n");
799
800    // Guard 4: Check if this is a frontend page (HTML with typical web app markers)
801    let is_frontend_page = is_html
802        && (body_lower.contains("<!doctype html>")
803            || body_lower.contains("<html")
804            || body_lower.contains("<head>")
805            || body_lower.contains("<body"));
806
807    for chk in SECRET_CHECKS {
808        // Skip generic patterns on minified JS
809        if looks_minified && matches!(chk.name, "Generic API Key" | "Generic Secret") {
810            continue;
811        }
812
813        if let Some(m) = chk.re.find(&resp.body) {
814            let matched = m.as_str();
815
816            // Additional validation for Generic Secret to avoid false positives
817            if chk.name == "Generic Secret" {
818                // Extract the value part (after the colon/equals)
819                let value_part = matched.rsplit(&[':', '='][..]).next().unwrap_or("");
820                let cleaned = value_part.trim().trim_matches(&['"', '\''][..]);
821
822                // Skip if value is empty, whitespace-only, or looks like a placeholder
823                if cleaned.is_empty()
824                    || cleaned.chars().all(|c| c.is_whitespace())
825                    || cleaned.to_lowercase().contains("password")
826                    || cleaned.to_lowercase().contains("secret")
827                    || cleaned.len() < 12
828                {
829                    debug!(
830                        url = %url,
831                        check = chk.name,
832                        redacted_match = %redact(matched),
833                        "Skipping potential secret match after generic-secret validation"
834                    );
835                    continue;
836                }
837            }
838
839            // Guard 5: For Google API keys found in frontend HTML pages,
840            // always downgrade to LOW — Google Maps/frontend keys are domain-restricted
841            // and never represent backend secret exposure.
842            if chk.name == "Google API Key" && is_frontend_page {
843                findings.push(
844                    Finding::new(
845                        url,
846                        format!("api_security/secret-in-response/{}", slug(chk.name)),
847                        format!("Possible {} in frontend", chk.name),
848                        Severity::Low,
849                        format!("Possible {} found in frontend HTML. Frontend API keys are typically domain-restricted.", chk.name),
850                        "api_security",
851                    )
852                    .with_evidence(format!(
853                        "Pattern: {}\nMatch (redacted): {}\nContext: Frontend HTML\nURL: {url}",
854                        chk.name,
855                        redact(matched)
856                    ))
857                    .with_remediation(
858                        "Verify this key has proper domain restrictions in your API provider console.",
859                    ),
860                );
861                continue;
862            }
863
864            // Guard 6: For Generic API Key in frontend HTML, always downgrade to LOW
865            // Frontend pages embed Firebase/analytics keys that are not backend secrets
866            if chk.name == "Generic API Key" && is_frontend_page {
867                findings.push(
868                    Finding::new(
869                        url,
870                        format!("api_security/secret-in-response/{}", slug(chk.name)),
871                        format!("Possible {} in frontend", chk.name),
872                        Severity::Low,
873                        format!("Possible {} found in frontend HTML. Likely a client-side key.", chk.name),
874                        "api_security",
875                    )
876                    .with_evidence(format!(
877                        "Pattern: {}\nMatch (redacted): {}\nContext: Frontend HTML\nURL: {url}",
878                        chk.name,
879                        redact(matched)
880                    ))
881                    .with_remediation(
882                        "Verify this key is intended for client-side use and has appropriate restrictions.",
883                    ),
884                );
885                continue;
886            }
887
888            let redacted = redact(matched);
889
890            findings.push(
891                Finding::new(
892                    url,
893                    format!("api_security/secret-in-response/{}", slug(chk.name)),
894                    format!("Possible {} in response", chk.name),
895                    Severity::Critical,
896                    format!("Possible {} found in HTTP response body.", chk.name),
897                    "api_security",
898                )
899                .with_evidence(format!(
900                    "Pattern: {}\nMatch (redacted): {redacted}\nURL: {url}",
901                    chk.name
902                ))
903                .with_remediation(
904                    "Remove secrets from responses and rotate exposed credentials immediately.",
905                ),
906            );
907        }
908    }
909}
910
911// ── 2. Verbose error / debug information ─────────────────────────────────────
912
913async fn check_error_disclosure(
914    url: &str,
915    client: &HttpClient,
916    findings: &mut Vec<Finding>,
917    errors: &mut Vec<CapturedError>,
918) {
919    let probe_urls = [format!("{url}/FUZZ_ERROR_XYZ"), format!("{url}?id=_FUZZ_")];
920
921    for probe in &probe_urls {
922        let resp = match client.get(probe).await {
923            Ok(r) => r,
924            Err(e) => {
925                errors.push(e);
926                continue;
927            }
928        };
929
930        for chk in ERROR_CHECKS {
931            if chk.re.is_match(&resp.body) {
932                findings.push(
933                    Finding::new(
934                        url,
935                        format!("api_security/error-disclosure/{}", slug(chk.name)),
936                        format!("Error disclosure: {}", chk.name),
937                        Severity::Medium,
938                        format!(
939                            "Verbose error information leaked: {} detected in response \
940                         to malformed request.",
941                            chk.name
942                        ),
943                        "api_security",
944                    )
945                    .with_evidence(format!(
946                        "Probe URL: {probe}\nStatus: {}\nSnippet: {}",
947                        resp.status,
948                        snippet(&resp.body, 400)
949                    ))
950                    .with_remediation(
951                        "Disable verbose error pages in production and return generic errors.",
952                    ),
953                );
954                break;
955            }
956        }
957    }
958}
959
960// ── 3. HTTP method enumeration ────────────────────────────────────────────────
961
962async fn check_http_methods(
963    url: &str,
964    client: &HttpClient,
965    findings: &mut Vec<Finding>,
966    errors: &mut Vec<CapturedError>,
967    spa_catchall: bool,
968) {
969    if spa_catchall {
970        debug!(url = %url, "SPA catch-all detected; skipping method probing");
971    }
972
973    // First try OPTIONS — it may advertise allowed methods directly.
974    let allowed_from_options = match client.options(url, None).await {
975        Ok(resp) => {
976            let from_allow = resp.headers.get("allow").cloned().unwrap_or_default();
977
978            let from_acam = resp
979                .headers
980                .get("access-control-allow-methods")
981                .cloned()
982                .unwrap_or_default();
983
984            format!("{from_allow},{from_acam}")
985                .split(',')
986                .map(|s| s.trim().to_ascii_uppercase())
987                .filter(|s| !s.is_empty())
988                .collect::<Vec<_>>()
989        }
990        Err(e) => {
991            errors.push(e);
992            vec![]
993        }
994    };
995
996    let mut dangerous_allowed: Vec<String> = Vec::new();
997
998    for method in DANGEROUS_METHODS {
999        let advertised = allowed_from_options.iter().any(|m| m == method);
1000
1001        let actually_allowed = if advertised {
1002            true
1003        } else if spa_catchall {
1004            false
1005        } else {
1006            match client.method_probe(method, url).await {
1007                Ok(r) => r.status < 405,
1008                Err(e) => {
1009                    errors.push(e);
1010                    false
1011                }
1012            }
1013        };
1014
1015        if actually_allowed {
1016            dangerous_allowed.push(method.to_string());
1017        }
1018    }
1019
1020    if dangerous_allowed.contains(&"TRACE".to_string()) {
1021        findings.push(
1022            Finding::new(
1023                url,
1024                "api_security/http-method/trace-enabled",
1025                "HTTP TRACE enabled",
1026                Severity::Low,
1027                "HTTP TRACE method is enabled. Combined with client-side bugs it can \
1028             enable Cross-Site Tracing (XST) attacks.",
1029                "api_security",
1030            )
1031            .with_evidence(format!("TRACE responded with status < 405 on {url}"))
1032            .with_remediation("Disable TRACE at the web server or reverse proxy configuration."),
1033        );
1034    }
1035
1036    let write_methods: Vec<&str> = dangerous_allowed
1037        .iter()
1038        .filter(|m| matches!(m.as_str(), "PUT" | "DELETE" | "PATCH"))
1039        .map(String::as_str)
1040        .collect();
1041
1042    if !write_methods.is_empty() {
1043        findings.push(Finding::new(
1044            url,
1045            "api_security/http-method/write-methods-enabled",
1046            "Write HTTP methods enabled",
1047            Severity::Medium,
1048            format!(
1049                "Write HTTP methods accepted: {}. Verify these require authentication \
1050                 and are not accessible to unauthenticated clients.",
1051                write_methods.join(", ")
1052            ),
1053            "api_security",
1054        )
1055        .with_evidence(format!(
1056            "Methods returning non-405 on {url}: {}",
1057            write_methods.join(", ")
1058        ))
1059        .with_remediation(
1060            "Require authentication/authorization for write methods and disable them when unused.",
1061        ));
1062    }
1063}
1064
1065// ── 4. Debug / admin endpoint exposure ───────────────────────────────────────
1066
1067async fn check_debug_endpoints(
1068    url: &str,
1069    client: &HttpClient,
1070    findings: &mut Vec<Finding>,
1071    errors: &mut Vec<CapturedError>,
1072    spa_fingerprint: Option<(usize, u64)>,
1073) {
1074    let base = url.trim_end_matches('/');
1075    let mut endpoints = DEBUG_ENDPOINTS.iter().collect::<Vec<_>>();
1076    {
1077        let mut rng = rand::thread_rng();
1078        endpoints.shuffle(&mut rng);
1079    }
1080
1081    let critical_keywords = ["env", "config", "secret", "password", "credential", "key"];
1082    let high_keywords = ["actuator", "pprof", "phpinfo", "profiler", "clockwork"];
1083
1084    for ep in endpoints {
1085        let probe = format!("{base}{}", ep.path);
1086        let resp = match client.get(&probe).await {
1087            Ok(r) => r,
1088            Err(e) => {
1089                errors.push(e);
1090                continue;
1091            }
1092        };
1093
1094        // ── Guard 1: must be 200 ─────────────────────────────────────────────
1095        if resp.status != 200 {
1096            continue;
1097        }
1098
1099        let ct = resp
1100            .headers
1101            .get("content-type")
1102            .map(|s| s.as_str())
1103            .unwrap_or("");
1104
1105        // ── Guard 2: SPA catch-all — if we detected one and this response
1106        //    matches the fingerprint (regardless of Content-Type), skip it. ───
1107        if let Some(spa_fp) = &spa_fingerprint {
1108            let resp_fp = body_fingerprint(&resp.body);
1109            // Same length ±20% and same prefix hash → SPA shell
1110            // Increased tolerance to catch SPAs that inject slightly different content
1111            let len_ratio = resp_fp.0 as f64 / spa_fp.0.max(1) as f64;
1112            if (0.80..=1.20).contains(&len_ratio) && resp_fp.1 == spa_fp.1 {
1113                debug!(
1114                    url = %probe,
1115                    "Skipping — matches SPA catch-all fingerprint"
1116                );
1117                continue;
1118            }
1119
1120            // Additional check: if both are HTML and similar size, likely same SPA shell
1121            let ct = resp
1122                .headers
1123                .get("content-type")
1124                .map(|s| s.as_str())
1125                .unwrap_or("");
1126            if is_html_content_type(ct) && (0.70..=1.30).contains(&len_ratio) {
1127                debug!(
1128                    url = %probe,
1129                    "Skipping — HTML response with similar size to SPA shell"
1130                );
1131                continue;
1132            }
1133        }
1134
1135        // ── Guard 3: Content-Type validation ─────────────────────────────────
1136        // If we have expected content-types, check them.
1137        // Any HTML response for config/env endpoints is almost certainly a
1138        // custom error page or SPA rather than real leaked config.
1139        if !ep.expected_ct.is_empty() && !content_type_matches(ct, ep.expected_ct) {
1140            // HTML response for a config/env endpoint → false positive
1141            if is_html_content_type(ct) {
1142                debug!(
1143                    url = %probe,
1144                    ct,
1145                    "Skipping — HTML response for non-HTML endpoint"
1146                );
1147                continue;
1148            }
1149        }
1150
1151        // ── Guard 4: Body content validation ─────────────────────────────────
1152        // The response body must pass at least one validator.
1153        if !ep.body_validators.is_empty() {
1154            let passes = ep.body_validators.iter().any(|v| v(&resp.body));
1155            if !passes {
1156                debug!(
1157                    url = %probe,
1158                    "Skipping — body content does not match expected patterns"
1159                );
1160                continue;
1161            }
1162        }
1163
1164        // ── All guards passed — emit finding ─────────────────────────────────
1165        let lower_path = ep.path.to_ascii_lowercase();
1166
1167        let severity = if critical_keywords.iter().any(|k| lower_path.contains(k)) {
1168            Severity::Critical
1169        } else if high_keywords.iter().any(|k| lower_path.contains(k)) {
1170            Severity::High
1171        } else {
1172            Severity::Medium
1173        };
1174
1175        findings.push(
1176            Finding::new(
1177                url,
1178                format!("api_security/debug-endpoint{}", ep.path.replace('/', "-")),
1179                format!("Debug endpoint exposed: {}", ep.path),
1180                severity,
1181                format!(
1182                    "Debug/admin endpoint publicly accessible: {}. \
1183                 This may expose internal configuration, metrics, or runtime data.",
1184                    ep.path
1185                ),
1186                "api_security",
1187            )
1188            .with_evidence(format!(
1189                "URL: {probe}\nStatus: 200\nContent-Type: {ct}\nBody snippet:\n{}",
1190                snippet(&resp.body, 500)
1191            ))
1192            .with_remediation(
1193                "Restrict debug/admin endpoints to internal networks or require authentication.",
1194            ),
1195        );
1196    }
1197}
1198
1199// ── 5. Directory listing ───────────────────────────────────────────────────────
1200
1201async fn check_directory_listing(
1202    url: &str,
1203    client: &HttpClient,
1204    findings: &mut Vec<Finding>,
1205    errors: &mut Vec<CapturedError>,
1206) {
1207    let mut probe_paths = vec!["/", "/static/", "/assets/", "/uploads/", "/files/"];
1208    {
1209        let mut rng = rand::thread_rng();
1210        probe_paths.shuffle(&mut rng);
1211    }
1212    let base = url.trim_end_matches('/');
1213
1214    for path in probe_paths {
1215        let probe = format!("{base}{path}");
1216        let resp = match client.get(&probe).await {
1217            Ok(r) => r,
1218            Err(e) => {
1219                errors.push(e);
1220                continue;
1221            }
1222        };
1223
1224        if resp.status != 200 {
1225            continue;
1226        }
1227
1228        // Guard: Content-Type should be HTML for a directory listing page
1229        let ct = resp
1230            .headers
1231            .get("content-type")
1232            .map(|s| s.as_str())
1233            .unwrap_or("");
1234        if !ct.is_empty() && !is_html_content_type(ct) && !ct.contains("text/plain") {
1235            continue;
1236        }
1237
1238        let body_lower = resp.body.to_ascii_lowercase();
1239        let matched_marker = DIR_LISTING_MARKERS
1240            .iter()
1241            .find(|&&m| body_lower.contains(&m.to_ascii_lowercase()));
1242
1243        if let Some(marker) = matched_marker {
1244            findings.push(
1245                Finding::new(
1246                    url,
1247                    format!(
1248                        "api_security/directory-listing{}",
1249                        path.trim_end_matches('/').replace('/', "-")
1250                    ),
1251                    format!("Directory listing at {path}"),
1252                    Severity::Medium,
1253                    format!(
1254                        "Directory listing enabled at `{path}`. \
1255                     Attackers can enumerate files and discover sensitive assets."
1256                    ),
1257                    "api_security",
1258                )
1259                .with_evidence(format!(
1260                    "URL: {probe}\nMatched marker: \"{marker}\"\nSnippet:\n{}",
1261                    snippet(&resp.body, 400)
1262                ))
1263                .with_remediation(
1264                    "Disable directory listing in the web server and restrict public file access.",
1265                ),
1266            );
1267        }
1268    }
1269}
1270
1271// ── 6. security.txt presence ──────────────────────────────────────────────────
1272
1273async fn check_security_txt(url: &str, client: &HttpClient, findings: &mut Vec<Finding>) {
1274    let base = url.trim_end_matches('/');
1275    let mut found = false;
1276
1277    for path in SECURITY_TXT_PATHS {
1278        let probe = format!("{base}{path}");
1279        if let Ok(resp) = client.get(&probe).await {
1280            if resp.status == 200 {
1281                let ct = resp
1282                    .headers
1283                    .get("content-type")
1284                    .map(|s| s.as_str())
1285                    .unwrap_or("");
1286                // Genuine security.txt should be text/plain and contain "Contact:"
1287                if !is_html_content_type(ct) && resp.body.to_ascii_lowercase().contains("contact:")
1288                {
1289                    found = true;
1290                    break;
1291                }
1292            }
1293        }
1294    }
1295
1296    if !found {
1297        findings.push(Finding::new(
1298            url,
1299            "api_security/security-txt/missing",
1300            "Missing security.txt",
1301            Severity::Info,
1302            "No valid security.txt found at /.well-known/security.txt or /security.txt. \
1303             RFC 9116 recommends publishing one so researchers can report vulnerabilities.",
1304            "api_security",
1305        ).with_remediation(
1306            "Publish a security.txt with contact and policy details under /.well-known/security.txt.",
1307        ));
1308    }
1309}
1310
1311// ── 7. Response-header security checks ───────────────────────────────────────
1312
/// One response-header rule evaluated by `check_response_headers`.
struct HeaderCheck {
    /// Lower-case header name; used as the lookup key into the lower-cased
    /// response headers and quoted in finding evidence.
    name: &'static str,
    /// Suffix of the finding id (`api_security/headers/<slug>`); also selects
    /// the remediation text in `header_remediation`.
    slug: &'static str,
    /// Human-readable text used as both the title and the description of the
    /// finding built by `header_finding`.
    detail: &'static str,
    /// Severity assigned to findings raised by this rule.
    severity: Severity,
    /// Substring the lower-cased header value must contain when the header is
    /// present; otherwise a separate "present but weak" finding is raised.
    /// `None` means presence alone is sufficient.
    must_contain: Option<&'static str>,
}
1320
/// Header rules applied by `check_response_headers`, in reporting order.
///
/// `x-powered-by` and `server` are disclosure rules: the former is reported
/// whenever the header is present, the latter only when its value matches
/// `VERSION_RE`. Every other entry is reported when the header is absent.
///
/// NOTE(review): the `cache-control` detail text refers to an "authenticated
/// endpoint", but the check runs against whatever URL is scanned — confirm the
/// wording is intended.
static HEADER_CHECKS: &[HeaderCheck] = &[
    HeaderCheck {
        name:         "strict-transport-security",
        slug:         "hsts-missing",
        detail:       "Strict-Transport-Security header absent. Clients may downgrade to HTTP.",
        severity:     Severity::Medium,
        must_contain: None,
    },
    HeaderCheck {
        name:         "x-content-type-options",
        slug:         "xcto-missing",
        detail:       "X-Content-Type-Options header absent. Browsers may MIME-sniff responses.",
        severity:     Severity::Low,
        must_contain: Some("nosniff"),
    },
    HeaderCheck {
        name:         "x-frame-options",
        slug:         "xfo-missing",
        detail:       "X-Frame-Options header absent. Page may be embedded in a malicious iframe (clickjacking).",
        severity:     Severity::Low,
        must_contain: None,
    },
    HeaderCheck {
        name:         "content-security-policy",
        slug:         "csp-missing",
        detail:       "Content-Security-Policy header absent. Increases risk of XSS and data injection.",
        severity:     Severity::Medium,
        must_contain: None,
    },
    HeaderCheck {
        name:         "referrer-policy",
        slug:         "referrer-policy-missing",
        detail:       "Referrer-Policy header absent. Sensitive URL parameters may leak via the Referer header.",
        severity:     Severity::Low,
        must_contain: None,
    },
    HeaderCheck {
        name:         "permissions-policy",
        slug:         "permissions-policy-missing",
        detail:       "Permissions-Policy (formerly Feature-Policy) header absent.",
        severity:     Severity::Info,
        must_contain: None,
    },
    HeaderCheck {
        name:         "cache-control",
        slug:         "cache-control-missing",
        detail:       "Cache-Control header absent on authenticated endpoint. Sensitive responses may be cached.",
        severity:     Severity::Low,
        must_contain: None,
    },
    // Disclosure rules: these two are reported when the header is PRESENT.
    HeaderCheck {
        name:         "x-powered-by",
        slug:         "x-powered-by-present",
        detail:       "X-Powered-By header present — leaks server technology stack.",
        severity:     Severity::Info,
        must_contain: None,
    },
    HeaderCheck {
        name:         "server",
        slug:         "server-version-leaked",
        detail:       "Server header includes a version string, aiding fingerprinting.",
        severity:     Severity::Info,
        must_contain: None,
    },
];
1386
/// Matches a dotted numeric fragment (digits, a dot, digits) such as `1.18`;
/// `check_response_headers` uses it to decide whether a `Server` header value
/// discloses a version.
static VERSION_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\d+\.\d+").unwrap());
1388
1389async fn check_response_headers(
1390    url: &str,
1391    client: &HttpClient,
1392    findings: &mut Vec<Finding>,
1393    errors: &mut Vec<CapturedError>,
1394) {
1395    let resp = match client.get(url).await {
1396        Ok(r) => r,
1397        Err(e) => {
1398            errors.push(e);
1399            return;
1400        }
1401    };
1402
1403    let headers: HashMap<String, String> = resp
1404        .headers
1405        .iter()
1406        .map(|(k, v)| (k.to_ascii_lowercase(), v.clone()))
1407        .collect();
1408
1409    for check in HEADER_CHECKS {
1410        let key = check.name;
1411        let value = headers.get(key);
1412
1413        match key {
1414            "x-powered-by" => {
1415                if value.is_some() {
1416                    findings.push(header_finding(url, check, value));
1417                }
1418            }
1419            "server" => {
1420                if let Some(v) = value {
1421                    if VERSION_RE.is_match(v) {
1422                        findings.push(header_finding(url, check, Some(v)));
1423                    }
1424                }
1425            }
1426            _ => match value {
1427                None => {
1428                    findings.push(header_finding(url, check, None));
1429                }
1430                Some(v) => {
1431                    if let Some(required) = check.must_contain {
1432                        if !v.to_ascii_lowercase().contains(required) {
1433                            findings.push(
1434                                Finding::new(
1435                                    url,
1436                                    format!("api_security/headers/{}-weak", check.slug),
1437                                    format!("{} present but weak", check.name),
1438                                    check.severity.clone(),
1439                                    format!(
1440                                        "{} present but value does not contain `{required}`.",
1441                                        check.name
1442                                    ),
1443                                    "api_security",
1444                                )
1445                                .with_evidence(format!("{}: {v}", check.name))
1446                                .with_remediation(header_remediation(check)),
1447                            );
1448                        }
1449                    }
1450                }
1451            },
1452        }
1453    }
1454}
1455
1456// ── Helpers ───────────────────────────────────────────────────────────────────
1457
1458fn header_finding(url: &str, check: &HeaderCheck, value: Option<&String>) -> Finding {
1459    Finding::new(
1460        url,
1461        format!("api_security/headers/{}", check.slug),
1462        check.detail,
1463        check.severity.clone(),
1464        check.detail,
1465        "api_security",
1466    )
1467    .with_evidence(
1468        value
1469            .map(|v| format!("{}: {v}", check.name))
1470            .unwrap_or_default(),
1471    )
1472    .with_remediation(header_remediation(check))
1473}
1474
1475fn header_remediation(check: &HeaderCheck) -> &'static str {
1476    match check.slug {
1477        "hsts-missing" =>
1478            "Enable HSTS (Strict-Transport-Security) with a long max-age and includeSubDomains.",
1479        "xcto-missing" =>
1480            "Set X-Content-Type-Options: nosniff.",
1481        "xfo-missing" =>
1482            "Set X-Frame-Options to DENY or SAMEORIGIN, or use CSP frame-ancestors.",
1483        "referrer-policy-missing" =>
1484            "Set Referrer-Policy to a restrictive value such as no-referrer or strict-origin-when-cross-origin.",
1485        "permissions-policy-missing" =>
1486            "Set Permissions-Policy to disable unused browser features.",
1487        "x-powered-by-present" =>
1488            "Remove X-Powered-By to reduce stack fingerprinting.",
1489        "server-version-leaked" =>
1490            "Remove or genericize the Server header to reduce fingerprinting.",
1491        _ =>
1492            "Harden response headers according to your security baseline.",
1493    }
1494}
1495
/// Redacts a matched secret before it is written to logs or finding evidence.
///
/// Delegates to the shared `redact_secret` helper with `4` as its second
/// argument (presumably the number of characters left visible — confirm
/// against `redact_secret`).
fn redact(s: &str) -> String {
    redact_secret(s, 4)
}
1499
/// Converts a check name into the slug embedded in finding ids; delegates to
/// the shared `slugify` helper.
fn slug(s: &str) -> String {
    slugify(s)
}
1503
/// Produces the response-body excerpt quoted in finding evidence; delegates
/// to the shared snippet helper, passing `max_len` through unchanged
/// (presumably an upper bound on the excerpt length — confirm against the
/// helper).
fn snippet(s: &str, max_len: usize) -> String {
    shared_snippet(s, max_len)
}
1507
1508// ── Active checks (opt-in) ────────────────────────────────────────────────────
1509
1510// ── IDOR / BOLA detection ─────────────────────────────────────────────────────
1511//
1512// Three tiers, each independently useful:
1513//
1514// Tier 1 — Unauthenticated comparison
//   The same URL is fetched without credentials. A 2xx response means the
//   endpoint is reachable unauthenticated: identical content is reported as
//   (possibly intentional) public access, while differing content is reported
//   at higher severity because it may expose data that should be protected.
1518//
1519// Tier 2 — ID range walk
1520//   Walk a small window of IDs around the one in the URL. Track which return
1521//   200 and which return 403/404. A pattern like [200, 200, 200, 403, 403]
1522//   for consecutive IDs suggests authorization is not object-specific.
1523//
1524// Tier 3 — Cross-user comparison (requires client_b)
1525//   Fetch the URL with a second identity. If the second identity gets the
1526//   same content as the first, object-level authorization is missing.
1527
1528async fn check_idor_bola(
1529    url: &str,
1530    client: &HttpClient,
1531    client_b: Option<&HttpClient>,
1532    findings: &mut Vec<Finding>,
1533    errors: &mut Vec<CapturedError>,
1534) {
1535    // Only run on URLs with numeric path segments
1536    let numeric_seg = match find_numeric_segment(url) {
1537        Some(s) => s,
1538        None => return,
1539    };
1540
1541    // ── Tier 1: Unauthenticated comparison ────────────────────────────────────
1542
1543    let authed_resp = match client.get(url).await {
1544        Ok(r) => r,
1545        Err(e) => {
1546            errors.push(e);
1547            return;
1548        }
1549    };
1550
1551    if authed_resp.status >= 400 {
1552        return; // Not a live endpoint with our credentials
1553    }
1554
1555    let unauth_resp = match client.get_without_auth(url).await {
1556        Ok(r) => r,
1557        Err(e) => {
1558            errors.push(e);
1559            return;
1560        }
1561    };
1562
1563    let authed_fp = body_fingerprint(&authed_resp.body);
1564
1565    match unauth_resp.status {
1566        200..=299 => {
1567            let unauth_fp = body_fingerprint(&unauth_resp.body);
1568            if authed_fp == unauth_fp {
1569                // Same content unauthenticated — endpoint is public (may be intentional)
1570                findings.push(
1571                    Finding::new(
1572                        url,
1573                        "api_security/unauthenticated-access",
1574                        "Endpoint accessible without authentication",
1575                        Severity::Medium,
1576                        "Endpoint returns the same response with and without auth credentials. \
1577                         If this resource should be protected, authentication is not enforced.",
1578                        "api_security",
1579                    )
1580                    .with_evidence(format!(
1581                        "Authed: HTTP {}, Unauthed: HTTP {}",
1582                        authed_resp.status, unauth_resp.status
1583                    ))
1584                    .with_remediation(
1585                        "Enforce authentication middleware on all protected endpoints.",
1586                    ),
1587                );
1588            } else {
1589                // Different content unauthenticated — endpoint is accessible but
1590                // returns different data. Could be IDOR or partial access.
1591                findings.push(
1592                    Finding::new(
1593                        url,
1594                        "api_security/partial-unauth-access",
1595                        "Endpoint returns data without authentication",
1596                        Severity::High,
1597                        "Endpoint returns a successful response without credentials but \
1598                         with different content than the authenticated response. \
1599                         The unauthenticated response may contain another user's data.",
1600                        "api_security",
1601                    )
1602                    .with_evidence(format!(
1603                        "Authed status: {}, Unauthed status: {}\n\
1604                         Authed body hash: {:x}, Unauthed body hash: {:x}",
1605                        authed_resp.status, unauth_resp.status, authed_fp.1, unauth_fp.1
1606                    ))
1607                    .with_remediation(
1608                        "Verify object-level authorization is enforced for every identity, \
1609                         including unauthenticated requests.",
1610                    ),
1611                );
1612            }
1613        }
1614        401 | 403 => {
1615            // Auth is being enforced — good. Continue to tier 2.
1616        }
1617        _ => {
1618            // Unusual status — skip
1619        }
1620    }
1621
1622    // ── Tier 2: ID range walk ─────────────────────────────────────────────────
1623    //
1624    // Walk IDs [base-2, base-1, base, base+1, base+2].
1625    // Collect (id, status, body_fp) tuples.
1626    // Finding: if IDs outside the original all return 200, authorization
1627    // may be missing per-object (returns data for any ID).
1628
1629    type RangeResult = (u64, Option<u16>, Option<(usize, u64)>);
1630
1631    let base_id = numeric_seg.value;
1632    let range_ids: Vec<u64> = (base_id.saturating_sub(2)..=base_id + 2).collect();
1633
1634    let mut range_results: Vec<RangeResult> = Vec::new();
1635
1636    for &id in &range_ids {
1637        let probe_url = replace_numeric_segment(url, &numeric_seg, id);
1638        match client.get(&probe_url).await {
1639            Ok(r) => {
1640                let fp = if r.status < 400 {
1641                    Some(body_fingerprint(&r.body))
1642                } else {
1643                    None
1644                };
1645                range_results.push((id, Some(r.status), fp));
1646            }
1647            Err(e) => {
1648                errors.push(e);
1649                range_results.push((id, None, None));
1650            }
1651        }
1652    }
1653
1654    // Count how many IDs outside the original return 200 with real content
1655    let other_successes: Vec<&RangeResult> = range_results
1656        .iter()
1657        .filter(|(id, status, fp)| {
1658            *id != base_id
1659                && status
1660                    .map(|status| (200..400).contains(&status))
1661                    .unwrap_or(false)
1662                && fp.as_ref().map(|f| f.0 > 32).unwrap_or(false)
1663            // non-trivial body
1664        })
1665        .collect();
1666
1667    let other_success_count = other_successes.len();
1668    if other_success_count >= 2 {
1669        let severity = idor_range_walk_severity(other_success_count);
1670
1671        // At least 2 adjacent IDs return valid data — likely no per-object auth
1672        let evidence_lines: Vec<String> = range_results
1673            .iter()
1674            .map(|(id, status, _)| {
1675                let marker = if *id == base_id { " ← original" } else { "" };
1676                let status_display = status
1677                    .map(|status| status.to_string())
1678                    .unwrap_or_else(|| "ERROR".to_string());
1679                format!("  ID {id}: HTTP {status_display}{marker}")
1680            })
1681            .collect();
1682
1683        findings.push(
1684            Finding::new(
1685                url,
1686                "api_security/idor-id-enumerable",
1687                "Object IDs appear enumerable (IDOR/BOLA)",
1688                severity,
1689                format!(
1690                    "{} adjacent IDs near the original resource returned successful responses. \
1691                     Object-level authorization may not be enforced per resource — \
1692                     any authenticated user may be able to access other users' objects.",
1693                    other_success_count
1694                ),
1695                "api_security",
1696            )
1697            .with_evidence(format!(
1698                "ID range probe results:\n{}",
1699                evidence_lines.join("\n")
1700            ))
1701            .with_remediation(
1702                "Enforce object-level authorization (BOLA) checks: verify the requesting \
1703                 identity owns or has explicit access to each requested resource ID.",
1704            ),
1705        );
1706    }
1707
1708    // ── Tier 3: Cross-user comparison ─────────────────────────────────────────
1709
1710    let Some(client_b) = client_b else {
1711        return;
1712    };
1713
1714    let resp_b = match client_b.get(url).await {
1715        Ok(r) => r,
1716        Err(e) => {
1717            errors.push(e);
1718            return;
1719        }
1720    };
1721
1722    // Both identities must get a 200 for this to be meaningful
1723    if resp_b.status >= 400 {
1724        return;
1725    }
1726
1727    let fp_b = body_fingerprint(&resp_b.body);
1728
1729    if authed_fp == fp_b {
1730        // Both users get identical responses — user B can see user A's data
1731        findings.push(
1732            Finding::new(
1733                url,
1734                "api_security/idor-cross-user",
1735                "IDOR: second identity accesses same object (BOLA confirmed)",
1736                Severity::Critical,
1737                "Two different identities received identical responses for the same resource. \
1738                 This confirms broken object-level authorization — a user can access \
1739                 another user's resources using their own valid credentials.",
1740                "api_security",
1741            )
1742            .with_evidence(format!(
1743                "Identity A: HTTP {}, body hash {:x}\n\
1744                 Identity B: HTTP {}, body hash {:x} (identical)",
1745                authed_resp.status, authed_fp.1, resp_b.status, fp_b.1,
1746            ))
1747            .with_remediation(
1748                "Enforce strict object-level authorization. Every resource access must \
1749                 verify the requesting identity's ownership or explicit permission for \
1750                 that specific object — never rely solely on global authentication.",
1751            ),
1752        );
1753    }
1754}
1755
1756fn idor_range_walk_severity(other_success_count: usize) -> Severity {
1757    match other_success_count {
1758        0 | 1 => Severity::Low,
1759        2 => Severity::Medium,
1760        3 => Severity::High,
1761        _ => Severity::Critical,
1762    }
1763}
1764
1765// ── Numeric segment helpers ────────────────────────────────────────────────────
1766
/// A numeric path segment of a URL, treated as a candidate resource ID.
#[derive(Debug, Clone)]
struct NumericSegment {
    /// Zero-based position of the segment within the URL's path segments.
    segment_index: usize,
    /// The segment's text parsed as an unsigned integer.
    value: u64,
}
1774
1775fn find_numeric_segment(url: &str) -> Option<NumericSegment> {
1776    let parsed = Url::parse(url).ok()?;
1777    let segments: Vec<String> = parsed.path_segments()?.map(|s| s.to_string()).collect();
1778
1779    // Find the last numeric segment (most likely to be a resource ID)
1780    for (i, seg) in segments.iter().enumerate().rev() {
1781        if let Ok(num) = seg.parse::<u64>() {
1782            // Sanity-check: IDs are typically < 10 billion
1783            // Very large numbers are probably timestamps, not IDs
1784            if num < 10_000_000_000 {
1785                return Some(NumericSegment {
1786                    segment_index: i,
1787                    value: num,
1788                });
1789            }
1790        }
1791    }
1792    None
1793}
1794
1795fn replace_numeric_segment(url: &str, seg: &NumericSegment, new_id: u64) -> String {
1796    let parsed = match Url::parse(url) {
1797        Ok(u) => u,
1798        Err(_) => return url.to_string(),
1799    };
1800    let mut segments: Vec<String> = match parsed.path_segments() {
1801        Some(s) => s.map(|s| s.to_string()).collect(),
1802        None => return url.to_string(),
1803    };
1804
1805    segments[seg.segment_index] = new_id.to_string();
1806    let new_path = format!("/{}", segments.join("/"));
1807    let mut new_url = parsed.clone();
1808    new_url.set_path(&new_path);
1809    new_url.to_string()
1810}
api_scanner/scanner/api_security.rs

api_scanner/scanner/
api_security.rs