web_analyzer/
api_security_scanner.rs

1use regex::Regex;
2use reqwest::Client;
3use scraper::{Html, Selector};
4use serde::{Deserialize, Serialize};
5use std::collections::HashSet;
6use std::time::{Duration, Instant};
7
8use crate::payloads;
9
10// ── Result structs ──────────────────────────────────────────────────────────
11
/// A URL that `verify_endpoint` classified as an API endpoint.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ApiEndpoint {
    /// The URL that was probed.
14    pub url: String,
    /// HTTP status code of the probe response selected as the best vote.
15    pub status_code: u16,
    /// Classification label such as `"REST/JSON"`, `"GraphQL"` or `"Protected API"`.
16    pub api_type: String,
17}
18
/// One suspected vulnerability reported by a `test_*` probe.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct VulnerabilityFinding {
    /// Vulnerability class, e.g. `"SQL_INJECTION"`, `"XSS"`, `"SSTI"`.
21    pub vuln_type: String,
    /// Variant within the class, e.g. `"Time-based Blind"`, `"Error-based"`, `"Reflected"`.
22    pub subtype: String,
    /// Endpoint URL the probe was sent to (without the injected parameter).
23    pub endpoint: String,
    /// Name of the query parameter that carried the payload.
24    pub parameter: String,
    /// The payload as taken from the payload list, before URL encoding.
25    pub payload: String,
    /// Severity label; the probes in this file emit `"CRITICAL"` and `"HIGH"`.
26    pub severity: String,
    /// Confidence label; the probes in this file emit `"HIGH"` and `"MEDIUM"`.
27    pub confidence: String,
    /// Human-readable description of what triggered the finding.
28    pub evidence: String,
29}
30
/// Aggregate result returned by `scan_api_endpoints`.
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct ApiScanResult {
    /// The `domain` argument exactly as the caller supplied it.
33    pub domain: String,
    /// Every endpoint that passed verification during discovery.
34    pub endpoints_found: Vec<ApiEndpoint>,
    /// All findings collected during the vulnerability-testing phase.
35    pub vulnerabilities: Vec<VulnerabilityFinding>,
    /// Number of candidate paths taken from the embedded API path list.
36    pub total_paths_probed: usize,
    /// Endpoint count reported for the vulnerability-testing phase.
37    pub endpoints_tested: usize,
38}
39
40// ── HTML killwords — definitive NOT-API indicators ──────────────────────────
41
/// Substrings that disqualify a GET response from being treated as an API.
///
/// `verify_endpoint` lower-cases a leading sample of the response body and
/// skips the response when any entry occurs in it, so every entry must be
/// written in lower case.
42const HTML_KILLERS: &[&str] = &[
43    "<!doctype html",
44    "<html",
45    "<head>",
46    "<body>",
47    "<title>",
48    "<div",
49    "<form",
50    "<table",
51    "<script",
52    "not found</title>",
53    "404 not found",
54    "404 - not found",
55    "page not found",
56    "file not found",
57    "apache/2.",
58    "nginx/",
59    "microsoft-iis",
60    "server error",
61    "access denied",
62    "forbidden",
63    "directory listing",
64    "index of /",
65    "<h1>404</h1>",
66    "<h1>error</h1>",
67];
68
69// ── Swagger/OpenAPI documentation indicators ────────────────────────────────
70
/// Lower-case JSON key fragments typical of Swagger/OpenAPI documents.
///
/// When the probed URL contains a `DOC_URL_HINTS` entry and at least three of
/// these occur in the lower-cased body sample, `verify_endpoint` treats the
/// response as API documentation and does not count it as an endpoint.
71const DOC_INDICATORS: &[&str] = &[
72    "\"openapi\":",
73    "\"swagger\":",
74    "\"info\":",
75    "\"paths\":",
76    "\"components\":",
77    "\"definitions\":",
78    "\"host\":",
79    "\"basepath\":",
80    "\"schemes\":",
81    "\"consumes\":",
82    "\"produces\":",
83];
84
/// Substrings searched for in the lower-cased URL; a hit enables the
/// documentation-file check that uses `DOC_INDICATORS`.
85const DOC_URL_HINTS: &[&str] = &[
86    "openapi",
87    "swagger",
88    "docs",
89    "spec",
90    "schema",
91    "definition",
92    ".json",
93    ".yaml",
94    ".yml",
95];
96
97// ── API-specific response headers ───────────────────────────────────────────
98
/// Response header names that hint at an API.
///
/// `verify_endpoint` compares these for exact equality with the lower-cased
/// header names of a GET response; each header present adds one point to the
/// endpoint score.
99const API_HEADERS: &[&str] = &[
100    "x-api-version",
101    "x-api-key",
102    "x-rate-limit",
103    "x-ratelimit",
104    "x-request-id",
105    "x-correlation-id",
106    "x-trace-id",
107];
108
/// Framework names looked for as substrings of the lower-cased `Server`
/// response header; each match adds two points to the endpoint score.
109const FRAMEWORK_SERVERS: &[&str] = &[
110    "express", "koa", "fastify", "spring", "django", "flask", "tornado", "rails", "sinatra",
111    "fastapi",
112];
113
114// ── Auth error patterns (regex) ─────────────────────────────────────────────
115
/// Regex sources describing API-style authentication errors.
///
/// `verify_endpoint` compiles these and matches them against the lower-cased
/// body sample of 401/403 responses, so every pattern is written in lower
/// case. The `"status"` pattern ends at the closing quote of the value rather
/// than requiring a trailing comma, so it also matches when `"status"` is the
/// last member of the JSON object.
const AUTH_ERROR_PATTERNS: &[&str] = &[
    r#""error"\s*:\s*"(unauthorized|forbidden|invalid.*token|missing.*auth)"#,
    r#""message"\s*:\s*"(unauthorized|forbidden|authentication|authorization)"#,
    r#""code"\s*:\s*"(401|403|auth_required|token_invalid)"#,
    r#""status"\s*:\s*"(unauthorized|forbidden|error)""#,
    r#""access_token""#,
    r#""api_key""#,
    r#""authentication.*required""#,
    r#""invalid.*credentials""#,
];
126
127// ── RESTful API structure patterns ──────────────────────────────────────────
128
/// Regex sources for JSON envelopes commonly returned by REST APIs.
///
/// Every pattern is anchored at the start of the text. `verify_endpoint`
/// compiles them and matches them against the body sample as received (not
/// lower-cased); each matching pattern adds one point to the endpoint score.
129const API_STRUCTURE_PATTERNS: &[&str] = &[
130    r#"^\s*\{\s*"data"\s*:\s*[\{\[]"#,
131    r#"^\s*\{\s*"result"\s*:\s*[\{\[]"#,
132    r#"^\s*\{\s*"results"\s*:\s*\["#,
133    r#"^\s*\{\s*"items"\s*:\s*\["#,
134    r#"^\s*\{\s*"records"\s*:\s*\["#,
135    r#"^\s*\{\s*"version"\s*:\s*"[^"]*""#,
136    r#"^\s*\{\s*"api_version"\s*:\s*"[^"]*""#,
137    r#"^\s*\{\s*"timestamp"\s*:\s*\d+"#,
138    r#"^\s*\{\s*"error"\s*:\s*\{\s*"code""#,
139    r#"^\s*\{\s*"error"\s*:\s*\{\s*"message""#,
140    r#"^\s*\{\s*"errors"\s*:\s*\[.*"message""#,
141    r#"^\s*\{\s*"success"\s*:\s*(true|false)"#,
142    r#"^\s*\{\s*"status"\s*:\s*"(up|down|ok|healthy|error|fail|success)""#,
143    r#"^\s*\{\s*"health"\s*:\s*"(up|down|ok)""#,
144];
145
146// ── SQL error patterns ──────────────────────────────────────────────────────
147
/// Regex sources for database error messages that leak into responses.
///
/// `test_sql_injection` compiles these and matches them against response
/// bodies as received. No case-insensitivity flag is set, so matching is
/// case-sensitive.
148const SQL_ERROR_PATTERNS: &[&str] = &[
149    r"You have an error in your SQL syntax",
150    r"MySQL server version for the right syntax",
151    r"PostgreSQL.*ERROR.*syntax error",
152    r"ORA-[0-9]{5}.*invalid identifier",
153    r"SQLite error.*syntax error",
154    r"SQLException.*invalid column name",
155    r"mysql_fetch_array\(\).*expects parameter",
156    r"Warning.*mysql_.*\(\).*supplied argument",
157];
158
159// ── JS API endpoint extraction patterns ─────────────────────────────────────
160
/// Regex sources that pull request paths out of JavaScript source.
///
/// In every pattern capture group 1 is a quoted string that starts with `/`
/// and contains no quote or whitespace characters. `extract_js_endpoints`
/// compiles these and appends each captured path to the base URL.
161const JS_API_PATTERNS: &[&str] = &[
162    r#"fetch\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
163    r#"axios\.[a-z]+\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
164    r#"\$\.ajax\([^)]*url\s*:\s*['"`](/[^'"`\s]+)['"`]"#,
165    r#"\$\.get\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
166    r#"\$\.post\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
167    r#"apiUrl\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
168    r#"API_URL\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
169    r#"baseURL\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
170    r#"endpoint\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
171];
172
173// ── Main scanner ────────────────────────────────────────────────────────────
174
175pub async fn scan_api_endpoints(
176    domain: &str,
177) -> Result<ApiScanResult, Box<dyn std::error::Error + Send + Sync>> {
178    let base_url = if domain.starts_with("http") {
179        domain.to_string()
180    } else {
181        format!("https://{}", domain)
182    };
183
184    let client = Client::builder()
185        .timeout(Duration::from_secs(15))
186        .danger_accept_invalid_certs(true)
187        .redirect(reqwest::redirect::Policy::limited(3))
188        .build()?;
189
190    // ── Phase 1: Endpoint Discovery ─────────────────────────────────────
191    let mut verified_endpoints: Vec<ApiEndpoint> = Vec::new();
192
193    // 1a. Probe paths from embedded api_endpoints.txt
194    let api_paths = payloads::lines(payloads::API_ENDPOINTS);
195    let total_paths_probed = api_paths.len();
196
197    for path in &api_paths {
198        let url = format!("{}{}", base_url.trim_end_matches('/'), path);
199        if let Some(endpoint) = verify_endpoint(&client, &url).await {
200            verified_endpoints.push(endpoint);
201        }
202    }
203
204    // 1b. Extract endpoints from JavaScript on main page
205    let js_endpoints = extract_js_endpoints(&client, &base_url).await;
206    for url in &js_endpoints {
207        if !verified_endpoints.iter().any(|e| e.url == *url) {
208            if let Some(endpoint) = verify_endpoint(&client, url).await {
209                verified_endpoints.push(endpoint);
210            }
211        }
212    }
213
214    // 1c. Extract API paths from robots.txt and sitemap.xml
215    let robots_endpoints = extract_robots_sitemap_endpoints(&client, &base_url).await;
216    for url in &robots_endpoints {
217        if !verified_endpoints.iter().any(|e| e.url == *url) {
218            if let Some(endpoint) = verify_endpoint(&client, url).await {
219                verified_endpoints.push(endpoint);
220            }
221        }
222    }
223
224    // 1d. Scrape Swagger/OpenAPI documentation for real paths
225    let doc_endpoints = scrape_documentation_endpoints(&client, &base_url).await;
226    for url in &doc_endpoints {
227        if !verified_endpoints.iter().any(|e| e.url == *url) {
228            if let Some(endpoint) = verify_endpoint(&client, url).await {
229                verified_endpoints.push(endpoint);
230            }
231        }
232    }
233
234    // 1e. Check common API subdomains
235    let subdomain_endpoints = check_api_subdomains(&client, domain).await;
236    for url in &subdomain_endpoints {
237        if !verified_endpoints.iter().any(|e| e.url == *url) {
238            if let Some(endpoint) = verify_endpoint(&client, url).await {
239                verified_endpoints.push(endpoint);
240            }
241        }
242    }
243
244    // ── Phase 2: Vulnerability Testing ──────────────────────────────────
245    let mut vulnerabilities: Vec<VulnerabilityFinding> = Vec::new();
246    let endpoints_tested = verified_endpoints.len();
247
248    for ep in &verified_endpoints {
249        let mut findings = test_endpoint(&client, &ep.url).await;
250        vulnerabilities.append(&mut findings);
251
252        // Early exit on excessive criticals
253        let critical_count = vulnerabilities
254            .iter()
255            .filter(|v| v.severity == "CRITICAL")
256            .count();
257        if critical_count >= 10 {
258            break;
259        }
260    }
261
262    Ok(ApiScanResult {
263        domain: domain.to_string(),
264        endpoints_found: verified_endpoints,
265        vulnerabilities,
266        total_paths_probed,
267        endpoints_tested,
268    })
269}
270
271// ── Advanced API endpoint verification ──────────────────────────────────────
272
273async fn verify_endpoint(client: &Client, url: &str) -> Option<ApiEndpoint> {
274    // Try GET first, then OPTIONS, HEAD — majority voting
275    let methods = ["GET", "OPTIONS", "HEAD"];
276    let mut votes: Vec<(String, u16)> = Vec::new(); // (api_type, status)
277
278    for method in &methods {
279        let req = match *method {
280            "GET" => client.get(url),
281            "OPTIONS" => client.request(reqwest::Method::OPTIONS, url),
282            "HEAD" => client.head(url),
283            _ => continue,
284        };
285
286        let resp = match req.send().await {
287            Ok(r) => r,
288            Err(_) => continue,
289        };
290
291        let status = resp.status().as_u16();
292
293        // Immediate disqualifiers
294        if matches!(status, 404 | 502 | 503 | 500) {
295            continue;
296        }
297
298        let headers: Vec<(String, String)> = resp
299            .headers()
300            .iter()
301            .map(|(k, v)| {
302                (
303                    k.as_str().to_lowercase(),
304                    v.to_str().unwrap_or("").to_lowercase(),
305                )
306            })
307            .collect();
308
309        let content_type = headers
310            .iter()
311            .find(|(k, _)| k == "content-type")
312            .map(|(_, v)| v.as_str())
313            .unwrap_or("");
314
315        // For HEAD/OPTIONS we can't read body, just check headers
316        if *method != "GET" {
317            if let Some(api_type) = detect_api_from_headers(content_type, &headers, status) {
318                votes.push((api_type, status));
319            }
320            continue;
321        }
322
323        // GET — full body analysis
324        let body = match resp.text().await {
325            Ok(t) => t,
326            Err(_) => continue,
327        };
328
329        if body.trim().len() < 5 {
330            continue;
331        }
332
333        let sample = if body.len() > 5000 {
334            &body[..5000]
335        } else {
336            &body
337        };
338        let sample_lower = sample.to_lowercase();
339
340        // HTML killer filter
341        if HTML_KILLERS.iter().any(|k| sample_lower.contains(k)) {
342            continue;
343        }
344
345        // Documentation file detection
346        let is_doc_url = DOC_URL_HINTS.iter().any(|h| url.to_lowercase().contains(h));
347        if is_doc_url {
348            let doc_score: usize = DOC_INDICATORS
349                .iter()
350                .filter(|d| sample_lower.contains(*d))
351                .count();
352            if doc_score >= 3 {
353                continue; // Skip API documentation files
354            }
355        }
356
357        // Content-type based definitive detection
358        let ct_api = if content_type.contains("application/json") {
359            // Verify valid JSON
360            if serde_json::from_str::<serde_json::Value>(sample).is_ok() {
361                Some("REST/JSON".to_string())
362            } else {
363                None
364            }
365        } else if content_type.contains("application/xml") || content_type.contains("text/xml") {
366            Some("REST/XML".to_string())
367        } else if content_type.contains("graphql") {
368            Some("GraphQL".to_string())
369        } else if content_type.contains("application/vnd.api+json") {
370            Some("JSON:API".to_string())
371        } else if content_type.contains("application/hal+json") {
372            Some("HAL+JSON".to_string())
373        } else if content_type.contains("application/problem+json") {
374            Some("Problem Details".to_string())
375        } else {
376            None
377        };
378
379        if let Some(api_type) = ct_api {
380            votes.push((api_type, status));
381            continue;
382        }
383
384        // Auth-protected endpoint detection (401/403)
385        if matches!(status, 401 | 403) {
386            let auth_headers = [
387                "www-authenticate",
388                "x-api-key",
389                "x-auth-token",
390                "x-rate-limit",
391            ];
392            if auth_headers
393                .iter()
394                .any(|h| headers.iter().any(|(k, _)| k == h))
395            {
396                votes.push(("Protected API".to_string(), status));
397                continue;
398            }
399            // Check body for API-style auth errors
400            let auth_regexes: Vec<Regex> = AUTH_ERROR_PATTERNS
401                .iter()
402                .filter_map(|p| Regex::new(p).ok())
403                .collect();
404            if auth_regexes.iter().any(|rx| rx.is_match(&sample_lower)) {
405                votes.push(("Protected API".to_string(), status));
406                continue;
407            }
408        }
409
410        // API structure pattern scoring
411        let structure_regexes: Vec<Regex> = API_STRUCTURE_PATTERNS
412            .iter()
413            .filter_map(|p| Regex::new(p).ok())
414            .collect();
415        let structure_score: usize = structure_regexes
416            .iter()
417            .filter(|rx| rx.is_match(sample))
418            .count();
419
420        // API header scoring
421        let api_header_score: usize = API_HEADERS
422            .iter()
423            .filter(|h| headers.iter().any(|(k, _)| k == **h))
424            .count();
425
426        // Framework detection via Server header
427        let framework_score: usize = headers
428            .iter()
429            .filter(|(k, _)| k == "server")
430            .map(|(_, v)| FRAMEWORK_SERVERS.iter().filter(|f| v.contains(*f)).count() * 2)
431            .sum();
432
433        let total_score = structure_score + api_header_score + framework_score;
434
435        if total_score >= 4 || (total_score >= 2 && status == 200) {
436            votes.push(("REST API".to_string(), status));
437        }
438    }
439
440    // Majority voting
441    if votes.is_empty() {
442        return None;
443    }
444
445    // Pick the best vote (prefer 2xx status)
446    let best = votes
447        .iter()
448        .max_by_key(|(_, s)| {
449            if *s < 400 {
450                1000 - *s as i32
451            } else {
452                -((*s) as i32)
453            }
454        })
455        .unwrap();
456
457    Some(ApiEndpoint {
458        url: url.to_string(),
459        status_code: best.1,
460        api_type: best.0.clone(),
461    })
462}
463
/// Classifies a response as an API using only its headers and status code.
///
/// Used for HEAD/OPTIONS probes, where no body is inspected. The media types
/// and auth headers recognised here are the same ones the GET path of
/// `verify_endpoint` recognises, so all three probe methods label an endpoint
/// consistently.
///
/// * `content_type` – lower-cased `Content-Type` value (may be empty).
/// * `headers` – `(name, value)` pairs with lower-cased names.
/// * `status` – HTTP status code of the response.
///
/// Returns the API type label, or `None` when the headers carry no API evidence.
fn detect_api_from_headers(
    content_type: &str,
    headers: &[(String, String)],
    status: u16,
) -> Option<String> {
    // Checked in order; the first substring found in `content_type` wins.
    const MEDIA_TYPES: [(&str, &str); 7] = [
        ("application/json", "REST/JSON"),
        ("application/xml", "REST/XML"),
        ("text/xml", "REST/XML"),
        ("graphql", "GraphQL"),
        ("application/vnd.api+json", "JSON:API"),
        ("application/hal+json", "HAL+JSON"),
        ("application/problem+json", "Problem Details"),
    ];
    // Headers whose presence on a 401/403 indicates an auth-gated API.
    const AUTH_HEADERS: [&str; 4] = [
        "www-authenticate",
        "x-api-key",
        "x-auth-token",
        "x-rate-limit",
    ];

    if let Some((_, label)) = MEDIA_TYPES
        .iter()
        .find(|(needle, _)| content_type.contains(*needle))
    {
        return Some((*label).to_string());
    }

    let auth_gated = matches!(status, 401 | 403)
        && headers
            .iter()
            .any(|(name, _)| AUTH_HEADERS.contains(&name.as_str()));

    if auth_gated {
        Some("Protected API".to_string())
    } else {
        None
    }
}
489
490// ── Endpoint Discovery Helpers ──────────────────────────────────────────────
491
492async fn extract_js_endpoints(client: &Client, base_url: &str) -> Vec<String> {
493    let mut endpoints = HashSet::new();
494    let resp = match client.get(base_url).send().await {
495        Ok(r) if r.status().is_success() => r,
496        _ => return Vec::new(),
497    };
498    let body = match resp.text().await {
499        Ok(t) => t,
500        Err(_) => return Vec::new(),
501    };
502
503    // Collect inline JS
504    let mut all_js = String::new();
505    let mut external_urls = Vec::new();
506
507    {
508        let doc = Html::parse_document(&body);
509        let script_sel = Selector::parse("script").unwrap();
510        for el in doc.select(&script_sel) {
511            let inline = el.text().collect::<String>();
512            if inline.len() > 10 {
513                all_js.push('\n');
514                all_js.push_str(&inline);
515            }
516            // Fetch up to 10 external JS files
517            if let Some(src) = el.value().attr("src") {
518                if external_urls.len() < 10 {
519                    external_urls.push(src.to_string());
520                }
521            }
522        }
523    }
524
525    for src in external_urls {
526        if endpoints.len() > 10 {
527            break;
528        }
529        if let Some(js_url) = resolve_url(base_url, &src) {
530            if let Ok(resp) = client.get(&js_url).send().await {
531                if resp.status().is_success() {
532                    if let Ok(js_body) = resp.text().await {
533                        all_js.push('\n');
534                        all_js.push_str(&js_body);
535                    }
536                }
537            }
538        }
539    }
540
541    // Extract API paths from JS content
542    let regexes: Vec<Regex> = JS_API_PATTERNS
543        .iter()
544        .filter_map(|p| Regex::new(p).ok())
545        .collect();
546
547    for rx in &regexes {
548        for cap in rx.captures_iter(&all_js) {
549            if let Some(m) = cap.get(1) {
550                let path = m.as_str().trim();
551                if path.is_empty() {
552                    continue;
553                }
554                // Skip static assets
555                if [".js", ".css", ".png", ".jpg", ".gif", ".ico", ".svg"]
556                    .iter()
557                    .any(|ext| path.to_lowercase().ends_with(ext))
558                {
559                    continue;
560                }
561                let full = format!("{}{}", base_url.trim_end_matches('/'), path);
562                endpoints.insert(full);
563            }
564        }
565    }
566
567    endpoints.into_iter().collect()
568}
569
570async fn extract_robots_sitemap_endpoints(client: &Client, base_url: &str) -> Vec<String> {
571    let mut endpoints = HashSet::new();
572
573    // robots.txt
574    let robots_url = format!("{}/robots.txt", base_url.trim_end_matches('/'));
575    if let Ok(resp) = client.get(&robots_url).send().await {
576        if resp.status().is_success() {
577            if let Ok(body) = resp.text().await {
578                for line in body.lines() {
579                    let line = line.trim().to_lowercase();
580                    if (line.starts_with("disallow:") || line.starts_with("allow:"))
581                        && line.contains(':')
582                    {
583                        let path = line.split_once(':').map(|(_, v)| v.trim()).unwrap_or("");
584                        if !path.is_empty()
585                            && path != "/"
586                            && ["api", "graphql", "rest"]
587                                .iter()
588                                .any(|kw| path.contains(kw))
589                        {
590                            endpoints.insert(format!("{}{}", base_url.trim_end_matches('/'), path));
591                        }
592                    }
593                }
594            }
595        }
596    }
597
598    // sitemap.xml
599    let sitemap_url = format!("{}/sitemap.xml", base_url.trim_end_matches('/'));
600    if let Ok(resp) = client.get(&sitemap_url).send().await {
601        if resp.status().is_success() {
602            if let Ok(body) = resp.text().await {
603                if let Ok(rx) = Regex::new(r"<loc>([^<]+)</loc>") {
604                    for cap in rx.captures_iter(&body) {
605                        if let Some(m) = cap.get(1) {
606                            let url = m.as_str();
607                            if ["api", "graphql", "rest"]
608                                .iter()
609                                .any(|kw| url.to_lowercase().contains(kw))
610                            {
611                                endpoints.insert(url.to_string());
612                            }
613                        }
614                    }
615                }
616            }
617        }
618    }
619
620    endpoints.into_iter().collect()
621}
622
623async fn scrape_documentation_endpoints(client: &Client, base_url: &str) -> Vec<String> {
624    let mut endpoints = HashSet::new();
625    let doc_paths = [
626        "/swagger.json",
627        "/openapi.json",
628        "/api-docs",
629        "/docs",
630        "/swagger",
631        "/api/swagger.json",
632        "/api/docs",
633    ];
634
635    for path in &doc_paths {
636        let url = format!("{}{}", base_url.trim_end_matches('/'), path);
637        let resp = match client.get(&url).send().await {
638            Ok(r) if r.status().is_success() => r,
639            _ => continue,
640        };
641        let body = match resp.text().await {
642            Ok(t) => t,
643            Err(_) => continue,
644        };
645
646        // Try to parse as JSON and extract "paths" key
647        if let Ok(doc) = serde_json::from_str::<serde_json::Value>(&body) {
648            if let Some(paths) = doc.get("paths").and_then(|p| p.as_object()) {
649                for path_key in paths.keys() {
650                    if path_key.starts_with('/') {
651                        endpoints.insert(format!("{}{}", base_url.trim_end_matches('/'), path_key));
652                    }
653                }
654            }
655            if let Some(base_path) = doc.get("basePath").and_then(|b| b.as_str()) {
656                if !base_path.is_empty() {
657                    endpoints.insert(format!("{}{}", base_url.trim_end_matches('/'), base_path));
658                }
659            }
660        }
661    }
662
663    endpoints.into_iter().collect()
664}
665
666async fn check_api_subdomains(client: &Client, domain: &str) -> Vec<String> {
667    let mut endpoints = Vec::new();
668    let bare_domain = domain
669        .trim_start_matches("https://")
670        .trim_start_matches("http://")
671        .split('/')
672        .next()
673        .unwrap_or(domain);
674
675    let parts: Vec<&str> = bare_domain.split('.').collect();
676    if parts.len() < 2 {
677        return endpoints;
678    }
679
680    let base = format!("{}.{}", parts[parts.len() - 2], parts[parts.len() - 1]);
681
682    let prefixes = [
683        "api",
684        "rest",
685        "graphql",
686        "gateway",
687        "api-v1",
688        "api-v2",
689        "api-dev",
690        "dev-api",
691        "api-staging",
692        "staging-api",
693        "mobile-api",
694        "app-api",
695        "admin-api",
696        "auth-api",
697    ];
698
699    for prefix in &prefixes[..8] {
700        // limit to avoid excessive requests
701        for proto in &["https", "http"] {
702            let url = format!("{}://{}.{}", proto, prefix, base);
703            if let Ok(resp) = client.get(&url).send().await {
704                if resp.status().is_success() || matches!(resp.status().as_u16(), 401 | 403) {
705                    endpoints.push(url);
706                    break; // Found, skip other protocol
707                }
708            }
709        }
710    }
711
712    endpoints
713}
714
715// ── Vulnerability Testing ───────────────────────────────────────────────────
716
717async fn test_endpoint(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
718    let mut findings = Vec::new();
719
720    findings.append(&mut test_sql_injection(client, endpoint).await);
721    findings.append(&mut test_xss(client, endpoint).await);
722    findings.append(&mut test_ssti(client, endpoint).await);
723    findings.append(&mut test_ssrf(client, endpoint).await);
724    findings.append(&mut test_auth_bypass(client, endpoint).await);
725    findings.append(&mut test_command_injection(client, endpoint).await);
726    findings.append(&mut test_nosql_injection(client, endpoint).await);
727    findings.append(&mut test_xxe(client, endpoint).await);
728    findings.append(&mut test_lfi(client, endpoint).await);
729
730    findings
731}
732
733// ── SQLi ────────────────────────────────────────────────────────────────────
734
735async fn test_sql_injection(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
736    let mut findings = Vec::new();
737    let sqli_payloads = payloads::lines(payloads::SQL_INJECTION);
738    let params = ["id", "user", "search", "q", "filter"];
739
740    let error_regexes: Vec<Regex> = SQL_ERROR_PATTERNS
741        .iter()
742        .filter_map(|p| Regex::new(p).ok())
743        .collect();
744
745    for param in &params[..3] {
746        // Baseline
747        let baseline_url = format!("{}?{}=1", endpoint, param);
748        let baseline_body = match fetch_body(client, &baseline_url).await {
749            Some(b) => b,
750            None => continue,
751        };
752        if error_regexes.iter().any(|rx| rx.is_match(&baseline_body)) {
753            continue; // Baseline already has SQL errors
754        }
755
756        for payload in sqli_payloads.iter().take(5) {
757            let encoded = urlencoding::encode(payload);
758            let test_url = format!("{}?{}={}", endpoint, param, encoded);
759
760            // Time-based detection
761            if payload.to_uppercase().contains("SLEEP")
762                || payload.to_uppercase().contains("WAITFOR")
763            {
764                let start = Instant::now();
765                if let Ok(resp) = client.get(&test_url).send().await {
766                    let elapsed = start.elapsed().as_secs_f64();
767                    let _ = resp.text().await;
768                    if elapsed > 4.8 {
769                        findings.push(VulnerabilityFinding {
770                            vuln_type: "SQL_INJECTION".into(),
771                            subtype: "Time-based Blind".into(),
772                            endpoint: endpoint.into(),
773                            parameter: param.to_string(),
774                            payload: payload.to_string(),
775                            severity: "CRITICAL".into(),
776                            confidence: "MEDIUM".into(),
777                            evidence: format!("Response delayed {:.1}s", elapsed),
778                        });
779                        return findings;
780                    }
781                }
782                continue;
783            }
784
785            // Error-based detection
786            if let Some(body) = fetch_body(client, &test_url).await {
787                for rx in &error_regexes {
788                    if let Some(m) = rx.find(&body) {
789                        if !rx.is_match(&baseline_body) {
790                            findings.push(VulnerabilityFinding {
791                                vuln_type: "SQL_INJECTION".into(),
792                                subtype: "Error-based".into(),
793                                endpoint: endpoint.into(),
794                                parameter: param.to_string(),
795                                payload: payload.to_string(),
796                                severity: "CRITICAL".into(),
797                                confidence: "HIGH".into(),
798                                evidence: format!("SQL error: {}", m.as_str()),
799                            });
800                            return findings;
801                        }
802                    }
803                }
804            }
805        }
806    }
807
808    findings
809}
810
811// ── XSS ─────────────────────────────────────────────────────────────────────
812
813async fn test_xss(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
814    let mut findings = Vec::new();
815    let xss_payloads = payloads::lines(payloads::XSS);
816    let params = ["q", "search", "query", "keyword", "name"];
817
818    for payload in xss_payloads.iter().take(5) {
819        for param in &params[..3] {
820            let encoded = urlencoding::encode(payload);
821            let test_url = format!("{}?{}={}", endpoint, param, encoded);
822
823            let resp = match client.get(&test_url).send().await {
824                Ok(r) => r,
825                Err(_) => continue,
826            };
827
828            if !resp.status().is_success() {
829                continue;
830            }
831
832            let ct = resp
833                .headers()
834                .get("content-type")
835                .and_then(|v| v.to_str().ok())
836                .unwrap_or("")
837                .to_lowercase();
838
839            if !ct.contains("text/html") {
840                continue;
841            }
842
843            let body = match resp.text().await {
844                Ok(t) => t,
845                Err(_) => continue,
846            };
847
848            // Payload reflected unencoded in HTML
849            if body.contains(payload) && !is_payload_safe_context(&body, payload) {
850                findings.push(VulnerabilityFinding {
851                    vuln_type: "XSS".into(),
852                    subtype: "Reflected".into(),
853                    endpoint: endpoint.into(),
854                    parameter: param.to_string(),
855                    payload: payload.to_string(),
856                    severity: "HIGH".into(),
857                    confidence: "HIGH".into(),
858                    evidence: "Payload reflected in HTML without encoding".into(),
859                });
860                return findings;
861            }
862        }
863    }
864    findings
865}
866
/// Reports whether every raw reflection of `payload` in `content` is harmless.
///
/// Returns `true` when:
/// * `payload` contains neither `<` nor `>` — HTML-encoding such a payload
///   leaves it unchanged, so a raw reflection proves nothing and it is treated
///   as safe;
/// * `payload` does not occur in `content` at all; or
/// * every occurrence of `payload` lies inside an HTML comment, meaning the
///   nearest `<!--` before it has not been closed and a `-->` follows it.
///
/// Returns `false` as soon as one occurrence lies outside a comment. All
/// occurrences are examined, not just the first, and an HTML-encoded copy
/// elsewhere in the page does not excuse a raw reflection.
fn is_payload_safe_context(content: &str, payload: &str) -> bool {
    if !(payload.contains('<') || payload.contains('>')) {
        return true;
    }

    // True when byte offset `pos` sits between an unclosed `<!--` and a later `-->`.
    let inside_comment = |pos: usize| -> bool {
        let before = &content[..pos];
        match before.rfind("<!--") {
            Some(open) => !before[open..].contains("-->") && content[pos..].contains("-->"),
            None => false,
        }
    };

    content
        .match_indices(payload)
        .all(|(pos, _)| inside_comment(pos))
}
888
889// ── SSTI ────────────────────────────────────────────────────────────────────
890
891async fn test_ssti(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
892    let mut findings = Vec::new();
893    let tests = [
894        ("{{7*7*7}}", "343"),
895        ("{{9*9*9}}", "729"),
896        ("${8*8*8}", "512"),
897        ("{{42*13}}", "546"),
898    ];
899    let params = ["template", "name", "msg", "content"];
900
901    for &(payload, expected) in &tests {
902        for param in &params[..3] {
903            // Baseline
904            let baseline_url = format!("{}?{}=normaltext", endpoint, param);
905            let baseline = match fetch_body(client, &baseline_url).await {
906                Some(b) => b,
907                None => continue,
908            };
909
910            let encoded = urlencoding::encode(payload);
911            let test_url = format!("{}?{}={}", endpoint, param, encoded);
912
913            if let Some(body) = fetch_body(client, &test_url).await {
914                if body.contains(expected)
915                    && !body.contains(payload)
916                    && !baseline.contains(expected)
917                {
918                    findings.push(VulnerabilityFinding {
919                        vuln_type: "SSTI".into(),
920                        subtype: "Template Injection".into(),
921                        endpoint: endpoint.into(),
922                        parameter: param.to_string(),
923                        payload: payload.to_string(),
924                        severity: "CRITICAL".into(),
925                        confidence: "HIGH".into(),
926                        evidence: format!("Template executed: {} = {}", payload, expected),
927                    });
928                    return findings;
929                }
930            }
931        }
932    }
933    findings
934}
935
936// ── SSRF ────────────────────────────────────────────────────────────────────
937
938async fn test_ssrf(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
939    let mut findings = Vec::new();
940    let ssrf_payloads = payloads::lines(payloads::SSRF);
941    let params = ["url", "uri", "path", "dest", "redirect"];
942    let indicators = [
943        "root:",
944        "daemon:",
945        "localhost",
946        "metadata",
947        "ami-id",
948        "instance-id",
949    ];
950
951    for param in &params[..3] {
952        for payload in ssrf_payloads.iter().take(3) {
953            let encoded = urlencoding::encode(payload);
954            let test_url = format!("{}?{}={}", endpoint, param, encoded);
955
956            if let Some(body) = fetch_body(client, &test_url).await {
957                for indicator in &indicators {
958                    if body.contains(indicator) {
959                        findings.push(VulnerabilityFinding {
960                            vuln_type: "SSRF".into(),
961                            subtype: "Server-Side Request Forgery".into(),
962                            endpoint: endpoint.into(),
963                            parameter: param.to_string(),
964                            payload: payload.to_string(),
965                            severity: "CRITICAL".into(),
966                            confidence: "HIGH".into(),
967                            evidence: format!("Internal data leaked: {}", indicator),
968                        });
969                        return findings;
970                    }
971                }
972            }
973        }
974    }
975    findings
976}
977
978// ── Auth Bypass ─────────────────────────────────────────────────────────────
979
980async fn test_auth_bypass(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
981    let mut findings = Vec::new();
982
983    // Check if endpoint is normally protected
984    let normal_status = match client.get(endpoint).send().await {
985        Ok(r) => r.status().as_u16(),
986        Err(_) => return findings,
987    };
988    if !matches!(normal_status, 401 | 403) {
989        return findings; // Not protected, skip
990    }
991
992    let bypass_headers = payloads::auth_headers(payloads::AUTH_BYPASS_HEADERS);
993
994    for (name, value) in bypass_headers.iter().take(10) {
995        let resp = match client
996            .get(endpoint)
997            .header(name as &str, value as &str)
998            .send()
999            .await
1000        {
1001            Ok(r) => r,
1002            Err(_) => continue,
1003        };
1004
1005        if resp.status().as_u16() == 200 {
1006            findings.push(VulnerabilityFinding {
1007                vuln_type: "AUTH_BYPASS".into(),
1008                subtype: "Header-based".into(),
1009                endpoint: endpoint.into(),
1010                parameter: String::new(),
1011                payload: format!("{}: {}", name, value),
1012                severity: "CRITICAL".into(),
1013                confidence: "HIGH".into(),
1014                evidence: format!("Bypass with header {}: {}", name, value),
1015            });
1016            return findings;
1017        }
1018    }
1019    findings
1020}
1021
1022// ── Command Injection ───────────────────────────────────────────────────────
1023
1024async fn test_command_injection(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1025    let mut findings = Vec::new();
1026    let cmd_payloads = payloads::lines(payloads::COMMAND_INJECTION);
1027    let params = ["cmd", "exec", "command", "ping", "host"];
1028
1029    for param in &params[..3] {
1030        for payload in cmd_payloads.iter().take(3) {
1031            if payload.to_lowercase().contains("sleep") {
1032                let encoded = urlencoding::encode(payload);
1033                let test_url = format!("{}?{}={}", endpoint, param, encoded);
1034                let start = Instant::now();
1035                if let Ok(resp) = client.get(&test_url).send().await {
1036                    let elapsed = start.elapsed().as_secs_f64();
1037                    let _ = resp.text().await;
1038                    if elapsed > 4.5 {
1039                        findings.push(VulnerabilityFinding {
1040                            vuln_type: "COMMAND_INJECTION".into(),
1041                            subtype: "Time-based".into(),
1042                            endpoint: endpoint.into(),
1043                            parameter: param.to_string(),
1044                            payload: payload.to_string(),
1045                            severity: "CRITICAL".into(),
1046                            confidence: "HIGH".into(),
1047                            evidence: format!("Command executed (delay: {:.1}s)", elapsed),
1048                        });
1049                        return findings;
1050                    }
1051                }
1052            }
1053        }
1054    }
1055    findings
1056}
1057
1058// ── NoSQL Injection ─────────────────────────────────────────────────────────
1059
1060async fn test_nosql_injection(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1061    let mut findings = Vec::new();
1062    let nosql_payloads = payloads::lines(payloads::NOSQL_INJECTION);
1063
1064    for payload in nosql_payloads.iter().take(3) {
1065        let resp = match client
1066            .post(endpoint)
1067            .header("Content-Type", "application/json")
1068            .body(payload.to_string())
1069            .send()
1070            .await
1071        {
1072            Ok(r) => r,
1073            Err(_) => continue,
1074        };
1075
1076        if matches!(resp.status().as_u16(), 200 | 201) {
1077            let body = match resp.text().await {
1078                Ok(t) => t,
1079                Err(_) => continue,
1080            };
1081            if body.len() > 100 && !body.to_lowercase().contains("error") {
1082                findings.push(VulnerabilityFinding {
1083                    vuln_type: "NOSQL_INJECTION".into(),
1084                    subtype: "Operator Injection".into(),
1085                    endpoint: endpoint.into(),
1086                    parameter: String::new(),
1087                    payload: payload.to_string(),
1088                    severity: "HIGH".into(),
1089                    confidence: "MEDIUM".into(),
1090                    evidence: "NoSQL operator accepted, returned data".into(),
1091                });
1092                return findings;
1093            }
1094        }
1095    }
1096    findings
1097}
1098
1099// ── XXE ─────────────────────────────────────────────────────────────────────
1100
1101async fn test_xxe(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1102    let mut findings = Vec::new();
1103    let xxe_payloads = payloads::lines(payloads::XXE);
1104    let indicators = ["root:", "daemon:", "Windows", "[fonts]"];
1105
1106    for payload in xxe_payloads.iter().take(2) {
1107        let resp = match client
1108            .post(endpoint)
1109            .header("Content-Type", "application/xml")
1110            .body(payload.to_string())
1111            .send()
1112            .await
1113        {
1114            Ok(r) => r,
1115            Err(_) => continue,
1116        };
1117
1118        if resp.status().is_success() {
1119            let body = match resp.text().await {
1120                Ok(t) => t,
1121                Err(_) => continue,
1122            };
1123            for indicator in &indicators {
1124                if body.contains(indicator) {
1125                    findings.push(VulnerabilityFinding {
1126                        vuln_type: "XXE".into(),
1127                        subtype: "XML External Entity".into(),
1128                        endpoint: endpoint.into(),
1129                        parameter: String::new(),
1130                        payload: payload.to_string(),
1131                        severity: "CRITICAL".into(),
1132                        confidence: "HIGH".into(),
1133                        evidence: "File contents disclosed via XXE".into(),
1134                    });
1135                    return findings;
1136                }
1137            }
1138        }
1139    }
1140    findings
1141}
1142
1143// ── LFI ─────────────────────────────────────────────────────────────────────
1144
1145async fn test_lfi(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1146    let mut findings = Vec::new();
1147    let lfi_payloads = payloads::lines(payloads::LFI);
1148    let params = ["file", "path", "page", "include", "template"];
1149    let indicators = ["root:x:", "daemon:", "[fonts]", "[extensions]"];
1150
1151    for param in &params[..3] {
1152        for payload in lfi_payloads.iter().take(3) {
1153            let encoded = urlencoding::encode(payload);
1154            let test_url = format!("{}?{}={}", endpoint, param, encoded);
1155
1156            if let Some(body) = fetch_body(client, &test_url).await {
1157                for indicator in &indicators {
1158                    if body.contains(indicator) {
1159                        findings.push(VulnerabilityFinding {
1160                            vuln_type: "LFI".into(),
1161                            subtype: "Local File Inclusion".into(),
1162                            endpoint: endpoint.into(),
1163                            parameter: param.to_string(),
1164                            payload: payload.to_string(),
1165                            severity: "HIGH".into(),
1166                            confidence: "HIGH".into(),
1167                            evidence: "Local file contents exposed".into(),
1168                        });
1169                        return findings;
1170                    }
1171                }
1172            }
1173        }
1174    }
1175    findings
1176}
1177
1178// ── Shared helpers ──────────────────────────────────────────────────────────
1179
1180async fn fetch_body(client: &Client, url: &str) -> Option<String> {
1181    let resp = client.get(url).send().await.ok()?;
1182    if resp.status().as_u16() == 404 {
1183        return None;
1184    }
1185    resp.text().await.ok()
1186}
1187
/// Resolves a link target `href` found on the page at `base` into an
/// absolute URL string.
///
/// * Fragment-only links and `javascript:`, `mailto:`, `tel:` and `data:`
///   links yield `None` (scheme match is case-insensitive).
/// * `http://` / `https://` hrefs are returned unchanged.
/// * Protocol-relative hrefs (`//host/...`) take `http` when `base` is an
///   `http:` URL and `https` otherwise.
/// * Root-relative hrefs (`/path`) are joined to the scheme-and-host part of
///   `base`.
/// * Any other href is joined to the directory of `base`, i.e. everything up
///   to and including the last `/` of its path; a base without a path gets a
///   `/` inserted.
///
/// Any query string or fragment on `base` is ignored. `.` and `..` segments
/// are not normalised.
fn resolve_url(base: &str, href: &str) -> Option<String> {
    // Case-insensitive prefix test that never slices inside a UTF-8 sequence.
    let has_prefix = |prefix: &str| {
        href.get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    };

    if href.starts_with('#')
        || ["javascript:", "mailto:", "tel:", "data:"]
            .iter()
            .any(|&scheme| has_prefix(scheme))
    {
        return None;
    }
    if has_prefix("http://") || has_prefix("https://") {
        return Some(href.to_string());
    }

    // A '/' inside the base's query or fragment is not a path separator.
    let base = base
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(base);

    if href.starts_with("//") {
        let base_is_http = base
            .get(..5)
            .map_or(false, |head| head.eq_ignore_ascii_case("http:"));
        let scheme = if base_is_http { "http" } else { "https" };
        return Some(format!("{}:{}", scheme, href));
    }

    // Find where the path begins, skipping the "//" of "scheme://".
    let authority_start = base.find("://").map_or(0, |idx| idx + 3);
    let path_start = base[authority_start..]
        .find('/')
        .map(|idx| authority_start + idx);

    if href.starts_with('/') {
        // Root-relative: keep only scheme and host of the base.
        let origin = path_start.map_or(base, |idx| &base[..idx]);
        return Some(format!("{}{}", origin, href));
    }

    match path_start {
        Some(first_slash) => {
            // The path contains at least `first_slash`, so rfind cannot
            // land inside the "://" separator.
            let last_slash = base.rfind('/').unwrap_or(first_slash);
            Some(format!("{}{}", &base[..=last_slash], href))
        }
        None => Some(format!("{}/{}", base, href)),
    }
}
web_analyzer/api_security_scanner.rs

web_analyzer/
api_security_scanner.rs