web_analyzer/
api_security_scanner.rs

1use regex::Regex;
2use reqwest::Client;
3use scraper::{Html, Selector};
4use serde::{Deserialize, Serialize};
5use std::collections::HashSet;
6use std::time::{Duration, Instant};
7
8use crate::payloads;
9
10// ── Result structs ──────────────────────────────────────────────────────────
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ApiEndpoint {
14    pub url: String,
15    pub status_code: u16,
16    pub api_type: String,
17}
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct VulnerabilityFinding {
21    pub vuln_type: String,
22    pub subtype: String,
23    pub endpoint: String,
24    pub parameter: String,
25    pub payload: String,
26    pub severity: String,
27    pub confidence: String,
28    pub evidence: String,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct ApiScanResult {
33    pub domain: String,
34    pub endpoints_found: Vec<ApiEndpoint>,
35    pub vulnerabilities: Vec<VulnerabilityFinding>,
36    pub total_paths_probed: usize,
37    pub endpoints_tested: usize,
38}
39
40// ── HTML killwords — definitive NOT-API indicators ──────────────────────────
41
/// Lower-case substrings that disqualify a GET body from being treated as an
/// API response. They are matched against the lower-cased body sample.
const HTML_KILLERS: &[&str] = &[
    // HTML document structure
    "<!doctype html", "<html", "<head>", "<body>", "<title>",
    "<div", "<form", "<table", "<script",
    // "Not found" pages
    "not found</title>", "404 not found", "404 - not found",
    "page not found", "file not found",
    // Web-server banners
    "apache/2.", "nginx/", "microsoft-iis",
    // Generic error / listing pages
    "server error", "access denied", "forbidden",
    "directory listing", "index of /",
    "<h1>404</h1>", "<h1>error</h1>",
];
68
69// ── Swagger/OpenAPI documentation indicators ────────────────────────────────
70
/// Lower-case JSON keys whose presence suggests a Swagger/OpenAPI document.
/// Matched against the lower-cased body sample.
const DOC_INDICATORS: &[&str] = &[
    // Top-level markers
    "\"openapi\":", "\"swagger\":", "\"info\":", "\"paths\":",
    // Schema containers
    "\"components\":", "\"definitions\":",
    // Server / media-type settings
    "\"host\":", "\"basepath\":", "\"schemes\":",
    "\"consumes\":", "\"produces\":",
];
84
/// Lower-case URL fragments hinting that a URL serves API documentation
/// rather than a live endpoint.
const DOC_URL_HINTS: &[&str] = &[
    // Names
    "openapi", "swagger", "docs", "spec", "schema", "definition",
    // File extensions
    ".json", ".yaml", ".yml",
];
96
97// ── API-specific response headers ───────────────────────────────────────────
98
/// Lower-case response header names that count towards the API score.
const API_HEADERS: &[&str] = &[
    "x-api-version", "x-api-key",
    "x-rate-limit", "x-ratelimit",
    "x-request-id", "x-correlation-id", "x-trace-id",
];
108
/// Lower-case framework names looked for inside the `Server` response header.
const FRAMEWORK_SERVERS: &[&str] = &[
    "express",
    "koa",
    "fastify",
    "spring",
    "django",
    "flask",
    "tornado",
    "rails",
    "sinatra",
    "fastapi",
];
113
114// ── Auth error patterns (regex) ─────────────────────────────────────────────
115
/// Regex sources matched against a lower-cased 401/403 body to recognise
/// API-style authentication errors.
const AUTH_ERROR_PATTERNS: &[&str] = &[
    // `"error": "<auth failure>"`
    r#""error"\s*:\s*"(unauthorized|forbidden|invalid.*token|missing.*auth)"#,
    // `"message": "<auth failure>"`
    r#""message"\s*:\s*"(unauthorized|forbidden|authentication|authorization)"#,
    // `"code": "<auth code>"`
    r#""code"\s*:\s*"(401|403|auth_required|token_invalid)"#,
    // `"status": "<state>",`
    r#""status"\s*:\s*"(unauthorized|forbidden|error)","#,
    // Credential-related keys and phrases
    r#""access_token""#,
    r#""api_key""#,
    r#""authentication.*required""#,
    r#""invalid.*credentials""#,
];
126
127// ── RESTful API structure patterns ──────────────────────────────────────────
128
/// Regex sources describing how typical REST/JSON payloads begin. Each match
/// against the body sample adds one point to the structure score.
const API_STRUCTURE_PATTERNS: &[&str] = &[
    // Data envelopes
    r#"^\s*\{\s*"data"\s*:\s*[\{\[]"#,
    r#"^\s*\{\s*"result"\s*:\s*[\{\[]"#,
    r#"^\s*\{\s*"results"\s*:\s*\["#,
    r#"^\s*\{\s*"items"\s*:\s*\["#,
    r#"^\s*\{\s*"records"\s*:\s*\["#,
    // Version / timestamp metadata
    r#"^\s*\{\s*"version"\s*:\s*"[^"]*""#,
    r#"^\s*\{\s*"api_version"\s*:\s*"[^"]*""#,
    r#"^\s*\{\s*"timestamp"\s*:\s*\d+"#,
    // Error envelopes
    r#"^\s*\{\s*"error"\s*:\s*\{\s*"code""#,
    r#"^\s*\{\s*"error"\s*:\s*\{\s*"message""#,
    r#"^\s*\{\s*"errors"\s*:\s*\[.*"message""#,
    // Status / health responses
    r#"^\s*\{\s*"success"\s*:\s*(true|false)"#,
    r#"^\s*\{\s*"status"\s*:\s*"(up|down|ok|healthy|error|fail|success)""#,
    r#"^\s*\{\s*"health"\s*:\s*"(up|down|ok)""#,
];
145
146// ── SQL error patterns ──────────────────────────────────────────────────────
147
/// Regex sources for database error messages that indicate error-based SQL
/// injection when they appear in a response body.
const SQL_ERROR_PATTERNS: &[&str] = &[
    // MySQL
    r"You have an error in your SQL syntax",
    r"MySQL server version for the right syntax",
    // PostgreSQL
    r"PostgreSQL.*ERROR.*syntax error",
    // Oracle
    r"ORA-[0-9]{5}.*invalid identifier",
    // SQLite
    r"SQLite error.*syntax error",
    // Generic SQLException text
    r"SQLException.*invalid column name",
    // PHP mysql_* warnings
    r"mysql_fetch_array\(\).*expects parameter",
    r"Warning.*mysql_.*\(\).*supplied argument",
];
158
159// ── JS API endpoint extraction patterns ─────────────────────────────────────
160
/// Regex sources that pull root-relative paths (capture group 1) out of
/// JavaScript: HTTP-client calls first, then configuration assignments.
const JS_API_PATTERNS: &[&str] = &[
    // fetch / axios
    r#"fetch\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
    r#"axios\.[a-z]+\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
    // jQuery
    r#"\$\.ajax\([^)]*url\s*:\s*['"`](/[^'"`\s]+)['"`]"#,
    r#"\$\.get\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
    r#"\$\.post\s*\(\s*['"`](/[^'"`\s]+)['"`]"#,
    // Config-style assignments
    r#"apiUrl\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
    r#"API_URL\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
    r#"baseURL\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
    r#"endpoint\s*[:=]\s*['"`](/[^'"`\s]+)['"`]"#,
];
172
173// ── Main scanner ────────────────────────────────────────────────────────────
174
175pub async fn scan_api_endpoints(
176    domain: &str,
177    progress_tx: Option<tokio::sync::mpsc::Sender<crate::ScanProgress>>,
178) -> Result<ApiScanResult, Box<dyn std::error::Error + Send + Sync>> {
179    let base_url = if domain.starts_with("http") {
180        domain.to_string()
181    } else {
182        format!("https://{}", domain)
183    };
184
185    let client = Client::builder()
186        .timeout(Duration::from_secs(15))
187        .danger_accept_invalid_certs(true)
188        .redirect(reqwest::redirect::Policy::limited(3))
189        .build()?;
190
191    // ── Phase 1: Endpoint Discovery ─────────────────────────────────────
192    let mut verified_endpoints: Vec<ApiEndpoint> = Vec::new();
193
194    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 5.0, message: "Started API endpoint discovery...".into(), status: "Info".into() }).await; }
195
196    // 1a. Probe paths from embedded api_endpoints.txt
197    let api_paths = payloads::lines(payloads::API_ENDPOINTS);
198    let total_paths_probed = api_paths.len();
199
200    for (i, path) in api_paths.iter().enumerate() {
201        if i % 10 == 0 {
202            if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 5.0 + (15.0 * (i as f32 / total_paths_probed as f32)), message: format!("Probing paths: {}", path), status: "Info".into() }).await; }
203        }
204        let url = format!("{}{}", base_url.trim_end_matches('/'), path);
205        if let Some(endpoint) = verify_endpoint(&client, &url).await {
206            verified_endpoints.push(endpoint);
207        }
208    }
209
210    // 1b. Extract endpoints from JavaScript on main page
211    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 20.0, message: "Extracting JavaScript endpoints...".into(), status: "Info".into() }).await; }
212    let js_endpoints = extract_js_endpoints(&client, &base_url).await;
213    for url in &js_endpoints {
214        if !verified_endpoints.iter().any(|e| e.url == *url) {
215            if let Some(endpoint) = verify_endpoint(&client, url).await {
216                verified_endpoints.push(endpoint);
217            }
218        }
219    }
220
221    // 1c. Extract API paths from robots.txt and sitemap.xml
222    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 25.0, message: "Checking robots.txt & sitemap.xml...".into(), status: "Info".into() }).await; }
223    let robots_endpoints = extract_robots_sitemap_endpoints(&client, &base_url).await;
224    for url in &robots_endpoints {
225        if !verified_endpoints.iter().any(|e| e.url == *url) {
226            if let Some(endpoint) = verify_endpoint(&client, url).await {
227                verified_endpoints.push(endpoint);
228            }
229        }
230    }
231
232    // 1d. Scrape Swagger/OpenAPI documentation for real paths
233    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 30.0, message: "Hunting for OpenAPI/Swagger docs...".into(), status: "Info".into() }).await; }
234    let doc_endpoints = scrape_documentation_endpoints(&client, &base_url).await;
235    for url in &doc_endpoints {
236        if !verified_endpoints.iter().any(|e| e.url == *url) {
237            if let Some(endpoint) = verify_endpoint(&client, url).await {
238                verified_endpoints.push(endpoint);
239            }
240        }
241    }
242
243    // 1e. Check common API subdomains
244    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 35.0, message: "Bruting common API subdomains...".into(), status: "Info".into() }).await; }
245    let subdomain_endpoints = check_api_subdomains(&client, domain).await;
246    for url in &subdomain_endpoints {
247        if !verified_endpoints.iter().any(|e| e.url == *url) {
248            if let Some(endpoint) = verify_endpoint(&client, url).await {
249                verified_endpoints.push(endpoint);
250            }
251        }
252    }
253
254    // ── Phase 2: Vulnerability Testing ──────────────────────────────────
255    let mut vulnerabilities: Vec<VulnerabilityFinding> = Vec::new();
256    let endpoints_tested = verified_endpoints.len();
257
258    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 40.0, message: format!("Found {} endpoints, starting active fuzzing...", endpoints_tested), status: "Info".into() }).await; }
259
260    for (i, ep) in verified_endpoints.iter().enumerate() {
261        if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "API Security".into(), percentage: 40.0 + (60.0 * (i as f32 / endpoints_tested.max(1) as f32)), message: format!("Fuzzing endpoint: {}", ep.url), status: "Info".into() }).await; }
262        let mut findings = test_endpoint(&client, &ep.url).await;
263        vulnerabilities.append(&mut findings);
264
265        // Early exit on excessive criticals
266        let critical_count = vulnerabilities
267            .iter()
268            .filter(|v| v.severity == "CRITICAL")
269            .count();
270        if critical_count >= 10 {
271            break;
272        }
273    }
274
275    Ok(ApiScanResult {
276        domain: domain.to_string(),
277        endpoints_found: verified_endpoints,
278        vulnerabilities,
279        total_paths_probed,
280        endpoints_tested,
281    })
282}
283
284// ── Advanced API endpoint verification ──────────────────────────────────────
285
286async fn verify_endpoint(client: &Client, url: &str) -> Option<ApiEndpoint> {
287    // Try GET first, then OPTIONS, HEAD — majority voting
288    let methods = ["GET", "OPTIONS", "HEAD"];
289    let mut votes: Vec<(String, u16)> = Vec::new(); // (api_type, status)
290
291    for method in &methods {
292        let req = match *method {
293            "GET" => client.get(url),
294            "OPTIONS" => client.request(reqwest::Method::OPTIONS, url),
295            "HEAD" => client.head(url),
296            _ => continue,
297        };
298
299        let resp = match req.send().await {
300            Ok(r) => r,
301            Err(_) => continue,
302        };
303
304        let status = resp.status().as_u16();
305
306        // Immediate disqualifiers
307        if matches!(status, 404 | 502 | 503 | 500) {
308            continue;
309        }
310
311        let headers: Vec<(String, String)> = resp
312            .headers()
313            .iter()
314            .map(|(k, v)| {
315                (
316                    k.as_str().to_lowercase(),
317                    v.to_str().unwrap_or("").to_lowercase(),
318                )
319            })
320            .collect();
321
322        let content_type = headers
323            .iter()
324            .find(|(k, _)| k == "content-type")
325            .map(|(_, v)| v.as_str())
326            .unwrap_or("");
327
328        // For HEAD/OPTIONS we can't read body, just check headers
329        if *method != "GET" {
330            if let Some(api_type) = detect_api_from_headers(content_type, &headers, status) {
331                votes.push((api_type, status));
332            }
333            continue;
334        }
335
336        // GET — full body analysis
337        let body = match resp.text().await {
338            Ok(t) => t,
339            Err(_) => continue,
340        };
341
342        if body.trim().len() < 5 {
343            continue;
344        }
345
346        let sample = if body.len() > 5000 {
347            &body[..5000]
348        } else {
349            &body
350        };
351        let sample_lower = sample.to_lowercase();
352
353        // HTML killer filter
354        if HTML_KILLERS.iter().any(|k| sample_lower.contains(k)) {
355            continue;
356        }
357
358        // Documentation file detection
359        let is_doc_url = DOC_URL_HINTS.iter().any(|h| url.to_lowercase().contains(h));
360        if is_doc_url {
361            let doc_score: usize = DOC_INDICATORS
362                .iter()
363                .filter(|d| sample_lower.contains(*d))
364                .count();
365            if doc_score >= 3 {
366                continue; // Skip API documentation files
367            }
368        }
369
370        // Content-type based definitive detection
371        let ct_api = if content_type.contains("application/json") {
372            // Verify valid JSON
373            if serde_json::from_str::<serde_json::Value>(sample).is_ok() {
374                Some("REST/JSON".to_string())
375            } else {
376                None
377            }
378        } else if content_type.contains("application/xml") || content_type.contains("text/xml") {
379            Some("REST/XML".to_string())
380        } else if content_type.contains("graphql") {
381            Some("GraphQL".to_string())
382        } else if content_type.contains("application/vnd.api+json") {
383            Some("JSON:API".to_string())
384        } else if content_type.contains("application/hal+json") {
385            Some("HAL+JSON".to_string())
386        } else if content_type.contains("application/problem+json") {
387            Some("Problem Details".to_string())
388        } else {
389            None
390        };
391
392        if let Some(api_type) = ct_api {
393            votes.push((api_type, status));
394            continue;
395        }
396
397        // Auth-protected endpoint detection (401/403)
398        if matches!(status, 401 | 403) {
399            let auth_headers = [
400                "www-authenticate",
401                "x-api-key",
402                "x-auth-token",
403                "x-rate-limit",
404            ];
405            if auth_headers
406                .iter()
407                .any(|h| headers.iter().any(|(k, _)| k == h))
408            {
409                votes.push(("Protected API".to_string(), status));
410                continue;
411            }
412            // Check body for API-style auth errors
413            let auth_regexes: Vec<Regex> = AUTH_ERROR_PATTERNS
414                .iter()
415                .filter_map(|p| Regex::new(p).ok())
416                .collect();
417            if auth_regexes.iter().any(|rx| rx.is_match(&sample_lower)) {
418                votes.push(("Protected API".to_string(), status));
419                continue;
420            }
421        }
422
423        // API structure pattern scoring
424        let structure_regexes: Vec<Regex> = API_STRUCTURE_PATTERNS
425            .iter()
426            .filter_map(|p| Regex::new(p).ok())
427            .collect();
428        let structure_score: usize = structure_regexes
429            .iter()
430            .filter(|rx| rx.is_match(sample))
431            .count();
432
433        // API header scoring
434        let api_header_score: usize = API_HEADERS
435            .iter()
436            .filter(|h| headers.iter().any(|(k, _)| k == **h))
437            .count();
438
439        // Framework detection via Server header
440        let framework_score: usize = headers
441            .iter()
442            .filter(|(k, _)| k == "server")
443            .map(|(_, v)| FRAMEWORK_SERVERS.iter().filter(|f| v.contains(*f)).count() * 2)
444            .sum();
445
446        let total_score = structure_score + api_header_score + framework_score;
447
448        if total_score >= 4 || (total_score >= 2 && status == 200) {
449            votes.push(("REST API".to_string(), status));
450        }
451    }
452
453    // Majority voting
454    if votes.is_empty() {
455        return None;
456    }
457
458    // Pick the best vote (prefer 2xx status)
459    let best = votes
460        .iter()
461        .max_by_key(|(_, s)| {
462            if *s < 400 {
463                1000 - *s as i32
464            } else {
465                -((*s) as i32)
466            }
467        })
468        .unwrap();
469
470    Some(ApiEndpoint {
471        url: url.to_string(),
472        status_code: best.1,
473        api_type: best.0.clone(),
474    })
475}
476
/// Classifies a response as an API endpoint using headers alone (used for
/// HEAD/OPTIONS, where no body is available).
///
/// * `content_type` - lower-cased `Content-Type` value, or `""` if absent.
/// * `headers` - lower-cased `(name, value)` pairs of the response.
/// * `status` - HTTP status code.
///
/// Returns the API type label, or `None` when nothing API-like is found. The
/// recognised media types and auth headers mirror those used for GET in
/// `verify_endpoint`, so every method votes by the same rules.
fn detect_api_from_headers(
    content_type: &str,
    headers: &[(String, String)],
    status: u16,
) -> Option<String> {
    // Checked in order; the first fragment contained in `content_type` wins.
    const CONTENT_TYPE_LABELS: [(&str, &str); 7] = [
        ("application/json", "REST/JSON"),
        ("application/xml", "REST/XML"),
        ("text/xml", "REST/XML"),
        ("graphql", "GraphQL"),
        ("application/vnd.api+json", "JSON:API"),
        ("application/hal+json", "HAL+JSON"),
        ("application/problem+json", "Problem Details"),
    ];
    // Headers that mark a 401/403 response as an auth-protected API.
    const AUTH_HEADERS: [&str; 4] = [
        "www-authenticate",
        "x-api-key",
        "x-auth-token",
        "x-rate-limit",
    ];

    if let Some((_, label)) = CONTENT_TYPE_LABELS
        .iter()
        .find(|(fragment, _)| content_type.contains(*fragment))
    {
        return Some((*label).to_string());
    }

    if matches!(status, 401 | 403)
        && headers
            .iter()
            .any(|(name, _)| AUTH_HEADERS.contains(&name.as_str()))
    {
        return Some("Protected API".to_string());
    }

    None
}
502
503// ── Endpoint Discovery Helpers ──────────────────────────────────────────────
504
505async fn extract_js_endpoints(client: &Client, base_url: &str) -> Vec<String> {
506    let mut endpoints = HashSet::new();
507    let resp = match client.get(base_url).send().await {
508        Ok(r) if r.status().is_success() => r,
509        _ => return Vec::new(),
510    };
511    let body = match resp.text().await {
512        Ok(t) => t,
513        Err(_) => return Vec::new(),
514    };
515
516    // Collect inline JS
517    let mut all_js = String::new();
518    let mut external_urls = Vec::new();
519
520    {
521        let doc = Html::parse_document(&body);
522        let script_sel = Selector::parse("script").unwrap();
523        for el in doc.select(&script_sel) {
524            let inline = el.text().collect::<String>();
525            if inline.len() > 10 {
526                all_js.push('\n');
527                all_js.push_str(&inline);
528            }
529            // Fetch up to 10 external JS files
530            if let Some(src) = el.value().attr("src") {
531                if external_urls.len() < 10 {
532                    external_urls.push(src.to_string());
533                }
534            }
535        }
536    }
537
538    for src in external_urls {
539        if endpoints.len() > 10 {
540            break;
541        }
542        if let Some(js_url) = resolve_url(base_url, &src) {
543            if let Ok(resp) = client.get(&js_url).send().await {
544                if resp.status().is_success() {
545                    if let Ok(js_body) = resp.text().await {
546                        all_js.push('\n');
547                        all_js.push_str(&js_body);
548                    }
549                }
550            }
551        }
552    }
553
554    // Extract API paths from JS content
555    let regexes: Vec<Regex> = JS_API_PATTERNS
556        .iter()
557        .filter_map(|p| Regex::new(p).ok())
558        .collect();
559
560    for rx in &regexes {
561        for cap in rx.captures_iter(&all_js) {
562            if let Some(m) = cap.get(1) {
563                let path = m.as_str().trim();
564                if path.is_empty() {
565                    continue;
566                }
567                // Skip static assets
568                if [".js", ".css", ".png", ".jpg", ".gif", ".ico", ".svg"]
569                    .iter()
570                    .any(|ext| path.to_lowercase().ends_with(ext))
571                {
572                    continue;
573                }
574                let full = format!("{}{}", base_url.trim_end_matches('/'), path);
575                endpoints.insert(full);
576            }
577        }
578    }
579
580    endpoints.into_iter().collect()
581}
582
583async fn extract_robots_sitemap_endpoints(client: &Client, base_url: &str) -> Vec<String> {
584    let mut endpoints = HashSet::new();
585
586    // robots.txt
587    let robots_url = format!("{}/robots.txt", base_url.trim_end_matches('/'));
588    if let Ok(resp) = client.get(&robots_url).send().await {
589        if resp.status().is_success() {
590            if let Ok(body) = resp.text().await {
591                for line in body.lines() {
592                    let line = line.trim().to_lowercase();
593                    if (line.starts_with("disallow:") || line.starts_with("allow:"))
594                        && line.contains(':')
595                    {
596                        let path = line.split_once(':').map(|(_, v)| v.trim()).unwrap_or("");
597                        if !path.is_empty()
598                            && path != "/"
599                            && ["api", "graphql", "rest"]
600                                .iter()
601                                .any(|kw| path.contains(kw))
602                        {
603                            endpoints.insert(format!("{}{}", base_url.trim_end_matches('/'), path));
604                        }
605                    }
606                }
607            }
608        }
609    }
610
611    // sitemap.xml
612    let sitemap_url = format!("{}/sitemap.xml", base_url.trim_end_matches('/'));
613    if let Ok(resp) = client.get(&sitemap_url).send().await {
614        if resp.status().is_success() {
615            if let Ok(body) = resp.text().await {
616                if let Ok(rx) = Regex::new(r"<loc>([^<]+)</loc>") {
617                    for cap in rx.captures_iter(&body) {
618                        if let Some(m) = cap.get(1) {
619                            let url = m.as_str();
620                            if ["api", "graphql", "rest"]
621                                .iter()
622                                .any(|kw| url.to_lowercase().contains(kw))
623                            {
624                                endpoints.insert(url.to_string());
625                            }
626                        }
627                    }
628                }
629            }
630        }
631    }
632
633    endpoints.into_iter().collect()
634}
635
636async fn scrape_documentation_endpoints(client: &Client, base_url: &str) -> Vec<String> {
637    let mut endpoints = HashSet::new();
638    let doc_paths = [
639        "/swagger.json",
640        "/openapi.json",
641        "/api-docs",
642        "/docs",
643        "/swagger",
644        "/api/swagger.json",
645        "/api/docs",
646    ];
647
648    for path in &doc_paths {
649        let url = format!("{}{}", base_url.trim_end_matches('/'), path);
650        let resp = match client.get(&url).send().await {
651            Ok(r) if r.status().is_success() => r,
652            _ => continue,
653        };
654        let body = match resp.text().await {
655            Ok(t) => t,
656            Err(_) => continue,
657        };
658
659        // Try to parse as JSON and extract "paths" key
660        if let Ok(doc) = serde_json::from_str::<serde_json::Value>(&body) {
661            if let Some(paths) = doc.get("paths").and_then(|p| p.as_object()) {
662                for path_key in paths.keys() {
663                    if path_key.starts_with('/') {
664                        endpoints.insert(format!("{}{}", base_url.trim_end_matches('/'), path_key));
665                    }
666                }
667            }
668            if let Some(base_path) = doc.get("basePath").and_then(|b| b.as_str()) {
669                if !base_path.is_empty() {
670                    endpoints.insert(format!("{}{}", base_url.trim_end_matches('/'), base_path));
671                }
672            }
673        }
674    }
675
676    endpoints.into_iter().collect()
677}
678
679async fn check_api_subdomains(client: &Client, domain: &str) -> Vec<String> {
680    let mut endpoints = Vec::new();
681    let bare_domain = domain
682        .trim_start_matches("https://")
683        .trim_start_matches("http://")
684        .split('/')
685        .next()
686        .unwrap_or(domain);
687
688    let parts: Vec<&str> = bare_domain.split('.').collect();
689    if parts.len() < 2 {
690        return endpoints;
691    }
692
693    let base = format!("{}.{}", parts[parts.len() - 2], parts[parts.len() - 1]);
694
695    let prefixes = [
696        "api",
697        "rest",
698        "graphql",
699        "gateway",
700        "api-v1",
701        "api-v2",
702        "api-dev",
703        "dev-api",
704        "api-staging",
705        "staging-api",
706        "mobile-api",
707        "app-api",
708        "admin-api",
709        "auth-api",
710    ];
711
712    for prefix in &prefixes[..8] {
713        // limit to avoid excessive requests
714        for proto in &["https", "http"] {
715            let url = format!("{}://{}.{}", proto, prefix, base);
716            if let Ok(resp) = client.get(&url).send().await {
717                if resp.status().is_success() || matches!(resp.status().as_u16(), 401 | 403) {
718                    endpoints.push(url);
719                    break; // Found, skip other protocol
720                }
721            }
722        }
723    }
724
725    endpoints
726}
727
728// ── Vulnerability Testing ───────────────────────────────────────────────────
729
730async fn test_endpoint(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
731    let mut findings = Vec::new();
732
733    findings.append(&mut test_sql_injection(client, endpoint).await);
734    findings.append(&mut test_xss(client, endpoint).await);
735    findings.append(&mut test_ssti(client, endpoint).await);
736    findings.append(&mut test_ssrf(client, endpoint).await);
737    findings.append(&mut test_auth_bypass(client, endpoint).await);
738    findings.append(&mut test_command_injection(client, endpoint).await);
739    findings.append(&mut test_nosql_injection(client, endpoint).await);
740    findings.append(&mut test_xxe(client, endpoint).await);
741    findings.append(&mut test_lfi(client, endpoint).await);
742
743    findings
744}
745
746// ── SQLi ────────────────────────────────────────────────────────────────────
747
748async fn test_sql_injection(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
749    let mut findings = Vec::new();
750    let sqli_payloads = payloads::lines(payloads::SQL_INJECTION);
751    let params = ["id", "user", "search", "q", "filter"];
752
753    let error_regexes: Vec<Regex> = SQL_ERROR_PATTERNS
754        .iter()
755        .filter_map(|p| Regex::new(p).ok())
756        .collect();
757
758    for param in &params[..3] {
759        // Baseline
760        let baseline_url = format!("{}?{}=1", endpoint, param);
761        let baseline_body = match fetch_body(client, &baseline_url).await {
762            Some(b) => b,
763            None => continue,
764        };
765        if error_regexes.iter().any(|rx| rx.is_match(&baseline_body)) {
766            continue; // Baseline already has SQL errors
767        }
768
769        for payload in sqli_payloads.iter().take(5) {
770            let encoded = urlencoding::encode(payload);
771            let test_url = format!("{}?{}={}", endpoint, param, encoded);
772
773            // Time-based detection
774            if payload.to_uppercase().contains("SLEEP")
775                || payload.to_uppercase().contains("WAITFOR")
776            {
777                let start = Instant::now();
778                if let Ok(resp) = client.get(&test_url).send().await {
779                    let elapsed = start.elapsed().as_secs_f64();
780                    let _ = resp.text().await;
781                    if elapsed > 4.8 {
782                        findings.push(VulnerabilityFinding {
783                            vuln_type: "SQL_INJECTION".into(),
784                            subtype: "Time-based Blind".into(),
785                            endpoint: endpoint.into(),
786                            parameter: param.to_string(),
787                            payload: payload.to_string(),
788                            severity: "CRITICAL".into(),
789                            confidence: "MEDIUM".into(),
790                            evidence: format!("Response delayed {:.1}s", elapsed),
791                        });
792                        return findings;
793                    }
794                }
795                continue;
796            }
797
798            // Error-based detection
799            if let Some(body) = fetch_body(client, &test_url).await {
800                for rx in &error_regexes {
801                    if let Some(m) = rx.find(&body) {
802                        if !rx.is_match(&baseline_body) {
803                            findings.push(VulnerabilityFinding {
804                                vuln_type: "SQL_INJECTION".into(),
805                                subtype: "Error-based".into(),
806                                endpoint: endpoint.into(),
807                                parameter: param.to_string(),
808                                payload: payload.to_string(),
809                                severity: "CRITICAL".into(),
810                                confidence: "HIGH".into(),
811                                evidence: format!("SQL error: {}", m.as_str()),
812                            });
813                            return findings;
814                        }
815                    }
816                }
817            }
818        }
819    }
820
821    findings
822}
823
824// ── XSS ─────────────────────────────────────────────────────────────────────
825
826async fn test_xss(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
827    let mut findings = Vec::new();
828    let xss_payloads = payloads::lines(payloads::XSS);
829    let params = ["q", "search", "query", "keyword", "name"];
830
831    for payload in xss_payloads.iter().take(5) {
832        for param in &params[..3] {
833            let encoded = urlencoding::encode(payload);
834            let test_url = format!("{}?{}={}", endpoint, param, encoded);
835
836            let resp = match client.get(&test_url).send().await {
837                Ok(r) => r,
838                Err(_) => continue,
839            };
840
841            if !resp.status().is_success() {
842                continue;
843            }
844
845            let ct = resp
846                .headers()
847                .get("content-type")
848                .and_then(|v| v.to_str().ok())
849                .unwrap_or("")
850                .to_lowercase();
851
852            if !ct.contains("text/html") {
853                continue;
854            }
855
856            let body = match resp.text().await {
857                Ok(t) => t,
858                Err(_) => continue,
859            };
860
861            // Payload reflected unencoded in HTML
862            if body.contains(payload) && !is_payload_safe_context(&body, payload) {
863                findings.push(VulnerabilityFinding {
864                    vuln_type: "XSS".into(),
865                    subtype: "Reflected".into(),
866                    endpoint: endpoint.into(),
867                    parameter: param.to_string(),
868                    payload: payload.to_string(),
869                    severity: "HIGH".into(),
870                    confidence: "HIGH".into(),
871                    evidence: "Payload reflected in HTML without encoding".into(),
872                });
873                return findings;
874            }
875        }
876    }
877    findings
878}
879
/// Reports whether every raw reflection of `payload` in `content` sits in a
/// context where a browser will not interpret it.
///
/// Returns `true` ("safe") when:
/// * `payload` is empty or does not occur in `content` at all, or
/// * every occurrence lies inside an HTML comment that is opened before the
///   payload, not closed before it, and closed somewhere after it.
///
/// Returns `false` as soon as one occurrence is found outside such a comment.
///
/// An HTML-encoded copy of the payload elsewhere in the page does not make a
/// raw occurrence safe, so encoded copies are ignored. The earlier
/// "encoded copy present" shortcut also matched trivially for payloads
/// without `<` or `>`, which hid attribute-style payloads completely.
fn is_payload_safe_context(content: &str, payload: &str) -> bool {
    // An empty needle matches everywhere and proves nothing about reflection.
    if payload.is_empty() {
        return true;
    }

    // A payload that carries its own comment terminator closes whatever
    // comment it lands in, so being "inside a comment" offers no protection.
    let breaks_out_of_comment = payload.contains("-->");

    // `all` over zero matches is `true`: an absent payload is safe.
    content.match_indices(payload).all(|(pos, matched)| {
        if breaks_out_of_comment {
            return false;
        }

        let before = &content[..pos];
        let after = &content[pos + matched.len()..];

        match before.rfind("<!--") {
            // Still inside the comment only if it was not closed before the
            // payload and a terminator follows the payload.
            Some(open) => !before[open..].contains("-->") && after.contains("-->"),
            None => false,
        }
    })
}
901
902// ── SSTI ────────────────────────────────────────────────────────────────────
903
904async fn test_ssti(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
905    let mut findings = Vec::new();
906    let tests = [
907        ("{{7*7*7}}", "343"),
908        ("{{9*9*9}}", "729"),
909        ("${8*8*8}", "512"),
910        ("{{42*13}}", "546"),
911    ];
912    let params = ["template", "name", "msg", "content"];
913
914    for &(payload, expected) in &tests {
915        for param in &params[..3] {
916            // Baseline
917            let baseline_url = format!("{}?{}=normaltext", endpoint, param);
918            let baseline = match fetch_body(client, &baseline_url).await {
919                Some(b) => b,
920                None => continue,
921            };
922
923            let encoded = urlencoding::encode(payload);
924            let test_url = format!("{}?{}={}", endpoint, param, encoded);
925
926            if let Some(body) = fetch_body(client, &test_url).await {
927                if body.contains(expected)
928                    && !body.contains(payload)
929                    && !baseline.contains(expected)
930                {
931                    findings.push(VulnerabilityFinding {
932                        vuln_type: "SSTI".into(),
933                        subtype: "Template Injection".into(),
934                        endpoint: endpoint.into(),
935                        parameter: param.to_string(),
936                        payload: payload.to_string(),
937                        severity: "CRITICAL".into(),
938                        confidence: "HIGH".into(),
939                        evidence: format!("Template executed: {} = {}", payload, expected),
940                    });
941                    return findings;
942                }
943            }
944        }
945    }
946    findings
947}
948
949// ── SSRF ────────────────────────────────────────────────────────────────────
950
951async fn test_ssrf(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
952    let mut findings = Vec::new();
953    let ssrf_payloads = payloads::lines(payloads::SSRF);
954    let params = ["url", "uri", "path", "dest", "redirect"];
955    let indicators = [
956        "root:",
957        "daemon:",
958        "localhost",
959        "metadata",
960        "ami-id",
961        "instance-id",
962    ];
963
964    for param in &params[..3] {
965        for payload in ssrf_payloads.iter().take(3) {
966            let encoded = urlencoding::encode(payload);
967            let test_url = format!("{}?{}={}", endpoint, param, encoded);
968
969            if let Some(body) = fetch_body(client, &test_url).await {
970                for indicator in &indicators {
971                    if body.contains(indicator) {
972                        findings.push(VulnerabilityFinding {
973                            vuln_type: "SSRF".into(),
974                            subtype: "Server-Side Request Forgery".into(),
975                            endpoint: endpoint.into(),
976                            parameter: param.to_string(),
977                            payload: payload.to_string(),
978                            severity: "CRITICAL".into(),
979                            confidence: "HIGH".into(),
980                            evidence: format!("Internal data leaked: {}", indicator),
981                        });
982                        return findings;
983                    }
984                }
985            }
986        }
987    }
988    findings
989}
990
991// ── Auth Bypass ─────────────────────────────────────────────────────────────
992
993async fn test_auth_bypass(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
994    let mut findings = Vec::new();
995
996    // Check if endpoint is normally protected
997    let normal_status = match client.get(endpoint).send().await {
998        Ok(r) => r.status().as_u16(),
999        Err(_) => return findings,
1000    };
1001    if !matches!(normal_status, 401 | 403) {
1002        return findings; // Not protected, skip
1003    }
1004
1005    let bypass_headers = payloads::auth_headers(payloads::AUTH_BYPASS_HEADERS);
1006
1007    for (name, value) in bypass_headers.iter().take(10) {
1008        let resp = match client
1009            .get(endpoint)
1010            .header(name as &str, value as &str)
1011            .send()
1012            .await
1013        {
1014            Ok(r) => r,
1015            Err(_) => continue,
1016        };
1017
1018        if resp.status().as_u16() == 200 {
1019            findings.push(VulnerabilityFinding {
1020                vuln_type: "AUTH_BYPASS".into(),
1021                subtype: "Header-based".into(),
1022                endpoint: endpoint.into(),
1023                parameter: String::new(),
1024                payload: format!("{}: {}", name, value),
1025                severity: "CRITICAL".into(),
1026                confidence: "HIGH".into(),
1027                evidence: format!("Bypass with header {}: {}", name, value),
1028            });
1029            return findings;
1030        }
1031    }
1032    findings
1033}
1034
1035// ── Command Injection ───────────────────────────────────────────────────────
1036
1037async fn test_command_injection(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1038    let mut findings = Vec::new();
1039    let cmd_payloads = payloads::lines(payloads::COMMAND_INJECTION);
1040    let params = ["cmd", "exec", "command", "ping", "host"];
1041
1042    for param in &params[..3] {
1043        for payload in cmd_payloads.iter().take(3) {
1044            if payload.to_lowercase().contains("sleep") {
1045                let encoded = urlencoding::encode(payload);
1046                let test_url = format!("{}?{}={}", endpoint, param, encoded);
1047                let start = Instant::now();
1048                if let Ok(resp) = client.get(&test_url).send().await {
1049                    let elapsed = start.elapsed().as_secs_f64();
1050                    let _ = resp.text().await;
1051                    if elapsed > 4.5 {
1052                        findings.push(VulnerabilityFinding {
1053                            vuln_type: "COMMAND_INJECTION".into(),
1054                            subtype: "Time-based".into(),
1055                            endpoint: endpoint.into(),
1056                            parameter: param.to_string(),
1057                            payload: payload.to_string(),
1058                            severity: "CRITICAL".into(),
1059                            confidence: "HIGH".into(),
1060                            evidence: format!("Command executed (delay: {:.1}s)", elapsed),
1061                        });
1062                        return findings;
1063                    }
1064                }
1065            }
1066        }
1067    }
1068    findings
1069}
1070
1071// ── NoSQL Injection ─────────────────────────────────────────────────────────
1072
1073async fn test_nosql_injection(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1074    let mut findings = Vec::new();
1075    let nosql_payloads = payloads::lines(payloads::NOSQL_INJECTION);
1076
1077    for payload in nosql_payloads.iter().take(3) {
1078        let resp = match client
1079            .post(endpoint)
1080            .header("Content-Type", "application/json")
1081            .body(payload.to_string())
1082            .send()
1083            .await
1084        {
1085            Ok(r) => r,
1086            Err(_) => continue,
1087        };
1088
1089        if matches!(resp.status().as_u16(), 200 | 201) {
1090            let body = match resp.text().await {
1091                Ok(t) => t,
1092                Err(_) => continue,
1093            };
1094            if body.len() > 100 && !body.to_lowercase().contains("error") {
1095                findings.push(VulnerabilityFinding {
1096                    vuln_type: "NOSQL_INJECTION".into(),
1097                    subtype: "Operator Injection".into(),
1098                    endpoint: endpoint.into(),
1099                    parameter: String::new(),
1100                    payload: payload.to_string(),
1101                    severity: "HIGH".into(),
1102                    confidence: "MEDIUM".into(),
1103                    evidence: "NoSQL operator accepted, returned data".into(),
1104                });
1105                return findings;
1106            }
1107        }
1108    }
1109    findings
1110}
1111
1112// ── XXE ─────────────────────────────────────────────────────────────────────
1113
1114async fn test_xxe(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1115    let mut findings = Vec::new();
1116    let xxe_payloads = payloads::lines(payloads::XXE);
1117    let indicators = ["root:", "daemon:", "Windows", "[fonts]"];
1118
1119    for payload in xxe_payloads.iter().take(2) {
1120        let resp = match client
1121            .post(endpoint)
1122            .header("Content-Type", "application/xml")
1123            .body(payload.to_string())
1124            .send()
1125            .await
1126        {
1127            Ok(r) => r,
1128            Err(_) => continue,
1129        };
1130
1131        if resp.status().is_success() {
1132            let body = match resp.text().await {
1133                Ok(t) => t,
1134                Err(_) => continue,
1135            };
1136            for indicator in &indicators {
1137                if body.contains(indicator) {
1138                    findings.push(VulnerabilityFinding {
1139                        vuln_type: "XXE".into(),
1140                        subtype: "XML External Entity".into(),
1141                        endpoint: endpoint.into(),
1142                        parameter: String::new(),
1143                        payload: payload.to_string(),
1144                        severity: "CRITICAL".into(),
1145                        confidence: "HIGH".into(),
1146                        evidence: "File contents disclosed via XXE".into(),
1147                    });
1148                    return findings;
1149                }
1150            }
1151        }
1152    }
1153    findings
1154}
1155
1156// ── LFI ─────────────────────────────────────────────────────────────────────
1157
1158async fn test_lfi(client: &Client, endpoint: &str) -> Vec<VulnerabilityFinding> {
1159    let mut findings = Vec::new();
1160    let lfi_payloads = payloads::lines(payloads::LFI);
1161    let params = ["file", "path", "page", "include", "template"];
1162    let indicators = ["root:x:", "daemon:", "[fonts]", "[extensions]"];
1163
1164    for param in &params[..3] {
1165        for payload in lfi_payloads.iter().take(3) {
1166            let encoded = urlencoding::encode(payload);
1167            let test_url = format!("{}?{}={}", endpoint, param, encoded);
1168
1169            if let Some(body) = fetch_body(client, &test_url).await {
1170                for indicator in &indicators {
1171                    if body.contains(indicator) {
1172                        findings.push(VulnerabilityFinding {
1173                            vuln_type: "LFI".into(),
1174                            subtype: "Local File Inclusion".into(),
1175                            endpoint: endpoint.into(),
1176                            parameter: param.to_string(),
1177                            payload: payload.to_string(),
1178                            severity: "HIGH".into(),
1179                            confidence: "HIGH".into(),
1180                            evidence: "Local file contents exposed".into(),
1181                        });
1182                        return findings;
1183                    }
1184                }
1185            }
1186        }
1187    }
1188    findings
1189}
1190
1191// ── Shared helpers ──────────────────────────────────────────────────────────
1192
1193async fn fetch_body(client: &Client, url: &str) -> Option<String> {
1194    let resp = client.get(url).send().await.ok()?;
1195    if resp.status().as_u16() == 404 {
1196        return None;
1197    }
1198    resp.text().await.ok()
1199}
1200
/// Resolves `href`, as found in a page located at `base`, to an absolute URL.
///
/// Returns `None` for links that do not lead to a fetchable resource
/// (`javascript:`, `mailto:` and pure fragment links). Otherwise:
///
/// * `http://` and `https://` hrefs are returned unchanged.
/// * Protocol-relative hrefs (`//host/path`) inherit the scheme of `base`,
///   falling back to `https` when `base` has no scheme.
/// * Root-relative hrefs (`/path`) are joined to the origin (scheme and
///   authority) of `base`.
/// * Any other href is joined to the directory of the `base` path.
///
/// The query string and fragment of `base` are ignored, and a `base` without
/// a path (`https://host`) is treated as having the path `/`. Dot segments
/// (`./`, `../`) in `href` are not normalised.
fn resolve_url(base: &str, href: &str) -> Option<String> {
    if href.starts_with("javascript:") || href.starts_with('#') || href.starts_with("mailto:") {
        return None;
    }
    if href.starts_with("http://") || href.starts_with("https://") {
        return Some(href.to_string());
    }

    // Slashes inside the query or fragment must not be mistaken for path separators.
    let base = base
        .find(|c: char| c == '?' || c == '#')
        .map_or(base, |cut| &base[..cut]);
    let scheme_sep = base.find("://");

    if href.starts_with("//") {
        let scheme = scheme_sep.map_or("https", |idx| &base[..idx]);
        return Some(format!("{}:{}", scheme, href));
    }

    // Split the base into origin (`scheme://authority`) and path. The search
    // for the first path slash starts after `://` so that the scheme's own
    // slashes are never taken for the start of the path.
    let authority_start = scheme_sep.map_or(0, |idx| idx + 3);
    let origin_end = base[authority_start..]
        .find('/')
        .map_or(base.len(), |idx| authority_start + idx);
    let (origin, path) = base.split_at(origin_end);

    // Root-relative: replaces the whole path of the base.
    if href.starts_with('/') {
        return Some(format!("{}{}", origin, href));
    }

    // Document-relative: replaces the last segment of the base path.
    let dir = path.rfind('/').map_or("/", |idx| &path[..=idx]);
    Some(format!("{}{}{}", origin, dir, href))
}
web_analyzer/api_security_scanner.rs

web_analyzer/
api_security_scanner.rs