1use regex::Regex;
2use reqwest::Client;
3use scraper::{Html, Selector};
4use serde::{Deserialize, Serialize};
5use std::collections::{HashSet, VecDeque};
6use std::time::Duration;
7
8use crate::payloads;
9
/// A single hard-coded secret candidate reported by `scan_for_secrets`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretFinding {
    /// Name of the `SecretPattern` that matched (e.g. "AWS Access Key").
    pub secret_type: String,
    /// Severity label copied from the matching pattern.
    pub severity: String,
    /// The matched text after being passed through `mask_secret`.
    pub masked_value: String,
    /// URL of the document the match was found in.
    pub source_url: String,
    /// 1-based line number of the start of the match within the scanned content.
    pub line: usize,
    /// Shannon entropy of the matched text, rounded to two decimals.
    pub entropy: f64,
    /// Remediation advice copied from the matching pattern.
    pub recommendation: String,
}
22
/// A potentially risky construct found in JavaScript or HTML content.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsVulnerability {
    /// Category name (e.g. "DOM XSS", "Weak CSP", "Missing CSRF Protection").
    pub vuln_type: String,
    /// Severity label for the category.
    pub severity: String,
    /// URL of the page or script the construct was found in.
    pub source_url: String,
    /// The matched source text (truncated by `scan_js_security`); may be empty
    /// for findings that are not tied to a specific snippet.
    pub matched_code: String,
    /// Human-readable explanation of the issue.
    pub description: String,
    /// Suggested remediation.
    pub recommendation: String,
}
32
/// A potential or confirmed Server-Side Request Forgery indicator.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SsrfFinding {
    /// Kind of finding (e.g. "Potential SSRF in Form").
    pub finding_type: String,
    /// Severity label.
    pub severity: String,
    /// URL of the page, request or endpoint the finding relates to.
    pub source_url: String,
    /// Names of the form fields or query parameters considered risky.
    pub vulnerable_params: Vec<String>,
    /// Human-readable explanation of the finding.
    pub description: String,
}
41
/// Aggregate counters for one `scan_content` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanSummary {
    /// Number of URLs marked as visited by the crawler.
    pub total_urls_crawled: usize,
    /// Number of distinct JavaScript file URLs collected.
    pub total_js_files: usize,
    /// Number of distinct API endpoint URLs discovered.
    pub total_api_endpoints: usize,
    /// Number of secret findings after de-duplication.
    pub secrets_count: usize,
    /// Number of JavaScript/HTML findings after de-duplication.
    pub js_vulnerabilities_count: usize,
    /// Number of SSRF findings.
    pub ssrf_vulnerabilities_count: usize,
}
51
/// Complete output of `scan_content` for one target.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScannerResult {
    /// The target exactly as it was passed to `scan_content`.
    pub domain: String,
    /// De-duplicated secret findings.
    pub secrets: Vec<SecretFinding>,
    /// De-duplicated JavaScript/HTML findings.
    pub js_vulnerabilities: Vec<JsVulnerability>,
    /// SSRF findings from URL parameters, forms and endpoint probing.
    pub ssrf_vulnerabilities: Vec<SsrfFinding>,
    /// API endpoint URLs discovered in page and script content.
    pub api_endpoints_discovered: Vec<String>,
    /// Counters derived from the fields above and the crawl state.
    pub summary: ScanSummary,
}
61
/// Computes the Shannon entropy, in bits per byte, of the UTF-8 bytes of `data`.
///
/// Returns `0.0` for an empty string.
fn shannon_entropy(data: &str) -> f64 {
    let byte_len = data.len();
    if byte_len == 0 {
        return 0.0;
    }

    // Histogram over all 256 possible byte values.
    let mut histogram = [0u32; 256];
    data.bytes()
        .for_each(|byte| histogram[usize::from(byte)] += 1);

    let total = byte_len as f64;
    histogram
        .iter()
        .copied()
        .filter(|&count| count != 0)
        .map(|count| {
            let probability = f64::from(count) / total;
            -probability * probability.log2()
        })
        .sum()
}
81
/// Masks a secret so it can be reported without disclosing it.
///
/// Lengths are measured in characters, not bytes, so values containing
/// multi-byte UTF-8 characters never cause an out-of-boundary slice panic:
///
/// * more than 8 characters: first 4 + `****` + last 4,
/// * 3 to 8 characters: `****` + last 2,
/// * 2 characters or fewer: `****`.
///
/// For ASCII input the output is identical to slicing by byte offsets.
fn mask_secret(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let count = chars.len();
    // Collects the last `k` characters; callers guarantee `k <= count`.
    let tail = |k: usize| chars[count - k..].iter().collect::<String>();

    if count > 8 {
        let head: String = chars[..4].iter().collect();
        format!("{}****{}", head, tail(4))
    } else if count > 2 {
        format!("****{}", tail(2))
    } else {
        "****".into()
    }
}
93
/// Reports whether the text surrounding a match looks like documentation,
/// sample or local-development material rather than a real secret.
///
/// The check is a case-insensitive substring search for a fixed set of markers.
fn is_false_positive_context(context: &str) -> bool {
    const MARKERS: [&str; 12] = [
        "example",
        "sample",
        "placeholder",
        "dummy",
        "test",
        "demo",
        "your_",
        "my_",
        "template",
        "undefined",
        "localhost",
        "127.0.0.1",
    ];

    let lowered = context.to_lowercase();
    for marker in MARKERS.iter() {
        if lowered.contains(*marker) {
            return true;
        }
    }
    false
}
112
/// Reports whether a script URL looks like a third-party library or a build
/// artifact, based on a case-insensitive substring match against the whole URL.
fn is_known_library(url: &str) -> bool {
    const LIBRARY_HINTS: [&str; 19] = [
        "jquery",
        "bootstrap",
        "modernizr",
        "polyfill",
        "vendor",
        "bundle",
        "analytics",
        "tracking",
        "ga.js",
        "gtm.js",
        "react",
        "angular",
        "vue",
        "lodash",
        "moment",
        "cdn",
        "static",
        "dist",
        "chunk",
    ];

    let lowered = url.to_lowercase();
    for hint in LIBRARY_HINTS.iter() {
        if lowered.contains(*hint) {
            return true;
        }
    }
    false
}
138
/// Static description of one kind of secret the scanner looks for.
struct SecretPattern {
    /// Display name; copied into `SecretFinding::secret_type`.
    name: &'static str,
    /// Regular expression source, compiled with `Regex::new` in `scan_content`.
    pattern: &'static str,
    /// Severity label copied into the finding.
    severity: &'static str,
    /// Remediation advice copied into the finding.
    recommendation: &'static str,
}
147
148const SECRET_PATTERNS: &[SecretPattern] = &[
149 SecretPattern {
150 name: "AWS Access Key",
151 pattern: r"\bAKIA[0-9A-Z]{16}\b",
152 severity: "Medium",
153 recommendation: "Rotate the key immediately. Use AWS IAM roles instead of hard-coded keys.",
154 },
155 SecretPattern {
156 name: "AWS Secret Key",
157 pattern: r"\b[0-9a-zA-Z/+]{40}\b",
158 severity: "High",
159 recommendation: "Rotate the key immediately. Store secrets in AWS Secrets Manager.",
160 },
161 SecretPattern {
162 name: "Google API Key",
163 pattern: r"\bAIza[0-9A-Za-z\-_]{35}\b",
164 severity: "Medium",
165 recommendation: "Rotate the key and implement API key restrictions.",
166 },
167 SecretPattern {
168 name: "Google OAuth",
169 pattern: r"[0-9]+-[0-9A-Za-z_]{32}\.apps\.googleusercontent\.com",
170 severity: "Medium",
171 recommendation: "Review and potentially regenerate the OAuth credentials.",
172 },
173 SecretPattern {
174 name: "Stripe API Key",
175 pattern: r"\b(?:sk|pk)_(live|test)_[0-9a-zA-Z]{24,34}\b",
176 severity: "High",
177 recommendation: "Rotate the key immediately. Only use server-side code for Stripe API.",
178 },
179 SecretPattern {
180 name: "GitHub Token",
181 pattern: r"\b(?:github|gh)(?:_pat)?_[0-9a-zA-Z]{36,40}\b",
182 severity: "High",
183 recommendation: "Revoke and regenerate the token. Use GitHub Actions secrets for CI/CD.",
184 },
185 SecretPattern {
186 name: "GitHub OAuth",
187 pattern: r"\bgho_[0-9a-zA-Z]{36,40}\b",
188 severity: "High",
189 recommendation: "Revoke and regenerate the OAuth token.",
190 },
191 SecretPattern {
192 name: "Facebook Access Token",
193 pattern: r"EAACEdEose0cBA[0-9A-Za-z]+",
194 severity: "Medium",
195 recommendation: "Revoke the token and regenerate. Store tokens securely.",
196 },
197 SecretPattern {
198 name: "JWT Token",
199 pattern: r"eyJ[a-zA-Z0-9_\-]*\.[a-zA-Z0-9_\-]*\.[a-zA-Z0-9_\-]*",
200 severity: "Medium",
201 recommendation: "If valid, rotate the token. Implement proper expiration.",
202 },
203 SecretPattern {
204 name: "SSH Private Key",
205 pattern: r"-----BEGIN\s+(?:RSA|DSA|EC|OPENSSH)\s+PRIVATE\s+KEY",
206 severity: "High",
207 recommendation: "Generate a new key pair. Never store private keys in code.",
208 },
209 SecretPattern {
210 name: "Password in URL",
211 pattern: r"[a-zA-Z]{3,10}://[^/\s:@]{3,20}:[^/\s:@]{3,20}@.{1,100}",
212 severity: "High",
213 recommendation: "Remove the password from the URL and use secure authentication.",
214 },
215 SecretPattern {
216 name: "Firebase URL",
217 pattern: r"https://[a-z0-9-]+\.firebaseio\.com",
218 severity: "Low",
219 recommendation: "Review Firebase security rules and regenerate any associated secrets.",
220 },
221 SecretPattern {
222 name: "MongoDB Connection String",
223 pattern: r"mongodb(?:\+srv)?://[^/\s]+:[^/\s]+@[^/\s]+",
224 severity: "High",
225 recommendation: "Rotate the password and use environment variables instead.",
226 },
227 SecretPattern {
228 name: "Slack Token",
229 pattern: r"xox[baprs]-[0-9a-zA-Z\-]{10,48}",
230 severity: "Medium",
231 recommendation: "Revoke and regenerate the token.",
232 },
233 SecretPattern {
234 name: "Slack Webhook",
235 pattern: r"https://hooks\.slack\.com/services/T[a-zA-Z0-9_]+/B[a-zA-Z0-9_]+/[a-zA-Z0-9_]+",
236 severity: "Medium",
237 recommendation: "Regenerate the webhook URL and store it securely.",
238 },
239 SecretPattern {
240 name: "API Key",
241 pattern: r#"(?i)\b(?:api[_\-]?key|apikey)\b\s*[=:]\s*["'`]([a-zA-Z0-9_\-\.]{16,64})["'`]"#,
242 severity: "Medium",
243 recommendation: "Rotate the key. Store it in environment variables or a secrets manager.",
244 },
245 SecretPattern {
246 name: "Secret Key",
247 pattern: r#"(?i)\b(?:secret[_\-]?key|secretkey)\b\s*[=:]\s*["'`]([a-zA-Z0-9_\-\.]{16,64})["'`]"#,
248 severity: "Medium",
249 recommendation: "Rotate the key and ensure it's stored in a secure vault.",
250 },
251 SecretPattern {
252 name: "Auth Token",
253 pattern: r#"(?i)\b(?:auth[_\-]?token|authtoken)\b\s*[=:]\s*["'`]([a-zA-Z0-9_\-\.]{16,64})["'`]"#,
254 severity: "Medium",
255 recommendation: "Revoke the token and issue a new one.",
256 },
257 SecretPattern {
258 name: "Access Token",
259 pattern: r#"(?i)\b(?:access[_\-]?token|accesstoken)\b\s*[=:]\s*["'`]([a-zA-Z0-9_\-\.]{16,64})["'`]"#,
260 severity: "Medium",
261 recommendation: "Revoke and regenerate the token.",
262 },
263 SecretPattern {
264 name: "Encryption Key",
265 pattern: r#"(?i)(?:encryption|aes|des|blowfish)[\s_-]?key[\s=:]+["'`][A-Za-z0-9+/]{16,}={0,2}["'`]"#,
266 severity: "High",
267 recommendation: "Rotate the key and store it securely using a key management system.",
268 },
269 SecretPattern {
270 name: "Stripe Publishable Key",
271 pattern: r"\bpk_(live|test)_[0-9a-zA-Z]{24,34}\b",
272 severity: "Low",
273 recommendation:
274 "Publishable keys are public, but verify no secret keys are exposed nearby.",
275 },
276 SecretPattern {
277 name: "Twitter Bearer",
278 pattern: r"AAAAAAAAAAAAAAAAAAA[A-Za-z0-9%]+",
279 severity: "Medium",
280 recommendation: "Rotate the bearer token. Use environment variables for storage.",
281 },
282 SecretPattern {
283 name: "Password",
284 pattern: r#"(?i)(?:password|passwd|pwd)[\s=:]+["'`]([^"'`\s]{8,64})["'`]"#,
285 severity: "High",
286 recommendation:
287 "Remove hardcoded passwords. Use a secrets manager or environment variables.",
288 },
289 SecretPattern {
290 name: "Database Credentials",
291 pattern: r#"(?i)(?:db_pass|db_password|database_password)[\s=:]+["'`]([^"'`\s]+)["'`]"#,
292 severity: "High",
293 recommendation: "Change DB credentials immediately. Store in env vars or a vault.",
294 },
295];
296
/// Static description of one class of risky JavaScript construct.
struct JsVulnCategory {
    /// Display name; copied into `JsVulnerability::vuln_type`.
    name: &'static str,
    /// Severity label; `scan_js_security` also uses it to decide which
    /// categories to run against content it considers minified.
    severity: &'static str,
    /// Regular expression sources; any match of any pattern yields a finding.
    patterns: &'static [&'static str],
    /// Explanation copied into the finding.
    description: &'static str,
    /// Remediation advice copied into the finding.
    recommendation: &'static str,
}
306
/// Catalogue of JavaScript risk signatures applied by `scan_js_security`.
///
/// These are purely textual heuristics: a match indicates code worth
/// reviewing, not a confirmed vulnerability. Patterns are compiled in
/// `scan_content`; a pattern that fails to compile is silently skipped there.
const JS_VULN_CATEGORIES: &[JsVulnCategory] = &[
    // Sinks fed from location/URL-like sources.
    JsVulnCategory {
        name: "DOM XSS",
        severity: "High",
        patterns: &[
            r"document\.write\s*\(\s*.*?(?:location|URL|documentURI|referrer|href|search|hash)",
            r"\.innerHTML\s*=\s*.*?(?:location|URL|documentURI|referrer|href|search|hash)",
            r"\.outerHTML\s*=\s*.*?(?:location|URL|documentURI|referrer|href|search|hash)",
            r"eval\s*\(\s*.*?(?:location|URL|documentURI|referrer|href|search|hash)",
        ],
        description:
            "DOM-based XSS: user-controllable data passed to a dynamic code execution sink.",
        recommendation:
            "Sanitize all user inputs before DOM operations. Use DOMPurify or a strict CSP.",
    },
    // Navigation whose target expression mentions user/input/param/arg.
    JsVulnCategory {
        name: "Open Redirect",
        severity: "High",
        patterns: &[
            r"(?:window\.)?location(?:\.href)?\s*=\s*.*?(?:user|input|param|arg)",
            r"(?:window\.)?location\.replace\s*\(\s*.*?(?:user|input|param|arg)",
            r"(?:window\.)?location\.assign\s*\(\s*.*?(?:user|input|param|arg)",
        ],
        description: "User input determines redirect destination, enabling phishing attacks.",
        recommendation: "Implement a whitelist of allowed redirect URLs.",
    },
    // CORS header text appearing in the scanned content.
    JsVulnCategory {
        name: "CORS Misconfiguration",
        severity: "Medium",
        patterns: &[
            r"Access-Control-Allow-Origin\s*:\s*\*",
            r"Access-Control-Allow-Origin\s*:\s*null",
            r"Access-Control-Allow-Credentials\s*:\s*true",
        ],
        description: "CORS misconfiguration can allow unauthorized cross-origin access.",
        recommendation: "Be specific with CORS policies. Avoid wildcard origins.",
    },
    // Any assignment to document.cookie is flagged; flags are not inspected.
    JsVulnCategory {
        name: "Insecure Cookie",
        severity: "Medium",
        patterns: &[r"document\.cookie\s*="],
        description: "Cookies set without secure flags can be vulnerable to theft.",
        recommendation: "Set 'Secure' and 'HttpOnly' flags on sensitive cookies.",
    },
    // postMessage with a literal "*" target origin.
    JsVulnCategory {
        name: "Insecure Data Transmission",
        severity: "Medium",
        patterns: &[r#"\.postMessage\([^,]+,\s*["']\*["']\)"#],
        description: "Data transmitted insecurely via postMessage with wildcard origin.",
        recommendation: "Use specific origin URLs with postMessage() and validate senders.",
    },
    JsVulnCategory {
        name: "Prototype Pollution",
        severity: "Medium",
        patterns: &[r"__proto__\s*[=\[]", r"prototype\["],
        description: "Prototype pollution can lead to property injection attacks.",
        recommendation:
            "Avoid user-controlled data with Object.assign()/prototype. Use Object.create(null).",
    },
    JsVulnCategory {
        name: "Command Injection",
        severity: "High",
        patterns: &[
            r"exec\s*\(\s*.*?(?:user|input|param|arg)",
            r"spawn\s*\(\s*.*?(?:user|input|param|arg)",
        ],
        description: "Command injection allows attackers to execute arbitrary commands.",
        recommendation: "Avoid executing commands with user input. Implement strict validation.",
    },
    // Web storage writes whose value expression mentions a sensitive keyword.
    JsVulnCategory {
        name: "Insecure Data Storage",
        severity: "Low",
        patterns: &[
            r"localStorage\.setItem\(\s*[^,]+,\s*.*?(?:password|token|key|secret|credentials)",
            r"sessionStorage\.setItem\(\s*[^,]+,\s*.*?(?:password|token|key|secret|credentials)",
        ],
        description: "Sensitive data stored insecurely in client-side storage.",
        recommendation: "Don't store sensitive info in localStorage/sessionStorage.",
    },
    // setAttribute("on…", …) with a quoted event-handler attribute name.
    JsVulnCategory {
        name: "Event Handler XSS",
        severity: "Medium",
        patterns: &[r#"\.setAttribute\(["']on\w+["']\s*,"#],
        description: "Event handlers assigned dynamically can lead to XSS.",
        recommendation: "Validate and sanitize data before assigning to event handlers.",
    },
    JsVulnCategory {
        name: "CSP Bypass",
        severity: "Medium",
        patterns: &[r#"document\.createElement\(["']script["']\)"#],
        description: "Dynamic script creation may bypass Content Security Policy.",
        recommendation: "Implement a strict CSP and avoid dynamic script creation with user input.",
    },
    // Plain-text ws:// WebSocket URLs.
    JsVulnCategory {
        name: "WebSocket Insecurity",
        severity: "High",
        patterns: &[r#"new\s+WebSocket\(\s*["']ws://"#],
        description: "Insecure WebSocket connections (ws://) can be intercepted.",
        recommendation: "Use secure WebSocket connections (wss://) and validate data.",
    },
    // Note: every Math.random() call matches, regardless of how it is used.
    JsVulnCategory {
        name: "Insecure Crypto",
        severity: "High",
        patterns: &[
            r#"(?:createHash|crypto\.subtle).*?["'](?:md5|sha1)["']"#,
            r"Math\.random\(\)",
        ],
        description: "Weak cryptographic methods (MD5/SHA1/Math.random) in use.",
        recommendation:
            "Use modern crypto algorithms. Use crypto.getRandomValues() instead of Math.random().",
    },
    // Note: matches any "../" or "..\" sequence, including relative imports.
    JsVulnCategory {
        name: "Path Traversal",
        severity: "Medium",
        patterns: &[r"\.\./|\.\.\\"],
        description: "Path traversal allows access to files outside the intended directory.",
        recommendation: "Validate and sanitize file paths. Use allowlists.",
    },
];
426
/// Parameter-name fragments treated as hints that a value may be fetched or
/// followed by the server.
///
/// Entries are compared as substrings of the lowercased form-field or query
/// parameter name, so short entries such as `get` or `src` also match longer
/// names that merely contain them.
const SSRF_PARAMS: &[&str] = &[
    "url",
    "uri",
    "link",
    "src",
    "href",
    "target",
    "destination",
    "redirect",
    "redirect_to",
    "redirecturl",
    "redirect_uri",
    "return",
    "return_to",
    "returnurl",
    "return_path",
    "path",
    "load",
    "file",
    "filename",
    "folder",
    "folder_url",
    "image",
    "img",
    "image_url",
    "image_path",
    "avatar",
    "document",
    "doc",
    "document_url",
    "fetch",
    "get",
    "view",
    "content",
    "domain",
    "callback",
    "reference",
    "site",
    "page",
    "data",
    "data_url",
    "resource",
    "template",
    "api_endpoint",
    "endpoint",
    "proxy",
    "feed",
    "host",
    "webhook",
    "address",
    "media",
    "video",
    "audio",
    "download",
    "upload",
    "preview",
    "source",
    "location",
    "goto",
    "callback_url",
    "forward",
    "next",
    "origin",
    "continue",
];
494
495pub async fn scan_content(
498 domain: &str,
499) -> Result<ScannerResult, Box<dyn std::error::Error + Send + Sync>> {
500 let base_url = if domain.starts_with("http") {
501 domain.to_string()
502 } else {
503 format!("https://{}", domain)
504 };
505
506 let client = Client::builder()
507 .timeout(Duration::from_secs(15))
508 .danger_accept_invalid_certs(true)
509 .build()?;
510
511 let mut secrets = Vec::new();
512 let mut js_vulns = Vec::new();
513 let mut ssrf_findings = Vec::new();
514 let mut visited = HashSet::new();
515 let mut js_file_urls = HashSet::new();
516 let mut api_endpoints: HashSet<String> = HashSet::new();
517 let mut queue: VecDeque<(String, u8)> = VecDeque::new();
518 queue.push_back((base_url.clone(), 0));
519
520 let max_depth: u8 = 2;
521 let max_pages: usize = 50;
522
523 let secret_regexes: Vec<(&SecretPattern, Regex)> = SECRET_PATTERNS
525 .iter()
526 .filter_map(|sp| Regex::new(sp.pattern).ok().map(|r| (sp, r)))
527 .collect();
528
529 let js_vuln_regexes: Vec<(&JsVulnCategory, Vec<Regex>)> = JS_VULN_CATEGORIES
530 .iter()
531 .map(|cat| {
532 let rxs: Vec<Regex> = cat
533 .patterns
534 .iter()
535 .filter_map(|p| Regex::new(p).ok())
536 .collect();
537 (cat, rxs)
538 })
539 .collect();
540
541 let api_regexes: Vec<Regex> = [
543 r"/api/v\d+/",
544 r"/api/",
545 r"/graphql",
546 r"/rest/",
547 r"/v\d+/\w+",
548 r"/service/",
549 r"/json/",
550 r"/rpc/",
551 r"/gateway/",
552 r"/ajax/",
553 r"/data/",
554 r"/query/",
555 r"/feeds/",
556 r"/svc/",
557 r"/soap/",
558 ]
559 .iter()
560 .filter_map(|p| Regex::new(p).ok())
561 .collect();
562
563 let mut disallowed: Vec<String> = Vec::new();
565 let robots_url = format!("{}/robots.txt", base_url.trim_end_matches('/'));
566 if let Ok(resp) = client.get(&robots_url).send().await {
567 if resp.status().is_success() {
568 if let Ok(body) = resp.text().await {
569 let mut agent_match = false;
570 for line in body.lines() {
571 let line = line.trim().to_lowercase();
572 if let Some(agent) = line.strip_prefix("user-agent:") {
573 let agent = agent.trim();
574 agent_match = agent == "*";
575 }
576 if agent_match {
577 if let Some(path) = line.strip_prefix("disallow:") {
578 let path = path.trim();
579 if !path.is_empty() {
580 disallowed.push(path.to_string());
581 }
582 }
583 }
584 }
585 }
586 }
587 }
588
589 let sitemap_url = format!("{}/sitemap.xml", base_url.trim_end_matches('/'));
591 if let Ok(resp) = client.get(&sitemap_url).send().await {
592 if resp.status().is_success() {
593 if let Ok(body) = resp.text().await {
594 let loc_rx = Regex::new(r"<loc>([^<]+)</loc>").unwrap();
595 for cap in loc_rx.captures_iter(&body) {
596 if let Some(url) = cap.get(1) {
597 let u = url.as_str().to_string();
598 if is_same_domain(&base_url, &u) && !visited.contains(&u) {
599 queue.push_back((u, 1));
600 }
601 }
602 }
603 }
604 }
605 }
606
607 while let Some((url, depth)) = queue.pop_front() {
609 if visited.len() >= max_pages || depth > max_depth || visited.contains(&url) {
610 continue;
611 }
612
613 let url_path = url.trim_start_matches(&base_url);
615 if disallowed.iter().any(|d| url_path.starts_with(d.as_str())) {
616 continue;
617 }
618
619 visited.insert(url.clone());
620
621 check_url_params_ssrf(&url, &mut ssrf_findings);
623
624 let resp = match client.get(&url).send().await {
625 Ok(r) => r,
626 Err(_) => continue,
627 };
628 if !resp.status().is_success() {
629 continue;
630 }
631
632 let content_type = resp
633 .headers()
634 .get("content-type")
635 .and_then(|v| v.to_str().ok())
636 .unwrap_or("")
637 .to_lowercase();
638
639 let body = match resp.text().await {
640 Ok(t) => t,
641 Err(_) => continue,
642 };
643
644 scan_for_secrets(&body, &url, &secret_regexes, &mut secrets);
646
647 extract_api_endpoints(&body, &base_url, &api_regexes, &mut api_endpoints);
649
650 if content_type.contains("text/html") {
651 let doc = Html::parse_document(&body);
652
653 if depth < max_depth {
655 let a_sel = Selector::parse("a[href]").unwrap();
656 for el in doc.select(&a_sel) {
657 if let Some(href) = el.value().attr("href") {
658 let abs = resolve_url(&base_url, href);
659 if let Some(abs_url) = abs {
660 if is_same_domain(&base_url, &abs_url) && !visited.contains(&abs_url) {
661 queue.push_back((abs_url, depth + 1));
662 }
663 }
664 }
665 }
666 }
667
668 let script_sel = Selector::parse("script").unwrap();
670 for el in doc.select(&script_sel) {
671 let inline = el.text().collect::<String>();
673 if inline.len() > 10 {
674 scan_js_security(&inline, &url, &js_vuln_regexes, &mut js_vulns);
675 scan_for_secrets(&inline, &url, &secret_regexes, &mut secrets);
676 }
677 if let Some(src) = el.value().attr("src") {
679 if let Some(js_url) = resolve_url(&base_url, src) {
680 if !is_known_library(&js_url) {
681 js_file_urls.insert(js_url);
682 }
683 }
684 }
685 }
686
687 let form_sel = Selector::parse("form").unwrap();
689 let input_sel = Selector::parse("input[name], textarea[name]").unwrap();
690 for form in doc.select(&form_sel) {
691 let mut vuln_params = Vec::new();
692 for input in form.select(&input_sel) {
693 if let Some(name) = input.value().attr("name") {
694 let name_lower = name.to_lowercase();
695 if SSRF_PARAMS.iter().any(|p| name_lower.contains(p)) {
696 vuln_params.push(name.to_string());
697 }
698 }
699 }
700 if !vuln_params.is_empty() {
701 ssrf_findings.push(SsrfFinding {
702 finding_type: "Potential SSRF in Form".into(),
703 severity: "Medium".into(),
704 source_url: url.clone(),
705 vulnerable_params: vuln_params,
706 description: "Form contains fields that could be used for Server-Side Request Forgery.".into(),
707 });
708 }
709 }
710
711 let meta_sel =
713 Selector::parse(r#"meta[http-equiv="Content-Security-Policy"]"#).unwrap();
714 for meta in doc.select(&meta_sel) {
715 if let Some(content) = meta.value().attr("content") {
716 let c_lower = content.to_lowercase();
717 if c_lower.contains("unsafe-inline") || c_lower.contains("unsafe-eval") {
718 js_vulns.push(JsVulnerability {
719 vuln_type: "Weak CSP".into(),
720 severity: "Medium".into(),
721 source_url: url.clone(),
722 matched_code: content.to_string(),
723 description: "CSP allows unsafe-inline or unsafe-eval.".into(),
724 recommendation: "Remove unsafe-inline and unsafe-eval from your CSP."
725 .into(),
726 });
727 }
728 }
729 }
730
731 let csrf_sel = Selector::parse(
733 r#"input[name*="csrf" i], input[name*="xsrf" i], input[name*="token" i]"#,
734 )
735 .unwrap();
736 for form in doc.select(&form_sel) {
737 if form.select(&csrf_sel).next().is_none() {
738 js_vulns.push(JsVulnerability {
739 vuln_type: "Missing CSRF Protection".into(),
740 severity: "Medium".into(),
741 source_url: url.clone(),
742 matched_code: String::new(),
743 description: "Form found without CSRF token.".into(),
744 recommendation: "Add CSRF tokens to all state-changing forms.".into(),
745 });
746 }
747 }
748 } else if (content_type.contains("javascript") || url.ends_with(".js"))
749 && !is_known_library(&url) {
750 js_file_urls.insert(url.clone());
751 scan_js_security(&body, &url, &js_vuln_regexes, &mut js_vulns);
752 scan_for_secrets(&body, &url, &secret_regexes, &mut secrets);
753 }
754 }
755
756 for js_url in &js_file_urls {
758 if visited.contains(js_url) {
759 continue;
760 }
761 if let Ok(resp) = client.get(js_url).send().await {
762 if resp.status().is_success() {
763 if let Ok(js_body) = resp.text().await {
764 if js_body.len() > 10 {
765 scan_js_security(&js_body, js_url, &js_vuln_regexes, &mut js_vulns);
766 scan_for_secrets(&js_body, js_url, &secret_regexes, &mut secrets);
767 extract_api_endpoints(
768 &js_body,
769 &base_url,
770 &api_regexes,
771 &mut api_endpoints,
772 );
773 }
774 }
775 }
776 }
777 }
778
779 let ssrf_probes = payloads::lines(payloads::SSRF);
781 for endpoint in api_endpoints.iter().take(20) {
782 for probe in ssrf_probes.iter().take(5) {
784 let test_url = format!("{}?url={}", endpoint, probe);
786 if let Ok(resp) = client.get(&test_url).header("Accept", "*/*").send().await {
787 if resp.status().is_redirection() {
789 if let Some(loc) = resp.headers().get("location") {
790 if let Ok(loc_str) = loc.to_str() {
791 if loc_str.contains(probe) {
792 ssrf_findings.push(SsrfFinding {
793 finding_type: "Confirmed SSRF in API Endpoint".into(),
794 severity: "High".into(),
795 source_url: endpoint.clone(),
796 vulnerable_params: vec!["url".into()],
797 description: format!(
798 "API endpoint redirects to SSRF probe: {}",
799 loc_str
800 ),
801 });
802 }
803 }
804 }
805 }
806 }
807 }
808 }
809
810 dedup_secrets(&mut secrets);
812 dedup_js_vulns(&mut js_vulns);
813
814 let api_list: Vec<String> = api_endpoints.into_iter().collect();
815
816 let summary = ScanSummary {
817 total_urls_crawled: visited.len(),
818 total_js_files: js_file_urls.len(),
819 total_api_endpoints: api_list.len(),
820 secrets_count: secrets.len(),
821 js_vulnerabilities_count: js_vulns.len(),
822 ssrf_vulnerabilities_count: ssrf_findings.len(),
823 };
824
825 Ok(ScannerResult {
826 domain: domain.to_string(),
827 secrets,
828 js_vulnerabilities: js_vulns,
829 ssrf_vulnerabilities: ssrf_findings,
830 api_endpoints_discovered: api_list,
831 summary,
832 })
833}
834
835fn scan_for_secrets(
838 content: &str,
839 source_url: &str,
840 patterns: &[(&SecretPattern, Regex)],
841 results: &mut Vec<SecretFinding>,
842) {
843 for (sp, rx) in patterns {
844 for m in rx.find_iter(content) {
845 let value = m.as_str();
846 let line = content[..m.start()].matches('\n').count() + 1;
847 let entropy = shannon_entropy(value);
848
849 if matches!(
851 sp.name,
852 "AWS Secret Key" | "Google API Key" | "API Key" | "Secret Key"
853 ) && entropy < 3.5
854 {
855 continue;
856 }
857
858 let ctx_start = m.start().saturating_sub(80);
860 let ctx_end = (m.end() + 80).min(content.len());
861 let context = &content[ctx_start..ctx_end];
862 if is_false_positive_context(context) {
863 continue;
864 }
865
866 results.push(SecretFinding {
867 secret_type: sp.name.to_string(),
868 severity: sp.severity.to_string(),
869 masked_value: mask_secret(value),
870 source_url: source_url.to_string(),
871 line,
872 entropy: (entropy * 100.0).round() / 100.0,
873 recommendation: sp.recommendation.to_string(),
874 });
875 }
876 }
877}
878
879fn scan_js_security(
880 content: &str,
881 source_url: &str,
882 categories: &[(&JsVulnCategory, Vec<Regex>)],
883 results: &mut Vec<JsVulnerability>,
884) {
885 let is_minified = content.len() > 5000 && content.matches('\n').count() < 50;
887
888 for (cat, rxs) in categories {
889 if is_minified && cat.severity != "High" {
891 continue;
892 }
893
894 for rx in rxs {
895 for m in rx.find_iter(content) {
896 let matched = m.as_str();
897 let display = if matched.len() > 200 {
899 &matched[..200]
900 } else {
901 matched
902 };
903
904 results.push(JsVulnerability {
905 vuln_type: cat.name.to_string(),
906 severity: cat.severity.to_string(),
907 source_url: source_url.to_string(),
908 matched_code: display.to_string(),
909 description: cat.description.to_string(),
910 recommendation: cat.recommendation.to_string(),
911 });
912 }
913 }
914 }
915}
916
917fn dedup_secrets(v: &mut Vec<SecretFinding>) {
918 let mut seen = HashSet::new();
919 v.retain(|s| {
920 seen.insert(format!(
921 "{}:{}:{}",
922 s.secret_type, s.source_url, s.masked_value
923 ))
924 });
925}
926
927fn dedup_js_vulns(v: &mut Vec<JsVulnerability>) {
928 let mut seen = HashSet::new();
929 v.retain(|j| {
930 seen.insert(format!(
931 "{}:{}:{}",
932 j.vuln_type, j.source_url, j.matched_code
933 ))
934 });
935}
936
937fn check_url_params_ssrf(url: &str, findings: &mut Vec<SsrfFinding>) {
938 if let Some(query_start) = url.find('?') {
939 let query = &url[query_start + 1..];
940 let mut vuln_params = Vec::new();
941 for pair in query.split('&') {
942 if let Some(eq) = pair.find('=') {
943 let param = pair[..eq].to_lowercase();
944 if SSRF_PARAMS.iter().any(|p| param.contains(p)) {
945 vuln_params.push(pair[..eq].to_string());
946 }
947 }
948 }
949 if !vuln_params.is_empty() {
950 findings.push(SsrfFinding {
951 finding_type: "Potential SSRF in URL Parameter".into(),
952 severity: "Medium".into(),
953 source_url: url.to_string(),
954 vulnerable_params: vuln_params,
955 description: "URL contains parameters that could be used for SSRF.".into(),
956 });
957 }
958 }
959}
960
961fn extract_api_endpoints(
962 content: &str,
963 base_url: &str,
964 patterns: &[Regex],
965 endpoints: &mut HashSet<String>,
966) {
967 for rx in patterns {
968 for m in rx.find_iter(content) {
969 let path = m.as_str();
970 let full_url = format!("{}{}", base_url.trim_end_matches('/'), path);
971 endpoints.insert(full_url);
972 }
973 }
974}
975
/// Resolves an `href`/`src` attribute value against the URL of the document it
/// appeared in.
///
/// Returns `None` for references that cannot be crawled: empty values,
/// fragment-only links, and any explicit scheme other than `http`/`https`
/// (`javascript:`, `mailto:`, `tel:`, `data:`, ...).
///
/// Resolution rules:
/// * `http://…` / `https://…` are returned unchanged.
/// * `//host/path` inherits the scheme of `base` (`https` if `base` has none).
/// * `/path` is resolved against the origin (scheme + authority) of `base`.
/// * anything else is resolved against the directory of `base`'s path, with
///   any query string or fragment of `base` ignored.
///
/// `.` and `..` segments are not normalized.
fn resolve_url(base: &str, href: &str) -> Option<String> {
    let href = href.trim();
    if href.is_empty() || href.starts_with('#') {
        return None;
    }

    // Protocol-relative reference.
    if href.starts_with("//") {
        let scheme = match base.find("://") {
            Some(idx) => &base[..idx],
            None => "https",
        };
        return Some(format!("{}:{}", scheme, href));
    }

    // Reference with an explicit scheme: only http(s) URLs are kept.
    if let Some(colon) = href.find(':') {
        let scheme = &href[..colon];
        let looks_like_scheme = scheme
            .chars()
            .next()
            .map_or(false, |c| c.is_ascii_alphabetic())
            && scheme
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
        if looks_like_scheme {
            let is_http =
                scheme.eq_ignore_ascii_case("http") || scheme.eq_ignore_ascii_case("https");
            return if is_http && href[colon + 1..].starts_with("//") {
                Some(href.to_string())
            } else {
                None
            };
        }
    }

    // Split the base into origin ("scheme://authority") and path. The slashes
    // of "://" must be skipped first, otherwise a base without a path would be
    // cut in the middle of its scheme separator.
    let authority_start = base.find("://").map_or(0, |idx| idx + 3);
    let path_start = base[authority_start..]
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .map_or(base.len(), |idx| authority_start + idx);
    let origin = &base[..path_start];
    let base_path = base[path_start..]
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or("");

    // Root-relative reference.
    if let Some(rest) = href.strip_prefix('/') {
        return Some(format!("{}/{}", origin, rest));
    }

    // Document-relative reference.
    let dir = match base_path.rfind('/') {
        Some(idx) => &base_path[..=idx],
        None => "/",
    };
    Some(format!("{}{}{}", origin, dir, href))
}
998
/// Reports whether `url` points at the same host (including any port) as
/// `base`.
///
/// The comparison ignores the `http`/`https` scheme, letter case, any
/// `user:pass@` prefix, and everything from the first `/`, `?` or `#` onward.
fn is_same_domain(base: &str, url: &str) -> bool {
    fn host_of(u: &str) -> String {
        let lowered = u.trim().to_lowercase();
        let without_scheme = lowered
            .strip_prefix("https://")
            .or_else(|| lowered.strip_prefix("http://"))
            .unwrap_or(&lowered);
        // The authority ends at the first path, query or fragment delimiter.
        let authority = without_scheme
            .split(|c: char| matches!(c, '/' | '?' | '#'))
            .next()
            .unwrap_or("");
        // Drop userinfo so "example.com@evil.com" is recognized as evil.com.
        let host = authority.rsplit('@').next().unwrap_or(authority);
        host.to_string()
    }

    host_of(base) == host_of(url)
}
1009}