Skip to main content

web_analyzer/
web_technologies.rs

1use regex::Regex;
2use reqwest::Client;
3use scraper::{Html, Selector};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::Duration;
7
8// ── Detection pattern constants ─────────────────────────────────────────────
9
/// Web-server fingerprints as `(needle, display name)` pairs.
///
/// `detect_server` looks for each needle as a substring of the lowercased
/// `Server` / `X-Powered-By` header values and reports the display name of
/// the first entry that matches, so the order of this table is significant.
const WEB_SERVERS: &[(&str, &str)] = &[
    // General-purpose HTTP servers
    ("nginx", "Nginx"),
    ("apache", "Apache HTTP Server"),
    ("iis", "Microsoft IIS"),
    ("cloudflare", "Cloudflare"),
    ("litespeed", "LiteSpeed"),
    ("caddy", "Caddy"),
    // Proxies
    ("traefik", "Traefik Proxy"),
    ("envoy", "Envoy Proxy"),
    // Python application servers
    ("gunicorn", "Gunicorn WSGI"),
    ("uwsgi", "uWSGI"),
];
22
/// JavaScript library fingerprints as `(display name, needles)` pairs.
///
/// A library is reported when any one of its needles occurs as a substring
/// of the lowercased page HTML or of the collected `<script src>` URLs.
const JS_LIBRARIES: &[(&str, &[&str])] = &[
    ("jQuery", &["jquery", "jquery.min.js"]),
    ("Lodash", &["lodash", "underscore"]),
    ("Moment.js", &["moment.js", "moment.min.js"]),
    ("D3.js", &["d3.js", "d3.min.js"]),
    ("Chart.js", &["chart.js", "chart.min.js"]),
    ("Three.js", &["three.js", "three.min.js"]),
    ("GSAP", &["gsap", "tweenmax"]),
    ("Axios", &["axios"]),
    ("Swiper", &["swiper"]),
    ("Bootstrap JS", &["bootstrap.js", "bootstrap.min.js"]),
    ("Popper.js", &["popper.js"]),
    ("Font Awesome", &["fontawesome", "font-awesome"]),
];
37
/// CSS framework fingerprints as `(display name, needles)` pairs.
///
/// `detect_css` reports a framework when any of its needles appears in the
/// stylesheet URLs or anywhere in the lowercased page HTML.
const CSS_FRAMEWORKS: &[(&str, &[&str])] = &[
    ("Bootstrap", &["bootstrap"]),
    ("Tailwind CSS", &["tailwind"]),
    ("Bulma", &["bulma"]),
    ("Foundation", &["foundation"]),
    ("Semantic UI", &["semantic-ui"]),
    ("Materialize", &["materialize"]),
    ("UIKit", &["uikit"]),
    ("Pure CSS", &["pure-css", "pure-"]),
];
48
/// Content-management-system fingerprints as `(display name, needles)` pairs.
///
/// Used with `detect_by_content`: a CMS is reported when any needle occurs
/// as a substring of the lowercased page HTML.
const CMS_PATTERNS: &[(&str, &[&str])] = &[
    ("WordPress", &["wp-content", "wp-includes", "wp-admin", "wordpress"]),
    ("Drupal", &["drupal", "sites/all", "sites/default"]),
    ("Joomla", &["joomla", "option=com_"]),
    ("Magento", &["magento", "mage/cookies.js", "skin/frontend"]),
    ("Shopify", &["shopify", "shopifycdn"]),
    ("Wix", &["wix.com", "wixstatic"]),
    ("Squarespace", &["squarespace", "sqsp"]),
    ("Ghost", &["ghost.io", "casper"]),
    ("Webflow", &["webflow"]),
    ("TYPO3", &["typo3", "typo3conf"]),
    ("Concrete5", &["concrete5"]),
];
65
/// E-commerce platform and payment-provider fingerprints as
/// `(display name, needles)` pairs.
///
/// Used with `detect_by_content`: an entry is reported when any needle occurs
/// as a substring of the lowercased page HTML.
const ECOMMERCE: &[(&str, &[&str])] = &[
    ("Shopify", &["shopify", "shopifycdn"]),
    ("WooCommerce", &["woocommerce", "wc-"]),
    // The Magento needles mirror `CMS_PATTERNS`. A bare "mage" needle must not
    // be used here: it is a substring of "image" and would flag almost every
    // page as a Magento shop.
    ("Magento", &["magento", "mage/cookies.js", "skin/frontend"]),
    ("PrestaShop", &["prestashop"]),
    ("BigCommerce", &["bigcommerce"]),
    ("OpenCart", &["opencart"]),
    ("Stripe", &["stripe"]),
    ("PayPal", &["paypal"]),
    ("Square", &["squareup"]),
];
77
/// Analytics / tracking fingerprints as `(display name, needles)` pairs.
///
/// An entry is reported when any needle occurs as a substring of the
/// lowercased page HTML or of the collected `<script src>` URLs.
const ANALYTICS: &[(&str, &[&str])] = &[
    ("Google Analytics", &["google-analytics", "googletagmanager", "gtag"]),
    ("Google Tag Manager", &["googletagmanager"]),
    ("Facebook Pixel", &["facebook.net/tr", "fbevents.js"]),
    ("Hotjar", &["hotjar"]),
    ("Mixpanel", &["mixpanel"]),
    ("Segment", &["segment.com", "analytics.js"]),
    ("Adobe Analytics", &["adobe", "omniture"]),
    ("Yandex Metrica", &["yandex", "metrica"]),
];
91
/// Web-application-firewall fingerprints as `(display name, needles)` pairs.
///
/// `detect_waf` reports an entry when any needle occurs as a substring of the
/// lowercased header dump or of the lowercased page HTML, so every needle has
/// to be specific enough not to occur in ordinary markup.
const WAF_INDICATORS: &[(&str, &[&str])] = &[
    ("Cloudflare", &["cf-ray", "cloudflare"]),
    ("AWS WAF", &["x-amzn-requestid", "awselb"]),
    ("Incapsula", &["incap_ses", "incapsula"]),
    ("Akamai", &["akamai"]),
    ("Sucuri", &["sucuri"]),
    ("ModSecurity", &["mod_security"]),
    // A bare "f5" needle is deliberately avoided: it matches hex colours such
    // as `#f5f5f5` that appear in the HTML of a large share of pages.
    ("F5 BIG-IP", &["bigip", "big-ip"]),
    ("Barracuda", &["barracuda"]),
];
102
/// Response headers inspected by `analyze_security_headers`, as
/// `(header name, importance)` pairs.
///
/// The importance string is copied into the report entry of a header that is
/// present on the response.
const SECURITY_HEADERS: &[(&str, &str)] = &[
    // High importance
    ("Content-Security-Policy", "High"),
    ("Strict-Transport-Security", "High"),
    // Medium importance
    ("X-Frame-Options", "Medium"),
    ("X-Content-Type-Options", "Medium"),
    ("X-XSS-Protection", "Medium"),
    ("Referrer-Policy", "Medium"),
];
111
/// Well-known WordPress plugins as `(slug, display name)` pairs.
///
/// `extract_wp_plugins` uses the slug both as a substring signature in the
/// lowercased page HTML and as the key for translating a discovered plugin
/// directory name into its display name.
const WP_KNOWN_PLUGINS: &[(&str, &str)] = &[
    ("yoast", "Yoast SEO"),
    ("akismet", "Akismet Anti-Spam"),
    ("jetpack", "Jetpack"),
    ("woocommerce", "WooCommerce"),
    ("contact-form-7", "Contact Form 7"),
    ("elementor", "Elementor"),
    ("wordfence", "Wordfence Security"),
    ("wp-super-cache", "WP Super Cache"),
    ("all-in-one-seo", "All in One SEO"),
    ("google-analytics", "Google Analytics"),
];
124
125// ── Data Structures ─────────────────────────────────────────────────────────
126
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct WebTechResult {
129    pub domain: String,
130    // Basic technology
131    pub web_server: String,
132    pub backend: Vec<String>,
133    pub frontend: Vec<String>,
134    pub js_libraries: Vec<String>,
135    pub css_frameworks: Vec<String>,
136    pub cms: Vec<String>,
137    pub ecommerce: Vec<String>,
138    pub cdn: Vec<String>,
139    pub analytics: Vec<String>,
140    // Security
141    pub security_headers: HashMap<String, SecurityHeaderInfo>,
142    pub security_vulnerabilities: VulnerabilityInfo,
143    pub information_disclosure: DisclosureInfo,
144    pub security_services: SecurityServicesInfo,
145    pub cookie_security: CookieSecurityInfo,
146    // WordPress
147    pub is_wordpress: bool,
148    pub wordpress_analysis: Option<WordPressAnalysis>,
149    // Score
150    pub security_score: SecurityScoreResult,
151}
152
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct SecurityHeaderInfo {
155    pub present: bool,
156    pub value: String,
157    pub security_level: String,
158}
159
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct VulnerabilityInfo {
162    pub missing_security_headers: Vec<String>,
163    pub insecure_practices: Vec<String>,
164    pub exposed_information: Vec<String>,
165}
166
167#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct DisclosureInfo {
169    pub server_info: Vec<String>,
170    pub technology_disclosure: Vec<String>,
171    pub file_exposure: Vec<String>,
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct SecurityServicesInfo {
176    pub waf: Vec<String>,
177}
178
179#[derive(Debug, Clone, Serialize, Deserialize)]
180pub struct CookieSecurityInfo {
181    pub secure_flag: bool,
182    pub httponly_flag: bool,
183    pub samesite_attribute: bool,
184    pub security_score: u32,
185    pub security_level: String,
186    pub recommendations: Vec<String>,
187}
188
189#[derive(Debug, Clone, Serialize, Deserialize)]
190pub struct WordPressAnalysis {
191    pub confidence: String,
192    pub version: String,
193    pub theme: String,
194    pub plugins: Vec<String>,
195    pub users_found: Vec<WpUser>,
196    pub rest_api_enabled: bool,
197    pub xmlrpc_enabled: bool,
198    pub admin_accessible: bool,
199    pub login_accessible: bool,
200    pub debug_enabled: bool,
201    pub security_issues: Vec<String>,
202}
203
204#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct WpUser {
206    pub id: u64,
207    pub username: String,
208    pub display_name: String,
209}
210
211#[derive(Debug, Clone, Serialize, Deserialize)]
212pub struct SecurityScoreResult {
213    pub overall_score: u32,
214    pub security_grade: String,
215    pub risk_level: String,
216    pub critical_issues: Vec<String>,
217    pub recommendations: Vec<String>,
218}
219
220// ── Main Function ───────────────────────────────────────────────────────────
221
222pub async fn detect_web_technologies(
223    domain: &str,
224) -> Result<WebTechResult, Box<dyn std::error::Error + Send + Sync>> {
225    let url = if domain.starts_with("http") {
226        domain.to_string()
227    } else {
228        format!("https://{}", domain)
229    };
230
231    let client = Client::builder()
232        .timeout(Duration::from_secs(30))
233        .danger_accept_invalid_certs(true)
234        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
235        .build()?;
236
237    let res = client.get(&url).send().await?;
238    let headers = res.headers().clone();
239    let html_raw = res.text().await?;
240    let html_lower = html_raw.to_lowercase();
241
242    let base_domain = domain.replace("https://", "").replace("http://", "");
243
244    // Cache header strings
245    let server_hdr = get_header(&headers, "server");
246    let powered_by = get_header(&headers, "x-powered-by").to_lowercase();
247    let headers_str = format!("{:?}", headers).to_lowercase();
248
249    // ── All Html-dependent (sync) work in a block so it drops before .await ──
250    let (
251        web_server,
252        backend,
253        frontend,
254        js_libraries,
255        css_frameworks,
256        cms,
257        ecommerce,
258        cdn,
259        analytics,
260        security_headers,
261        security_vulnerabilities,
262        information_disclosure,
263        security_services,
264        cookie_security,
265        is_wordpress,
266        wp_version,
267        wp_theme,
268        wp_plugins,
269    ) = {
270        let document = Html::parse_document(&html_raw);
271
272        let web_server = detect_server(&server_hdr, &powered_by);
273        let backend = detect_backend(&html_lower, &powered_by, &server_hdr);
274        let frontend = detect_frontend(&html_lower, &document);
275        let js_libraries = detect_pattern_list(&html_lower, &document, JS_LIBRARIES);
276        let css_frameworks = detect_css(&html_lower, &document);
277        let cms = detect_by_content(&html_lower, CMS_PATTERNS);
278        let ecommerce = detect_by_content(&html_lower, ECOMMERCE);
279        let cdn = detect_cdn(&server_hdr, &headers, &html_lower);
280        let analytics = detect_pattern_list(&html_lower, &document, ANALYTICS);
281        let security_headers = analyze_security_headers(&headers);
282        let security_vulnerabilities = detect_vulnerabilities(&html_lower, &headers);
283        let information_disclosure = detect_disclosure(&html_lower, &server_hdr, &powered_by);
284        let security_services = detect_waf(&headers_str, &html_lower);
285        let cookie_security = analyze_cookies(&headers);
286        let is_wordpress = is_wp(&html_lower);
287
288        // Extract WP data synchronously while we have Html
289        let wp_version = if is_wordpress {
290            extract_wp_version(&html_lower, &document)
291        } else {
292            String::new()
293        };
294        let wp_theme = if is_wordpress {
295            extract_wp_theme(&document)
296        } else {
297            String::new()
298        };
299        let wp_plugins = if is_wordpress {
300            extract_wp_plugins(&html_lower, &document)
301        } else {
302            vec![]
303        };
304
305        (
306            web_server,
307            backend,
308            frontend,
309            js_libraries,
310            css_frameworks,
311            cms,
312            ecommerce,
313            cdn,
314            analytics,
315            security_headers,
316            security_vulnerabilities,
317            information_disclosure,
318            security_services,
319            cookie_security,
320            is_wordpress,
321            wp_version,
322            wp_theme,
323            wp_plugins,
324        )
325    }; // `document` (Html) is dropped here — safe for async after this point
326
327    // 15. WordPress Analysis (async — no Html involved)
328    let wordpress_analysis = if is_wordpress {
329        Some(
330            analyze_wordpress(
331                &client,
332                &base_domain,
333                &html_lower,
334                wp_version,
335                wp_theme,
336                wp_plugins,
337            )
338            .await,
339        )
340    } else {
341        None
342    };
343
344    // 16. Security Score
345    let security_score = calculate_score(
346        &security_headers,
347        &security_vulnerabilities,
348        &information_disclosure,
349        &security_services,
350        &cookie_security,
351        &wordpress_analysis,
352    );
353
354    Ok(WebTechResult {
355        domain: domain.to_string(),
356        web_server,
357        backend,
358        frontend,
359        js_libraries,
360        css_frameworks,
361        cms,
362        ecommerce,
363        cdn,
364        analytics,
365        security_headers,
366        security_vulnerabilities,
367        information_disclosure,
368        security_services,
369        cookie_security,
370        is_wordpress,
371        wordpress_analysis,
372        security_score,
373    })
374}
375
376// ── 1. Web Server ───────────────────────────────────────────────────────────
377
378fn detect_server(server: &str, powered_by: &str) -> String {
379    let s_lower = server.to_lowercase();
380    let p_lower = powered_by.to_lowercase();
381    let version_re = Regex::new(r"[\d\.]+").ok();
382    for &(key, name) in WEB_SERVERS {
383        if s_lower.contains(key) || p_lower.contains(key) {
384            let version = version_re
385                .as_ref()
386                .and_then(|r| r.find(&s_lower).map(|m| format!(" {}", m.as_str())))
387                .unwrap_or_default();
388            return format!("{}{}", name, version);
389        }
390    }
391    if server.is_empty() {
392        "Not Detected".into()
393    } else {
394        server.to_string()
395    }
396}
397
398// ── 2. Backend Technologies ─────────────────────────────────────────────────
399
/// Guesses the server-side technologies from the page and response headers.
///
/// * `html` – page HTML; the needles are lowercase, so pass lowercased text.
/// * `powered_by` – value of the `X-Powered-By` header (any case).
/// * `server` – value of the `Server` header (any case).
///
/// Returns the matching technology names in a fixed order, or
/// `["Not Detected"]` when nothing matched.
fn detect_backend(html: &str, powered_by: &str, server: &str) -> Vec<String> {
    // Both header values are lowercased here so that a raw header such as
    // "PHP/8.1" or "Express" matches the lowercase needles below.
    let powered = powered_by.to_lowercase();
    let srv = server.to_lowercase();

    let checks = [
        (
            "PHP",
            powered.contains("php") || html.contains(".php") || html.contains("phpsessid"),
        ),
        (
            "ASP.NET",
            powered.contains("asp.net")
                || html.contains("__viewstate")
                || html.contains("aspxauth"),
        ),
        (
            "Node.js",
            powered.contains("express") || srv.contains("node") || powered.contains("koa"),
        ),
        (
            "Python Django",
            html.contains("django") || html.contains("csrfmiddlewaretoken"),
        ),
        (
            "Python Flask",
            html.contains("flask") || srv.contains("werkzeug"),
        ),
        (
            "Ruby on Rails",
            powered.contains("ruby")
                || html.contains("rails")
                || html.contains("authenticity_token"),
        ),
        (
            "Java",
            html.contains("jsessionid")
                || html.contains("servlet")
                || html.contains(".jsp")
                || html.contains("spring"),
        ),
        ("Go", html.contains("golang") || html.contains("gin-gonic")),
    ];

    let techs: Vec<String> = checks
        .iter()
        .filter(|(_, hit)| *hit)
        .map(|(name, _)| (*name).to_string())
        .collect();

    if techs.is_empty() {
        vec!["Not Detected".into()]
    } else {
        techs
    }
}
440
441// ── 3. Frontend Technologies ────────────────────────────────────────────────
442
443fn detect_frontend(html: &str, doc: &Html) -> Vec<String> {
444    let mut techs = vec![];
445    let scripts = collect_script_srcs(doc);
446
447    if scripts.contains("react") || html.contains("data-reactroot") || html.contains("__react") {
448        techs.push("React".into());
449    }
450    if scripts.contains("vue") || html.contains("v-app") || html.contains("v-cloak") {
451        techs.push("Vue.js".into());
452    }
453    if scripts.contains("angular") || html.contains("ng-app") || html.contains("ng-version") {
454        techs.push("Angular".into());
455    }
456    if scripts.contains("svelte") || html.contains("_svelte") {
457        techs.push("Svelte".into());
458    }
459    if scripts.contains("ember") || html.contains("ember-application") {
460        techs.push("Ember.js".into());
461    }
462    if scripts.contains("alpine") || html.contains("x-data") {
463        techs.push("Alpine.js".into());
464    }
465    if scripts.contains("jquery") {
466        techs.push("jQuery".into());
467    }
468
469    if techs.is_empty() {
470        vec!["Not Detected".into()]
471    } else {
472        techs
473    }
474}
475
476// ── 4/9. Pattern-based detection (JS libs, Analytics) ───────────────────────
477
478fn detect_pattern_list(html: &str, doc: &Html, patterns: &[(&str, &[&str])]) -> Vec<String> {
479    let mut found = vec![];
480    let scripts = collect_script_srcs(doc);
481    for &(name, pats) in patterns {
482        if pats.iter().any(|p| scripts.contains(p) || html.contains(p)) {
483            found.push(name.to_string());
484        }
485    }
486    if found.is_empty() {
487        vec!["Not Detected".into()]
488    } else {
489        found
490    }
491}
492
493// ── 5. CSS Frameworks ───────────────────────────────────────────────────────
494
495fn detect_css(html: &str, doc: &Html) -> Vec<String> {
496    let mut found = vec![];
497    let stylesheets = collect_stylesheet_hrefs(doc);
498    let combined = format!("{} {}", stylesheets, html);
499    for &(name, pats) in CSS_FRAMEWORKS {
500        if pats.iter().any(|p| combined.contains(p)) {
501            found.push(name.to_string());
502        }
503    }
504    if found.is_empty() {
505        vec!["Not Detected".into()]
506    } else {
507        found
508    }
509}
510
511// ── 6/7. Content-based detection (CMS, E-commerce) ─────────────────────────
512
/// Reports every entry of `patterns` that has at least one needle occurring
/// as a substring of `html`.
///
/// Names are returned in table order; the result is `["Not Detected"]` when
/// no entry matched (including when `patterns` is empty).
fn detect_by_content(html: &str, patterns: &[(&str, &[&str])]) -> Vec<String> {
    let matches: Vec<String> = patterns
        .iter()
        .filter(|(_, needles)| needles.iter().any(|needle| html.contains(*needle)))
        .map(|(label, _)| (*label).to_string())
        .collect();

    if matches.is_empty() {
        return vec!["Not Detected".into()];
    }
    matches
}
526
527// ── 8. CDN ──────────────────────────────────────────────────────────────────
528
529fn detect_cdn(server: &str, headers: &reqwest::header::HeaderMap, html: &str) -> Vec<String> {
530    let mut found = vec![];
531    let s = server.to_lowercase();
532    let via = get_header(headers, "via").to_lowercase();
533
534    if s.contains("cloudflare") || headers.contains_key("cf-ray") {
535        found.push("Cloudflare".into());
536    }
537    if s.contains("cloudfront") || via.contains("cloudfront") || headers.contains_key("x-amz-cf-id")
538    {
539        found.push("AWS CloudFront".into());
540    }
541    if s.contains("fastly") || via.contains("fastly") {
542        found.push("Fastly".into());
543    }
544    if s.contains("keycdn") {
545        found.push("KeyCDN".into());
546    }
547    if html.contains("maxcdn") {
548        found.push("MaxCDN".into());
549    }
550    if s.contains("akamai") || headers.contains_key("x-akamai-transformed") {
551        found.push("Akamai".into());
552    }
553
554    if found.is_empty() {
555        vec!["Not Detected".into()]
556    } else {
557        found
558    }
559}
560
561// ── 10. Security Headers ────────────────────────────────────────────────────
562
563fn analyze_security_headers(
564    headers: &reqwest::header::HeaderMap,
565) -> HashMap<String, SecurityHeaderInfo> {
566    let mut result = HashMap::new();
567    for &(name, importance) in SECURITY_HEADERS {
568        let present = headers.contains_key(name);
569        let value = headers
570            .get(name)
571            .and_then(|v| v.to_str().ok())
572            .unwrap_or("Not Set")
573            .to_string();
574        result.insert(
575            name.to_string(),
576            SecurityHeaderInfo {
577                present,
578                value,
579                security_level: if present {
580                    importance.to_string()
581                } else {
582                    "Low".into()
583                },
584            },
585        );
586    }
587    result
588}
589
590// ── 11. Security Vulnerabilities ────────────────────────────────────────────
591
592fn detect_vulnerabilities(html: &str, headers: &reqwest::header::HeaderMap) -> VulnerabilityInfo {
593    let mut missing = vec![];
594    let required = [
595        ("Content-Security-Policy", "CSP Header Missing - XSS Risk"),
596        ("X-Frame-Options", "Clickjacking Protection Missing"),
597        ("X-Content-Type-Options", "MIME Sniffing Protection Missing"),
598        ("Strict-Transport-Security", "HSTS Missing - MITM Risk"),
599        ("X-XSS-Protection", "XSS Protection Header Missing"),
600    ];
601    for &(header, risk) in &required {
602        if !headers.contains_key(header) {
603            missing.push(risk.to_string());
604        }
605    }
606
607    let mut insecure = vec![];
608    if html.contains("http://") && html.contains("https://") {
609        insecure.push("Mixed Content - HTTP resources on HTTPS page".into());
610    }
611
612    let mut exposed = vec![];
613    let debug_patterns = [
614        (r"debug.*true", "Debug mode enabled"),
615        (r"error.*trace", "Error traces exposed"),
616        (r"stack.*trace", "Stack traces visible"),
617        (r"sql.*error", "SQL errors exposed"),
618    ];
619    for &(pattern, desc) in &debug_patterns {
620        if Regex::new(pattern)
621            .ok()
622            .map(|r| r.is_match(html))
623            .unwrap_or(false)
624        {
625            exposed.push(desc.to_string());
626        }
627    }
628
629    VulnerabilityInfo {
630        missing_security_headers: missing,
631        insecure_practices: insecure,
632        exposed_information: exposed,
633    }
634}
635
636// ── 12. Information Disclosure ───────────────────────────────────────────────
637
638fn detect_disclosure(html: &str, server: &str, powered_by: &str) -> DisclosureInfo {
639    let mut server_info = vec![];
640    if Regex::new(r"/[\d\.]+")
641        .ok()
642        .map(|r| r.is_match(server))
643        .unwrap_or(false)
644    {
645        server_info.push(format!("Server version exposed: {}", server));
646    }
647
648    let mut tech = vec![];
649    if !powered_by.is_empty() {
650        tech.push(format!("Technology stack exposed: {}", powered_by));
651    }
652
653    let mut files = vec![];
654    if html.contains("c:\\") || html.contains("c:/") {
655        files.push("Windows file paths exposed".into());
656    }
657    if html.contains("/var/www/") {
658        files.push("Linux file paths exposed".into());
659    }
660    if html.contains("/home/") {
661        files.push("User directories exposed".into());
662    }
663    if html.contains(".env") {
664        files.push("Environment files referenced".into());
665    }
666
667    DisclosureInfo {
668        server_info,
669        technology_disclosure: tech,
670        file_exposure: files,
671    }
672}
673
674// ── 13. Security Services (WAF) ─────────────────────────────────────────────
675
676fn detect_waf(headers_str: &str, html: &str) -> SecurityServicesInfo {
677    let mut waf = vec![];
678    for &(name, indicators) in WAF_INDICATORS {
679        if indicators
680            .iter()
681            .any(|i| headers_str.contains(i) || html.contains(i))
682        {
683            waf.push(name.to_string());
684        }
685    }
686    SecurityServicesInfo { waf }
687}
688
689// ── 14. Cookie Security ─────────────────────────────────────────────────────
690
691fn analyze_cookies(headers: &reqwest::header::HeaderMap) -> CookieSecurityInfo {
692    let cookie_str = headers
693        .get("set-cookie")
694        .and_then(|v| v.to_str().ok())
695        .unwrap_or("");
696
697    if cookie_str.is_empty() {
698        return CookieSecurityInfo {
699            secure_flag: false,
700            httponly_flag: false,
701            samesite_attribute: false,
702            security_score: 0,
703            security_level: "N/A".into(),
704            recommendations: vec!["No cookies detected".into()],
705        };
706    }
707
708    let secure = cookie_str.to_lowercase().contains("secure");
709    let httponly = cookie_str.to_lowercase().contains("httponly");
710    let samesite = cookie_str.to_lowercase().contains("samesite");
711
712    let mut score = 0u32;
713    let mut recs = vec![];
714    if secure {
715        score += 40;
716    } else {
717        recs.push("Add Secure flag to cookies".into());
718    }
719    if httponly {
720        score += 30;
721    } else {
722        recs.push("Add HttpOnly flag to prevent XSS".into());
723    }
724    if samesite {
725        score += 30;
726    } else {
727        recs.push("Add SameSite attribute for CSRF protection".into());
728    }
729
730    let level = if score >= 90 {
731        "Excellent"
732    } else if score >= 70 {
733        "Good"
734    } else if score >= 50 {
735        "Fair"
736    } else {
737        "Poor"
738    };
739
740    CookieSecurityInfo {
741        secure_flag: secure,
742        httponly_flag: httponly,
743        samesite_attribute: samesite,
744        security_score: score,
745        security_level: level.into(),
746        recommendations: recs,
747    }
748}
749
750// ── 15. WordPress Analysis ──────────────────────────────────────────────────
751
/// Returns `true` when at least two distinct WordPress path markers occur in
/// `html`.
fn is_wp(html: &str) -> bool {
    const MARKERS: [&str; 5] = [
        "wp-content/",
        "wp-includes/",
        "wp-admin/",
        "wp-json/",
        "xmlrpc.php",
    ];

    let hits = MARKERS
        .iter()
        .filter(|marker| html.contains(**marker))
        .count();
    hits >= 2
}
762
763async fn analyze_wordpress(
764    client: &Client,
765    domain: &str,
766    html: &str,
767    version: String,
768    theme: String,
769    plugins: Vec<String>,
770) -> WordPressAnalysis {
771    let base_url = format!("https://{}", domain);
772
773    // Confidence
774    let confidence = if html.contains("wp-content/") && html.contains("wp-includes/") {
775        "High"
776    } else {
777        "Medium"
778    };
779
780    // Users via REST API
781    let users_found = enumerate_wp_users(client, &base_url).await;
782
783    // REST API
784    let rest_api = check_wp_endpoint(client, &format!("{}/wp-json/", base_url)).await;
785
786    // XMLRPC
787    let xmlrpc = check_wp_xmlrpc(client, &base_url).await;
788
789    // Admin / Login
790    let admin = check_wp_endpoint(client, &format!("{}/wp-admin/", base_url)).await;
791    let login = check_wp_endpoint(client, &format!("{}/wp-login.php", base_url)).await;
792
793    // Debug
794    let debug = html.contains("wp_debug")
795        || Regex::new(r"fatal error.*wp-")
796            .ok()
797            .map(|r| r.is_match(html))
798            .unwrap_or(false);
799
800    // Security issues
801    let mut issues = vec![];
802    if rest_api {
803        issues.push("REST API enabled - user enumeration possible".into());
804    }
805    if xmlrpc {
806        issues.push("XML-RPC enabled - brute force risk".into());
807    }
808    if debug {
809        issues.push("Debug information potentially exposed".into());
810    }
811    if !users_found.is_empty() {
812        issues.push(format!(
813            "{} users enumerated via REST API",
814            users_found.len()
815        ));
816    }
817
818    WordPressAnalysis {
819        confidence: confidence.into(),
820        version,
821        theme,
822        plugins,
823        users_found,
824        rest_api_enabled: rest_api,
825        xmlrpc_enabled: xmlrpc,
826        admin_accessible: admin,
827        login_accessible: login,
828        debug_enabled: debug,
829        security_issues: issues,
830    }
831}
832
833fn extract_wp_version(html: &str, doc: &Html) -> String {
834    // Check generator meta
835    if let Ok(sel) = Selector::parse("meta[name=\"generator\"]") {
836        if let Some(el) = doc.select(&sel).next() {
837            if let Some(content) = el.value().attr("content") {
838                if content.to_lowercase().contains("wordpress") {
839                    if let Some(m) = Regex::new(r"(?i)wordpress\s+([\d\.]+)")
840                        .ok()
841                        .and_then(|r| r.captures(content))
842                    {
843                        return m.get(1).unwrap().as_str().to_string();
844                    }
845                }
846            }
847        }
848    }
849    // Regex on HTML
850    if let Some(m) = Regex::new(r#"ver=([\d\.]+)"#)
851        .ok()
852        .and_then(|r| r.captures(html))
853    {
854        return m.get(1).unwrap().as_str().to_string();
855    }
856    "Unknown".into()
857}
858
859fn extract_wp_theme(doc: &Html) -> String {
860    let theme_re = Regex::new(r"/wp-content/themes/([^/]+)").ok();
861    if let Ok(sel) = Selector::parse("link[rel=\"stylesheet\"]") {
862        for el in doc.select(&sel) {
863            if let Some(href) = el.value().attr("href") {
864                if href.contains("wp-content/themes/") {
865                    if let Some(m) = theme_re.as_ref().and_then(|r| r.captures(href)) {
866                        return m.get(1).unwrap().as_str().to_string();
867                    }
868                }
869            }
870        }
871    }
872    "Unknown".into()
873}
874
875fn extract_wp_plugins(html: &str, doc: &Html) -> Vec<String> {
876    let mut plugins = std::collections::HashSet::new();
877    let plugin_re = Regex::new(r"/wp-content/plugins/([^/]+)").ok();
878
879    // From script/link srcs
880    let selectors = ["script[src]", "link[rel=\"stylesheet\"]"];
881    for sel_str in &selectors {
882        if let Ok(sel) = Selector::parse(sel_str) {
883            for el in doc.select(&sel) {
884                let attr = el
885                    .value()
886                    .attr("src")
887                    .or_else(|| el.value().attr("href"))
888                    .unwrap_or("");
889                if attr.contains("wp-content/plugins/") {
890                    if let Some(m) = plugin_re.as_ref().and_then(|r| r.captures(attr)) {
891                        plugins.insert(m.get(1).unwrap().as_str().to_string());
892                    }
893                }
894            }
895        }
896    }
897
898    // Known plugin signatures in HTML
899    for &(slug, _name) in WP_KNOWN_PLUGINS {
900        if html.contains(slug) {
901            plugins.insert(slug.to_string());
902        }
903    }
904
905    // Map slugs to names
906    plugins
907        .into_iter()
908        .map(|slug| {
909            WP_KNOWN_PLUGINS
910                .iter()
911                .find(|&&(s, _)| s == slug.as_str())
912                .map(|&(_, name)| name.to_string())
913                .unwrap_or_else(|| slug.replace('-', " "))
914        })
915        .collect()
916}
917
918async fn enumerate_wp_users(client: &Client, base_url: &str) -> Vec<WpUser> {
919    let url = format!("{}/wp-json/wp/v2/users", base_url);
920    match client.get(&url).send().await {
921        Ok(resp) if resp.status().is_success() => {
922            if let Ok(users) = resp.json::<Vec<serde_json::Value>>().await {
923                return users
924                    .iter()
925                    .filter_map(|u| {
926                        Some(WpUser {
927                            id: u.get("id")?.as_u64()?,
928                            username: u.get("slug")?.as_str()?.to_string(),
929                            display_name: u.get("name")?.as_str()?.to_string(),
930                        })
931                    })
932                    .collect();
933            }
934        }
935        _ => {}
936    }
937    vec![]
938}
939
940async fn check_wp_endpoint(client: &Client, url: &str) -> bool {
941    match client.get(url).send().await {
942        Ok(r) => [200, 301, 302].contains(&r.status().as_u16()),
943        Err(_) => false,
944    }
945}
946
947async fn check_wp_xmlrpc(client: &Client, base_url: &str) -> bool {
948    let url = format!("{}/xmlrpc.php", base_url);
949    match client.get(&url).send().await {
950        Ok(r) if r.status().is_success() => r
951            .text()
952            .await
953            .unwrap_or_default()
954            .contains("XML-RPC server accepts POST requests only"),
955        _ => false,
956    }
957}
958
959// ── 16. Security Score ──────────────────────────────────────────────────────
960
961fn calculate_score(
962    headers: &HashMap<String, SecurityHeaderInfo>,
963    vulns: &VulnerabilityInfo,
964    disclosure: &DisclosureInfo,
965    services: &SecurityServicesInfo,
966    cookies: &CookieSecurityInfo,
967    wp: &Option<WordPressAnalysis>,
968) -> SecurityScoreResult {
969    let mut score: i32 = 100;
970    let mut issues = vec![];
971    let mut recs = vec![];
972
973    // Security headers (−8 per missing)
974    let missing = headers.values().filter(|h| !h.present).count() as i32;
975    score -= missing * 8;
976    if missing > 0 {
977        issues.push(format!("{} critical security headers missing", missing));
978        recs.push("Implement missing security headers".into());
979    }
980
981    // Missing headers from vuln check (−5 each)
982    score -= vulns.missing_security_headers.len() as i32 * 5;
983
984    // Insecure practices (−10 each)
985    for p in &vulns.insecure_practices {
986        score -= 10;
987        issues.push(p.clone());
988    }
989
990    // Info disclosure (−5 each)
991    let disc_count = disclosure.server_info.len()
992        + disclosure.technology_disclosure.len()
993        + disclosure.file_exposure.len();
994    score -= disc_count as i32 * 5;
995
996    // WAF bonus (+5)
997    if !services.waf.is_empty() {
998        score += 5;
999        recs.push("WAF detected - Good security practice".into());
1000    }
1001
1002    // Cookie security
1003    if cookies.security_score < 70 && cookies.security_level != "N/A" {
1004        score -= 10;
1005        issues.push("Insecure cookie configuration".into());
1006        recs.push("Implement secure cookie flags".into());
1007    }
1008
1009    // WordPress
1010    if let Some(wp_info) = wp {
1011        for issue in &wp_info.security_issues {
1012            score -= 5;
1013            issues.push(issue.clone());
1014        }
1015    }
1016
1017    let final_score = score.clamp(0, 100) as u32;
1018
1019    let grade = match final_score {
1020        90..=100 => "A+",
1021        85..=89 => "A",
1022        80..=84 => "A-",
1023        75..=79 => "B+",
1024        70..=74 => "B",
1025        65..=69 => "B-",
1026        60..=64 => "C+",
1027        55..=59 => "C",
1028        50..=54 => "C-",
1029        40..=49 => "D",
1030        _ => "F",
1031    };
1032
1033    let risk = match final_score {
1034        80..=100 => "Low Risk",
1035        60..=79 => "Medium Risk",
1036        40..=59 => "High Risk",
1037        _ => "Critical Risk",
1038    };
1039
1040    SecurityScoreResult {
1041        overall_score: final_score,
1042        security_grade: grade.into(),
1043        risk_level: risk.into(),
1044        critical_issues: issues.into_iter().take(5).collect(),
1045        recommendations: recs.into_iter().take(5).collect(),
1046    }
1047}
1048
1049// ── Helpers ──────────────────────────────────────────────────────────────────
1050
1051fn get_header(headers: &reqwest::header::HeaderMap, name: &str) -> String {
1052    headers
1053        .get(name)
1054        .and_then(|v| v.to_str().ok())
1055        .unwrap_or("")
1056        .to_string()
1057}
1058
1059fn collect_script_srcs(doc: &Html) -> String {
1060    let sel = Selector::parse("script[src]").unwrap();
1061    doc.select(&sel)
1062        .filter_map(|el| el.value().attr("src"))
1063        .collect::<Vec<_>>()
1064        .join(" ")
1065        .to_lowercase()
1066}
1067
1068fn collect_stylesheet_hrefs(doc: &Html) -> String {
1069    let sel = Selector::parse("link[rel=\"stylesheet\"]").unwrap();
1070    doc.select(&sel)
1071        .filter_map(|el| el.value().attr("href"))
1072        .collect::<Vec<_>>()
1073        .join(" ")
1074        .to_lowercase()
1075}