Skip to main content

gossan_headless/
lib.rs

1#![forbid(unsafe_code)]
2// pedantic moved to workspace [lints.clippy] in root Cargo.toml
3#![cfg_attr(
4    not(test),
5    deny(
6        clippy::unwrap_used,
7        clippy::expect_used,
8        clippy::todo,
9        clippy::unimplemented,
10        clippy::panic
11    )
12)]
13#![allow(
14    clippy::module_name_repetitions,
15    clippy::must_use_candidate,
16    clippy::missing_errors_doc
17)]
18
19//! Headless browser scanning — screenshot, DOM analysis, SPA detection.
20//!
21//! Uses chromiumoxide to render JavaScript-heavy pages and extract
22//! security-relevant signals that static HTTP probing cannot see.
23
24use async_trait::async_trait;
25use chromiumoxide::{Browser, BrowserConfig};
26use futures::StreamExt;
27use gossan_core::{Config, ScanInput, Scanner, Target};
28use secfinding::{Evidence, Finding, FindingBuilder, Severity};
29use std::time::Duration;
/// Headless browser scanner — screenshot, DOM analysis, SPA spider, dynamic endpoint discovery.
///
/// The type is a stateless unit struct; all per-run state (browser handle,
/// configuration, target stream) is supplied to [`Scanner::run`].
#[derive(Debug, Clone, Copy, Default)]
pub struct HeadlessScanner;
32
33fn finding_builder(
34    target: &Target,
35    severity: Severity,
36    title: impl Into<String>,
37    detail: impl Into<String>,
38) -> FindingBuilder {
39    Finding::builder("headless", target.domain().unwrap_or("?"), severity)
40        .title(title)
41        .detail(detail)
42        .kind(secfinding::FindingKind::InfoDisclosure)
43}
44
45#[async_trait]
46impl Scanner for HeadlessScanner {
47    fn name(&self) -> &'static str {
48        "headless"
49    }
50
51    fn tags(&self) -> &[&'static str] {
52        &["headless", "browser", "dynamic"]
53    }
54
55    fn accepts(&self, target: &Target) -> bool {
56        matches!(target, Target::Web(_))
57    }
58
59    async fn run(&self, input: ScanInput, config: &Config) -> anyhow::Result<()> {
60        // Drain the inbound target stream into an owned Vec. The
61        // ScanInput contract migrated from a buffered `targets: Vec<_>`
62        // field to a streaming `target_rx: Mutex<UnboundedReceiver>` —
63        // headless was missed in that migration. Pull synchronously
64        // here because chromiumoxide's per-tab work needs an owned set
65        // upfront to size the buffer_unordered pool.
66        let owned: Vec<Target> = {
67            let mut rx = input.target_rx.lock().await;
68            let mut buf = Vec::new();
69            while let Ok(t) = rx.try_recv() {
70                buf.push(t);
71            }
72            buf
73        };
74
75        if owned.is_empty() {
76            return Ok(());
77        }
78
79        // Configure browser (headless, sandbox disabled for CI environments)
80        let (browser, mut handler) = Browser::launch(
81            BrowserConfig::builder()
82                .with_head()
83                .no_sandbox()
84                .build()
85                .map_err(|e| anyhow::anyhow!("config error: {e}"))?,
86        )
87        .await
88        .map_err(|e| anyhow::anyhow!("Failed to launch browser: {:?}", e))?;
89
90        let browser = std::sync::Arc::new(browser);
91
92        // Maintain the handler connection
93        let handle = tokio::spawn(async move {
94            while let Some(h) = handler.next().await {
95                if h.is_err() {
96                    break;
97                }
98            }
99        });
100
101        // Parallel execution of all targets using the single browser instance
102        let results: Vec<anyhow::Result<(Target, Vec<Finding>)>> = futures::stream::iter(owned)
103            .map(|target| {
104                let browser = std::sync::Arc::clone(&browser);
105                let config = config.clone();
106                async move { analyze_target(&browser, target, &config).await }
107            })
108            // Browser limit for tabs
109            .buffer_unordered(config.concurrency.min(10))
110            .collect()
111            .await;
112
113        for (target, findings) in results.into_iter().flatten() {
114            input.emit_target(target);
115            for f in findings {
116                input.emit(f);
117            }
118        }
119
120        handle.abort();
121
122        // ... (headless logic remains same for now as it uses chrome)
123        Ok(())
124    }
125}
126
127async fn analyze_target(
128    browser: &Browser,
129    mut target: Target,
130    config: &Config,
131) -> anyhow::Result<(Target, Vec<Finding>)> {
132    let Target::Web(ref asset) = target else {
133        return Ok((target, vec![]));
134    };
135    let mut findings = Vec::new();
136
137    let page = browser.new_page(asset.url.as_str()).await?;
138
139    // ── XHR / Fetch Hooking (Legendary Dynamic Discovery) ──────────────────
140    // Inject a script to proxy XHR and fetch to catch endpoints that
141    // standard event listeners might miss due to race conditions.
142    let hook_js = r#"
143        (function() {
144            window._santh_requests = [];
145            
146            // Hook Fetch
147            const oldFetch = window.fetch;
148            window.fetch = function() {
149                window._santh_requests.push({ url: arguments[0], type: 'fetch' });
150                return oldFetch.apply(this, arguments);
151            };
152
153            // Hook XHR
154            const oldOpen = XMLHttpRequest.prototype.open;
155            XMLHttpRequest.prototype.open = function() {
156                window._santh_requests.push({ url: arguments[1], type: 'xhr' });
157                return oldOpen.apply(this, arguments);
158            };
159        })();
160    "#;
161    page.evaluate_on_new_document(hook_js).await.ok();
162
163    // Start event listener early to catch everything from the jump
164    let mut request_events = page
165        .event_listener::<chromiumoxide::cdp::browser_protocol::network::EventRequestWillBeSent>()
166        .await?;
167
168    let _ = page.goto(asset.url.as_str()).await?;
169
170    // Wait for the initial DOM load
171    page.wait_for_navigation().await.ok();
172
173    // ── 1. Authenticated Login (Katana-style) ─────────────────────────────
174    if let (Some(user), Some(pass)) = (&config.auth_user, &config.auth_pass) {
175        let login_probe = r#"
176            (function() {
177                const forms = document.forms;
178                for (const f of forms) {
179                    let hasPassword = false;
180                    let userField = null;
181                    let passField = null;
182                    for (const i of f.elements) {
183                        const t = (i.type || '').toLowerCase();
184                        if (t === 'password') {
185                            hasPassword = true;
186                            passField = i;
187                        } else if (t === 'text' || t === 'email' || t === 'username') {
188                            if (!userField) userField = i;
189                        }
190                    }
191                    if (hasPassword && userField && passField) {
192                        userField.setAttribute('data-santh-auth', 'user');
193                        passField.setAttribute('data-santh-auth', 'pass');
194                        return true;
195                    }
196                }
197                return false;
198            })()
199        "#;
200
201        if let Ok(res) = page.evaluate(login_probe).await {
202            if res.value().and_then(|v| v.as_bool()).unwrap_or(false) {
203                if let Ok(user_el) = page.find_element("input[data-santh-auth='user']").await {
204                    let _ = user_el.type_str(user).await;
205                }
206                if let Ok(pass_el) = page.find_element("input[data-santh-auth='pass']").await {
207                    let _ = pass_el.type_str(pass).await;
208                    let _ = pass_el.press_key("Enter").await;
209                }
210                // Allow some time for the login to process and session to establish
211                tokio::time::sleep(Duration::from_secs(3)).await;
212            }
213        }
214    }
215
216    // ── 2. Stateful Spidering (Clicking all a/button) ─────────────────────
217    let click_probe = r#"
218        (function() {
219            const elements = document.querySelectorAll('a, button');
220            const result = [];
221            for (let i = 0; i < Math.min(elements.length, 30); i++) {
222                const el = elements[i];
223                const text = (el.innerText || el.value || '').toLowerCase();
224                // Skip destructive actions to avoid losing session or breaking state
225                if (text.includes('logout') || text.includes('sign out') || text.includes('delete') || text.includes('remove')) {
226                    continue;
227                }
228                el.setAttribute('data-santh-click', i);
229                result.push(i);
230            }
231            return result;
232        })()
233    "#;
234
235    if let Ok(res) = page.evaluate(click_probe).await {
236        if let Some(idxs) = res.value().and_then(|v| v.as_array()) {
237            for idx in idxs {
238                if let Some(i) = idx.as_u64() {
239                    let selector = format!("[data-santh-click='{}']", i);
240                    if let Ok(el) = page.find_element(&selector).await {
241                        let _ = el.click().await;
242                        // Brief wait for dynamic route changes or background XHRs
243                        tokio::time::sleep(Duration::from_millis(400)).await;
244                    }
245                }
246            }
247        }
248    }
249
250    // Final idle to catch trailing asynchronous requests (React/Vue/Angular)
251    tokio::time::sleep(Duration::from_secs(2)).await;
252
253    // ── 3. Evidence Collection ─────────────────────────────────────────────
254
255    // Collect findings from our injected JS hook
256    if let Ok(res) = page.evaluate("window._santh_requests").await {
257        if let Some(reqs) = res.value().and_then(|v| v.as_array()) {
258            for r in reqs {
259                let url = r.get("url").and_then(|v| v.as_str()).unwrap_or("");
260                let typ = r.get("type").and_then(|v| v.as_str()).unwrap_or("unknown");
261                if !url.is_empty() && !url.starts_with("data:") {
262                    gossan_core::try_push_finding(
263                        finding_builder(
264                            &target,
265                            Severity::Info,
266                            format!("Dynamic {} Endpoint Hooked", typ.to_uppercase()),
267                            format!("Injected hook trapped runtime {} request to: {}", typ, url),
268                        )
269                        .tag("recon")
270                        .tag("hooked_request")
271                        .evidence(Evidence::Raw(url.to_string().into())),
272                        &mut findings,
273                    );
274                }
275            }
276        }
277    }
278
279    // Drain all trapped network requests from the CDP listener too
280    while let Ok(Some(req)) =
281        tokio::time::timeout(Duration::from_millis(200), request_events.next()).await
282    {
283        let url = req.request.url.clone();
284
285        // Filter out obvious noise, trap API paths
286        if url.contains("api") || url.ends_with(".json") || url.ends_with(".graphql") {
287            gossan_core::try_push_finding(
288                finding_builder(
289                    &target,
290                    Severity::Info,
291                    "Dynamic API Endpoint Trapped",
292                    format!("Trapped runtime XHR request to: {}", url),
293                )
294                .tag("recon")
295                .tag("dynamic_xhr")
296                .evidence(Evidence::HttpResponse {
297                    status: 200,
298                    headers: vec![],
299                    body_excerpt: Some(
300                        format!(
301                            "Method: {}, Headers: {:?}",
302                            req.request.method, req.request.headers
303                        )
304                        .into(),
305                    ),
306                }),
307                &mut findings,
308            );
309        }
310    }
311
312    // ── Global Variable Extraction ──────────────────────────────────────────
313    // Look for common sensitive global variables or config objects
314    let js_probe = r#"
315        (function() {
316            const interesting = [];
317            const keys = ['config', 'env', 'process', 'API_KEY', 'SECRET', 'TOKEN', 'auth', 'firebase', 'aws'];
318            for (const key of Object.keys(window)) {
319                if (keys.some(k => key.toLowerCase().includes(k.toLowerCase()))) {
320                    try {
321                        const val = window[key];
322                        if (val && typeof val === 'object') {
323                            interesting.push({key, value: JSON.stringify(val).substring(0, 500)});
324                        } else if (val) {
325                            interesting.push({key, value: String(val).substring(0, 200)});
326                        }
327                    } catch(e) {}
328                }
329            }
330            return interesting;
331        })()
332    "#;
333
334    if let Ok(res) = page.evaluate(js_probe).await {
335        if let Some(interesting) = res.value().and_then(|v| v.as_array()) {
336            for item in interesting {
337                let key = item.get("key").and_then(|v| v.as_str()).unwrap_or("?");
338                let value = item.get("value").and_then(|v| v.as_str()).unwrap_or("?");
339
340                gossan_core::try_push_finding(finding_builder(
341                    &target,
342                    Severity::Low,
343                    format!("Sensitive JS global detected: {}", key),
344                    format!("Found global object/variable `{}` which may contain configuration or credentials.", key),
345                )
346                .tag("recon")
347                .tag("js-global")
348                .evidence(Evidence::Raw(format!("{}: {}", key, value).into())), &mut findings);
349            }
350        }
351    }
352
353    // ── Form Extraction ─────────────────────────────────────────────────────
354    let form_probe = r#"
355        (function() {
356            const forms = [];
357            for (const f of document.forms) {
358                const inputs = [];
359                for (const i of f.elements) {
360                    if (i.name) {
361                        inputs.push([i.name, i.type || 'text']);
362                    }
363                }
364                forms.push({
365                    action: f.action,
366                    method: f.method || 'GET',
367                    inputs: inputs
368                });
369            }
370            return forms;
371        })()
372    "#;
373
374    let mut discovered_forms = Vec::new();
375    if let Ok(res) = page.evaluate(form_probe).await {
376        if let Some(forms) = res.value().and_then(|v| v.as_array()) {
377            for f in forms {
378                let action = f
379                    .get("action")
380                    .and_then(|v| v.as_str())
381                    .unwrap_or("")
382                    .to_string();
383                let method = f
384                    .get("method")
385                    .and_then(|v| v.as_str())
386                    .unwrap_or("GET")
387                    .to_string();
388                let mut inputs = Vec::new();
389                if let Some(ins) = f.get("inputs").and_then(|v| v.as_array()) {
390                    for i in ins {
391                        if let Some(pair) = i.as_array() {
392                            let name = pair
393                                .first()
394                                .and_then(|v| v.as_str())
395                                .unwrap_or("")
396                                .to_string();
397                            let typ = pair
398                                .get(1)
399                                .and_then(|v| v.as_str())
400                                .unwrap_or("text")
401                                .to_string();
402                            inputs.push((name, typ));
403                        }
404                    }
405                }
406                discovered_forms.push(gossan_core::DiscoveredForm {
407                    action,
408                    method,
409                    inputs,
410                });
411            }
412        }
413    }
414
415    page.close().await.ok();
416
417    // Update the asset with discovered forms
418    if let Target::Web(ref mut asset) = target {
419        asset.forms = discovered_forms;
420    }
421
422    Ok((target, findings))
423}
424
#[cfg(test)]
mod tests {
    use super::*;
    use gossan_core::{HostTarget, Protocol, ServiceTarget, WebAssetTarget};
    use url::Url;

    /// Loopback host fixture with an optional domain name.
    fn loopback_host(domain: Option<&str>) -> HostTarget {
        HostTarget {
            ip: "127.0.0.1"
                .parse()
                .expect("loopback literal is a valid IP address"),
            domain: domain.map(Into::into),
        }
    }

    /// Minimal web asset pointing at `https://example.com`.
    fn web_target() -> Target {
        Target::Web(Box::new(WebAssetTarget {
            url: Url::parse("https://example.com").expect("static URL is valid"),
            service: ServiceTarget {
                host: loopback_host(Some("example.com")),
                port: 443,
                protocol: Protocol::Tcp,
                banner: None,
                tls: true,
            },
            tech: vec![],
            status: 200,
            title: None,
            favicon_hash: None,
            body_hash: None,
            forms: vec![],
            params: vec![],
        }))
    }

    /// Launches a sandbox-less headless browser and a task that polls its
    /// handler.
    ///
    /// Returns `None` when the configuration cannot be built (for example
    /// because no Chrome executable is found) or the launch fails, so the
    /// browser-backed tests skip instead of panicking in such environments.
    async fn launch_test_browser() -> Option<(Browser, tokio::task::JoinHandle<()>)> {
        let browser_config = BrowserConfig::builder().no_sandbox().build().ok()?;
        let (browser, mut handler) = Browser::launch(browser_config).await.ok()?;
        let pump = tokio::spawn(async move { while handler.next().await.is_some() {} });
        Some((browser, pump))
    }

    #[test]
    fn scanner_metadata_is_stable() {
        let scanner = HeadlessScanner;
        assert_eq!(scanner.name(), "headless");
        assert!(scanner.tags().contains(&"headless"));
    }

    #[test]
    fn scanner_accepts_only_web_targets() {
        let scanner = HeadlessScanner;
        assert!(scanner.accepts(&web_target()));
        assert!(!scanner.accepts(&Target::Host(loopback_host(None))));
    }

    #[tokio::test]
    async fn test_analyze_target_graceful_on_invalid_url() {
        // Skip if browser cannot launch in this environment
        let Some((browser, pump)) = launch_test_browser().await else {
            return;
        };

        let mut target = web_target();
        if let Target::Web(ref mut asset) = target {
            asset.url = Url::parse("http://0.0.0.0:1").expect("Invalid URL");
        }
        let config = Config::default();

        let result = analyze_target(&browser, target, &config).await;
        pump.abort();
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_analyze_target_with_incomplete_auth_does_not_panic() {
        let Some((browser, pump)) = launch_test_browser().await else {
            return;
        };

        let target = web_target();
        let mut config = Config::default();
        config.auth_user = Some("admin".into());
        config.auth_pass = None; // Should skip login logic

        // NOTE(review): this navigates to https://example.com, so the test
        // touches the network; only the absence of a panic is checked.
        let _ = analyze_target(&browser, target, &config).await;
        pump.abort();
    }
}