Skip to main content

cortex_runtime/acquisition/
auth.rs

1//! Authentication handlers -- login via password, OAuth, API key.
2//!
3//! Discovers login methods from HTML, executes password-based login via HTTP
4//! (no browser needed for standard forms), and creates authenticated sessions.
5
6use crate::acquisition::http_client::HttpClient;
7use crate::acquisition::http_session::{AuthType, HttpSession};
8use anyhow::{bail, Result};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12
13// ---- Public types -----------------------------------------------------------
14
/// Discovered login method for a site.
///
/// This is the result type of `discover_login_method`. Within this file,
/// `parse_login_form` builds the `Password` variant, `detect_oauth_from_html`
/// builds the `OAuth` variant, and `discover_login_method` falls back to
/// `Unknown` when neither heuristic matches.
#[derive(Debug, Clone)]
pub enum LoginMethod {
    /// Password form found at a URL.
    Password {
        /// URL of the page containing the login form.
        form_url: String,
        /// Resolved action URL where the form POSTs to. Falls back to the
        /// page URL when the `<form>` tag has no `action` attribute.
        form_action: String,
        /// HTTP method, upper-cased (usually `POST`; `POST` is also the
        /// default when the form declares no method).
        method: String,
        /// All named `<input>` fields in the login form.
        fields: Vec<LoginFormField>,
    },
    /// OAuth providers detected.
    OAuth {
        /// Names of detected OAuth providers (e.g. `"google"`, `"github"`),
        /// deduplicated.
        providers: Vec<String>,
    },
    /// API key documentation found.
    ///
    /// NOTE(review): nothing in the visible part of this file constructs
    /// this variant — confirm where it is produced.
    ApiKey {
        /// URL to API key documentation, if discovered.
        docs_url: Option<String>,
    },
    /// Could not determine login method.
    Unknown,
}
42
/// A field in a login form.
///
/// One instance is created per named `<input>` element by
/// `parse_login_form`; inputs without a `name` attribute are skipped.
#[derive(Debug, Clone)]
pub struct LoginFormField {
    /// The `name` attribute of the input element.
    pub name: String,
    /// The `type` attribute, lower-cased (e.g. `"text"`, `"password"`,
    /// `"hidden"`). Defaults to `"text"` when the input has no `type`.
    pub field_type: String,
    /// Pre-filled value (for hidden fields like CSRF tokens); `None` when the
    /// input has no `value` attribute.
    pub value: Option<String>,
    /// Whether this field is the username/email field. Set either by the
    /// field-name heuristic or, failing that, for the first text/email input
    /// that does not look like a CSRF token.
    pub is_username: bool,
    /// Whether this field is the password field (`type="password"`).
    pub is_password: bool,
}
57
58// ---- OAuth types ------------------------------------------------------------
59
/// Result of an HTTP-native OAuth flow.
///
/// OAuth redirect chains can often be completed without a browser when consent
/// was previously granted. When consent *is* needed, the flow pauses for
/// agent/user approval.
///
/// Returned by `login_oauth_http`; every outcome that cannot be handled over
/// plain HTTP is reported as `BrowserFallbackNeeded` rather than as an error.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum OAuthResult {
    /// Consent was pre-approved — auth code obtained via redirect chain.
    SilentSuccess {
        /// The authorization code (value of the `code` query parameter).
        code: String,
        /// The final redirect URL containing the code.
        redirect_url: String,
    },
    /// Consent is needed — pausing for user/agent approval.
    ConsentRequired {
        /// OAuth scopes requested, as heuristically extracted from the
        /// consent page HTML.
        scopes: Vec<String>,
        /// Application name requesting access (`"Unknown Application"` when
        /// it cannot be extracted).
        app_name: String,
        /// The parsed consent form for submission via `complete_oauth_consent`.
        consent_form: ConsentForm,
    },
    /// Multi-factor authentication required during auth flow.
    MfaRequired {
        /// Type of MFA challenge.
        mfa_type: MfaType,
        /// The MFA challenge form, to be submitted via `handle_oauth_mfa`.
        challenge_form: HtmlForm,
    },
    /// HTTP approach failed — fall back to browser.
    BrowserFallbackNeeded {
        /// Reason the HTTP approach failed (human-readable).
        reason: String,
    },
}
96
/// A parsed consent form from an OAuth provider.
///
/// Built by `parse_consent_form` from the first `<form>` on the consent page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsentForm {
    /// The URL to POST consent approval to.
    pub action_url: String,
    /// Fields required for the POST (state, CSRF, etc.).
    ///
    /// NOTE(review): despite the name, `parse_consent_form` copies *every*
    /// named `<input>` of the form here, not only `type="hidden"` ones.
    pub hidden_fields: HashMap<String, String>,
    /// The OAuth provider name.
    pub provider: String,
}
107
/// Type of MFA challenge.
///
/// Classified from page text by `detect_mfa_type`, which checks the variants
/// in declaration order and returns the first whose keywords appear.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MfaType {
    /// Time-based one-time password (TOTP / authenticator app).
    Totp,
    /// SMS verification code.
    Sms,
    /// Email verification code.
    Email,
    /// Push notification (e.g., Duo).
    Push,
    /// Unknown MFA type (no known keyword matched).
    Unknown,
}
122
/// A parsed HTML form with action URL and fields.
///
/// Produced by `parse_first_form`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HtmlForm {
    /// The URL the form posts to, resolved against the page URL (the page
    /// URL itself when the form has no `action` attribute).
    pub action_url: String,
    /// HTTP method, upper-cased (usually POST; POST is also the default).
    pub method: String,
    /// All form fields: name → value. Inputs without a `value` attribute map
    /// to an empty string.
    pub fields: HashMap<String, String>,
}
133
134// ---- Public async API -------------------------------------------------------
135
136/// Discover the login method for a site by fetching its homepage and login page.
137///
138/// Checks common login URL patterns (`/login`, `/signin`, `/auth`,
139/// `/account/login`, `/wp-login.php`). When a login page is found, its HTML
140/// is analysed for password forms and OAuth buttons.
141pub async fn discover_login_method(client: &HttpClient, domain: &str) -> Result<LoginMethod> {
142    let base_url = format!("https://{domain}");
143
144    // Fetch the homepage and look for login links.
145    let homepage = client.get(&base_url, 15_000).await?;
146    let login_links = find_login_links(&homepage.body, &base_url);
147
148    // Also try well-known login paths even if no link was found.
149    let well_known = [
150        format!("{base_url}/login"),
151        format!("{base_url}/signin"),
152        format!("{base_url}/auth/login"),
153        format!("{base_url}/account/login"),
154        format!("{base_url}/wp-login.php"),
155    ];
156
157    // Merge discovered links with well-known paths, deduplicated.
158    let mut candidates: Vec<String> = login_links;
159    for wk in &well_known {
160        if !candidates.contains(wk) {
161            candidates.push(wk.clone());
162        }
163    }
164
165    // Try each candidate until we find a login form.
166    for candidate_url in &candidates {
167        let resp = match client.get(candidate_url, 15_000).await {
168            Ok(r) => r,
169            Err(_) => continue,
170        };
171
172        // Skip non-200 responses.
173        if resp.status != 200 {
174            continue;
175        }
176
177        if let Some(method) = parse_login_form(&resp.body, &resp.final_url) {
178            return Ok(method);
179        }
180    }
181
182    // If no form found, check homepage itself for OAuth buttons.
183    if let Some(LoginMethod::OAuth { providers }) =
184        detect_oauth_from_html(&homepage.body).filter(|m| matches!(m, LoginMethod::OAuth { .. }))
185    {
186        if !providers.is_empty() {
187            return Ok(LoginMethod::OAuth { providers });
188        }
189    }
190
191    Ok(LoginMethod::Unknown)
192}
193
194/// Log in with username and password via HTML form POST.
195///
196/// Discovers the login form, fills in the username and password fields (keeping
197/// hidden fields like CSRF tokens), POSTs the form, and captures session
198/// cookies from `Set-Cookie` response headers.
199pub async fn login_password(
200    client: &HttpClient,
201    domain: &str,
202    username: &str,
203    password: &str,
204) -> Result<HttpSession> {
205    let method = discover_login_method(client, domain).await?;
206
207    let (form_action, http_method, fields) = match method {
208        LoginMethod::Password {
209            form_action,
210            method,
211            fields,
212            ..
213        } => (form_action, method, fields),
214        _ => bail!("no password login form found for {domain}"),
215    };
216
217    // Build form body: fill in username/password, keep hidden fields.
218    let mut form_data: Vec<(String, String)> = Vec::new();
219    for field in &fields {
220        if field.is_username {
221            form_data.push((field.name.clone(), username.to_string()));
222        } else if field.is_password {
223            form_data.push((field.name.clone(), password.to_string()));
224        } else if let Some(ref val) = field.value {
225            form_data.push((field.name.clone(), val.clone()));
226        }
227    }
228
229    // Extract CSRF token if present.
230    let csrf_token = fields
231        .iter()
232        .find(|f| is_csrf_field_name(&f.name))
233        .and_then(|f| f.value.clone());
234
235    // POST the form.
236    if http_method != "POST" {
237        bail!("login form uses {http_method}, expected POST");
238    }
239
240    let resp = client
241        .post_form(&form_action, &form_data, &[], 15_000)
242        .await?;
243
244    // Parse Set-Cookie headers.
245    let cookies = parse_set_cookies(&resp.headers);
246
247    if cookies.is_empty() && resp.status >= 400 {
248        bail!(
249            "login failed for {domain}: status {} with no cookies",
250            resp.status
251        );
252    }
253
254    let mut session = HttpSession::new(domain, AuthType::Password);
255    for (name, value) in cookies {
256        session.add_cookie(&name, &value);
257    }
258    session.csrf_token = csrf_token;
259
260    Ok(session)
261}
262
263/// Create an API-key authenticated session (no network call needed).
264///
265/// The key is stored as an auth header with the given name (e.g. `X-Api-Key`).
266pub fn login_api_key(domain: &str, key: &str, header_name: &str) -> HttpSession {
267    let mut session = HttpSession::new(domain, AuthType::ApiKey);
268    session.add_auth_header(header_name, key);
269    session
270}
271
272/// Create a bearer-token authenticated session (no network call needed).
273///
274/// Stores `Authorization: Bearer {token}` as an auth header.
275pub fn login_bearer(domain: &str, token: &str) -> HttpSession {
276    let mut session = HttpSession::new(domain, AuthType::Bearer);
277    session.add_auth_header("Authorization", &format!("Bearer {token}"));
278    session
279}
280
281// ---- OAuth HTTP flow --------------------------------------------------------
282
283/// Attempt OAuth login via HTTP redirect chain (no browser).
284///
285/// Follows the OAuth redirect chain via HTTP. If consent was previously
286/// granted, the provider redirects directly to the callback with an auth
287/// code — zero browser, sub-second. If consent is needed, parses the
288/// consent page HTML and returns `OAuthResult::ConsentRequired`.
289///
290/// # Arguments
291///
292/// * `client` - HTTP client for making requests.
293/// * `auth_url` - The initial OAuth authorization URL (e.g., from the login page).
294/// * `provider` - OAuth provider name (e.g., `"google"`, `"github"`).
295pub async fn login_oauth_http(
296    client: &HttpClient,
297    auth_url: &str,
298    provider: &str,
299) -> Result<OAuthResult> {
300    // Step 1: Follow redirects via HTTP GET.
301    // We use the client to follow the initial redirect chain.
302    let resp = client.get(auth_url, 15_000).await?;
303
304    // Check if we got redirected to a callback URL with an auth code.
305    // This happens when consent was previously granted.
306    if let Some(code) = extract_auth_code_from_url(&resp.final_url) {
307        tracing::info!("OAuth silent success for {provider}: consent was pre-approved");
308        return Ok(OAuthResult::SilentSuccess {
309            code,
310            redirect_url: resp.final_url,
311        });
312    }
313
314    // Check for MFA challenge pages.
315    if is_mfa_page(&resp.body) {
316        let mfa_type = detect_mfa_type(&resp.body);
317        let form = parse_first_form(&resp.body, &resp.final_url);
318        return Ok(match form {
319            Some(f) => OAuthResult::MfaRequired {
320                mfa_type,
321                challenge_form: f,
322            },
323            None => OAuthResult::BrowserFallbackNeeded {
324                reason: "MFA page found but could not parse form".to_string(),
325            },
326        });
327    }
328
329    // Step 2: If we're on a consent page, parse it.
330    if resp.status == 200 && is_consent_page(&resp.body) {
331        let scopes = extract_oauth_scopes(&resp.body);
332        let app_name = extract_app_name(&resp.body);
333        let consent_form = parse_consent_form(&resp.body, &resp.final_url, provider);
334
335        return Ok(match consent_form {
336            Some(form) => OAuthResult::ConsentRequired {
337                scopes,
338                app_name,
339                consent_form: form,
340            },
341            None => OAuthResult::BrowserFallbackNeeded {
342                reason: "consent page found but could not parse approval form".to_string(),
343            },
344        });
345    }
346
347    // Step 3: If the response is an error or unknown page, fall back to browser.
348    if resp.status >= 400 {
349        return Ok(OAuthResult::BrowserFallbackNeeded {
350            reason: format!("OAuth redirect returned status {}", resp.status),
351        });
352    }
353
354    // Unknown page — might need browser interaction.
355    Ok(OAuthResult::BrowserFallbackNeeded {
356        reason: "could not complete OAuth flow via HTTP".to_string(),
357    })
358}
359
360/// Complete an OAuth consent form by POSTing approval.
361///
362/// Called after the agent/user approves the consent request.
363///
364/// Returns the authorization code on success.
365pub async fn complete_oauth_consent(
366    client: &HttpClient,
367    consent_form: &ConsentForm,
368    approved: bool,
369) -> Result<String> {
370    if !approved {
371        bail!("OAuth consent was denied by user/agent");
372    }
373
374    // Build form data from hidden fields + approval flag.
375    let mut form_data: Vec<(String, String)> = consent_form
376        .hidden_fields
377        .iter()
378        .map(|(k, v)| (k.clone(), v.clone()))
379        .collect();
380    form_data.push(("submit_access".to_string(), "true".to_string()));
381
382    let resp = client
383        .post_form(&consent_form.action_url, &form_data, &[], 15_000)
384        .await?;
385
386    // Check if the response redirected to a callback URL with an auth code.
387    if let Some(code) = extract_auth_code_from_url(&resp.final_url) {
388        return Ok(code);
389    }
390
391    // Try to find the code in the response body (some providers embed it).
392    if let Some(code) = extract_auth_code_from_body(&resp.body) {
393        return Ok(code);
394    }
395
396    bail!(
397        "OAuth consent submission did not yield an auth code (status: {})",
398        resp.status
399    )
400}
401
402/// Submit an MFA code via form POST.
403///
404/// Returns an updated session with cookies from the MFA response.
405pub async fn handle_oauth_mfa(
406    client: &HttpClient,
407    form: &HtmlForm,
408    mfa_code: &str,
409    domain: &str,
410) -> Result<HttpSession> {
411    let mut form_data: Vec<(String, String)> = form
412        .fields
413        .iter()
414        .map(|(k, v)| (k.clone(), v.clone()))
415        .collect();
416
417    // Common MFA field names.
418    let mfa_field_names = ["code", "otp", "mfa_code", "verification_code", "pin"];
419    let mfa_field = form
420        .fields
421        .keys()
422        .find(|k| {
423            let lower = k.to_lowercase();
424            mfa_field_names.iter().any(|n| lower.contains(n))
425        })
426        .cloned();
427
428    if let Some(field_name) = mfa_field {
429        // Update the existing field.
430        for (k, v) in &mut form_data {
431            if k == &field_name {
432                *v = mfa_code.to_string();
433            }
434        }
435    } else {
436        // Use "code" as default field name.
437        form_data.push(("code".to_string(), mfa_code.to_string()));
438    }
439
440    let resp = client
441        .post_form(&form.action_url, &form_data, &[], 15_000)
442        .await?;
443
444    let cookies = parse_set_cookies(&resp.headers);
445    if cookies.is_empty() && resp.status >= 400 {
446        bail!("MFA verification failed: status {}", resp.status);
447    }
448
449    let mut session = HttpSession::new(domain, AuthType::OAuth("mfa".to_string()));
450    for (name, value) in cookies {
451        session.add_cookie(&name, &value);
452    }
453
454    Ok(session)
455}
456
457// ---- Private helpers --------------------------------------------------------
458
459/// Scan HTML for `<a>` tags whose href contains login-related paths.
460fn find_login_links(html: &str, base_url: &str) -> Vec<String> {
461    let link_re =
462        Regex::new(r#"<a\s[^>]*href\s*=\s*["']([^"']+)["'][^>]*>"#).expect("link regex is valid");
463
464    let login_patterns = [
465        "/login",
466        "/signin",
467        "/sign-in",
468        "/auth",
469        "/account/login",
470        "/wp-login.php",
471        "/users/sign_in",
472        "/session/new",
473    ];
474
475    let mut found = Vec::new();
476    for caps in link_re.captures_iter(html) {
477        let href = caps.get(1).map_or("", |m| m.as_str());
478        let href_lower = href.to_lowercase();
479        if login_patterns.iter().any(|p| href_lower.contains(p)) {
480            let resolved = resolve_url(base_url, href);
481            if !found.contains(&resolved) {
482                found.push(resolved);
483            }
484        }
485    }
486    found
487}
488
489/// Parse HTML to find a login form (a `<form>` with a password input).
490///
491/// Returns `LoginMethod::Password` if a suitable form is found, `None` otherwise.
492fn parse_login_form(html: &str, base_url: &str) -> Option<LoginMethod> {
493    // Find <form> blocks that contain a password input.
494    let form_re = Regex::new(r"(?is)<form\b([^>]*)>(.*?)</form>").expect("form regex is valid");
495    let action_re =
496        Regex::new(r#"(?i)action\s*=\s*["']([^"']+)["']"#).expect("action regex is valid");
497    let method_re =
498        Regex::new(r#"(?i)method\s*=\s*["']([^"']+)["']"#).expect("method regex is valid");
499    let input_re = Regex::new(r#"(?i)<input\b([^>]*)>"#).expect("input regex is valid");
500    let attr_name_re =
501        Regex::new(r#"(?i)name\s*=\s*["']([^"']+)["']"#).expect("attr name regex is valid");
502    let attr_type_re =
503        Regex::new(r#"(?i)type\s*=\s*["']([^"']+)["']"#).expect("attr type regex is valid");
504    let attr_value_re =
505        Regex::new(r#"(?i)value\s*=\s*["']([^"']*?)["']"#).expect("attr value regex is valid");
506
507    for form_caps in form_re.captures_iter(html) {
508        let form_attrs = form_caps.get(1).map_or("", |m| m.as_str());
509        let form_body = form_caps.get(2).map_or("", |m| m.as_str());
510
511        // Only consider forms that contain a password field.
512        if !form_body.to_lowercase().contains("type=\"password\"")
513            && !form_body.to_lowercase().contains("type='password'")
514        {
515            continue;
516        }
517
518        let form_action = action_re
519            .captures(form_attrs)
520            .and_then(|c| c.get(1))
521            .map(|m| resolve_url(base_url, m.as_str()))
522            .unwrap_or_else(|| base_url.to_string());
523
524        let method = method_re
525            .captures(form_attrs)
526            .and_then(|c| c.get(1))
527            .map(|m| m.as_str().to_uppercase())
528            .unwrap_or_else(|| "POST".to_string());
529
530        let mut fields = Vec::new();
531        for input_caps in input_re.captures_iter(form_body) {
532            let input_attrs = input_caps.get(1).map_or("", |m| m.as_str());
533
534            let name = match attr_name_re.captures(input_attrs) {
535                Some(c) => c.get(1).map_or("", |m| m.as_str()).to_string(),
536                None => continue, // skip inputs without a name
537            };
538
539            let field_type = attr_type_re
540                .captures(input_attrs)
541                .and_then(|c| c.get(1))
542                .map(|m| m.as_str().to_lowercase())
543                .unwrap_or_else(|| "text".to_string());
544
545            let value = attr_value_re
546                .captures(input_attrs)
547                .and_then(|c| c.get(1))
548                .map(|m| m.as_str().to_string());
549
550            let is_password = field_type == "password";
551            let is_username = !is_password
552                && (field_type == "text" || field_type == "email")
553                && is_username_field_name(&name);
554
555            fields.push(LoginFormField {
556                name,
557                field_type,
558                value,
559                is_username,
560                is_password,
561            });
562        }
563
564        // If we didn't identify any username field, pick the first text/email
565        // field that is not a CSRF token.
566        let has_username = fields.iter().any(|f| f.is_username);
567        if !has_username {
568            if let Some(f) = fields.iter_mut().find(|f| {
569                (f.field_type == "text" || f.field_type == "email") && !is_csrf_field_name(&f.name)
570            }) {
571                f.is_username = true;
572            }
573        }
574
575        return Some(LoginMethod::Password {
576            form_url: base_url.to_string(),
577            form_action,
578            method,
579            fields,
580        });
581    }
582
583    // Check for OAuth-only login pages.
584    detect_oauth_from_html(html)
585}
586
587/// Detect OAuth providers from HTML content.
588fn detect_oauth_from_html(html: &str) -> Option<LoginMethod> {
589    let mut providers = Vec::new();
590
591    let oauth_patterns: &[(&str, &str)] = &[
592        ("accounts.google.com", "google"),
593        ("github.com/login/oauth", "github"),
594        ("facebook.com/v", "facebook"),
595        ("login.microsoftonline.com", "microsoft"),
596        ("appleid.apple.com", "apple"),
597        ("twitter.com/oauth", "twitter"),
598        ("api.twitter.com/oauth", "twitter"),
599    ];
600
601    let html_lower = html.to_lowercase();
602    for (pattern, provider) in oauth_patterns {
603        if html_lower.contains(pattern) && !providers.contains(&provider.to_string()) {
604            providers.push(provider.to_string());
605        }
606    }
607
608    if providers.is_empty() {
609        None
610    } else {
611        Some(LoginMethod::OAuth { providers })
612    }
613}
614
/// Heuristic: does this input name look like a username or email field?
///
/// The comparison is case-insensitive. A name qualifies when it is exactly
/// `id`, `name` or `username`, or when it contains `user`, `email`, `login`
/// or `account` anywhere.
fn is_username_field_name(name: &str) -> bool {
    let lower = name.to_lowercase();
    if matches!(lower.as_str(), "id" | "name" | "username") {
        return true;
    }
    ["user", "email", "login", "account"]
        .iter()
        .any(|needle| lower.contains(*needle))
}
626
/// Heuristic: does this input name look like a CSRF/anti-forgery token?
///
/// Case-insensitive. Matches any name containing `csrf`, `_token`, `nonce`
/// or `xsrf` (which includes `authenticity_token`).
fn is_csrf_field_name(name: &str) -> bool {
    let lower = name.to_lowercase();
    if lower == "authenticity_token" {
        return true;
    }
    ["csrf", "_token", "nonce", "xsrf"]
        .iter()
        .any(|needle| lower.contains(*needle))
}
636
/// Collect cookies from `Set-Cookie` response headers.
///
/// A `Set-Cookie` header has the form `name=value; attr1; attr2=val2`; only
/// the leading `name=value` pair is kept, with surrounding whitespace
/// trimmed. Header names are compared case-insensitively. Headers with no
/// `=` in the first segment or with an empty cookie name are ignored, and a
/// later header overrides an earlier one for the same cookie name.
fn parse_set_cookies(headers: &[(String, String)]) -> HashMap<String, String> {
    headers
        .iter()
        .filter(|(header, _)| header.to_lowercase() == "set-cookie")
        .filter_map(|(_, raw)| {
            // Everything after the first `;` is cookie attributes.
            let pair = raw.split(';').next().unwrap_or("");
            let (cname, cvalue) = pair.split_once('=')?;
            let cname = cname.trim();
            if cname.is_empty() {
                None
            } else {
                Some((cname.to_string(), cvalue.trim().to_string()))
            }
        })
        .collect()
}
662
663/// Resolve a potentially relative URL against a base URL.
664fn resolve_url(base_url: &str, relative: &str) -> String {
665    if relative.is_empty() {
666        return base_url.to_string();
667    }
668    if relative.starts_with("http://") || relative.starts_with("https://") {
669        return relative.to_string();
670    }
671    if let Ok(base) = url::Url::parse(base_url) {
672        if let Ok(resolved) = base.join(relative) {
673            return resolved.to_string();
674        }
675    }
676    relative.to_string()
677}
678
679// ---- OAuth private helpers --------------------------------------------------
680
681/// Extract an authorization code from a redirect URL's query parameters.
682fn extract_auth_code_from_url(url: &str) -> Option<String> {
683    let parsed = url::Url::parse(url).ok()?;
684    parsed
685        .query_pairs()
686        .find(|(k, _)| k == "code")
687        .map(|(_, v)| v.to_string())
688}
689
690/// Extract an authorization code from an HTML response body.
691fn extract_auth_code_from_body(body: &str) -> Option<String> {
692    let code_re = Regex::new(r#"code['"]\s*(?:value|content)\s*=\s*['"]([^'"]+)['"]"#).ok()?;
693    code_re
694        .captures(body)
695        .and_then(|c| c.get(1))
696        .map(|m| m.as_str().to_string())
697}
698
/// Heuristic: does this HTML look like an OAuth consent page?
///
/// True when the lower-cased page mentions approval (`consent`, `authorize`
/// or `grant access`) *and* mentions what is being granted (`scope` or
/// `permission`).
fn is_consent_page(html: &str) -> bool {
    let lower = html.to_lowercase();
    let mentions_approval = ["consent", "authorize", "grant access"]
        .iter()
        .any(|kw| lower.contains(*kw));
    let mentions_grant = ["scope", "permission"]
        .iter()
        .any(|kw| lower.contains(*kw));
    mentions_approval && mentions_grant
}
705
/// Heuristic: does this HTML look like an MFA challenge page?
///
/// True when the lower-cased page contains an MFA keyword (`verification`,
/// `2-step`, `two-factor`, `mfa` or `authenticator`) *and* some form markup
/// (`<form` or `<input`).
fn is_mfa_page(html: &str) -> bool {
    let lower = html.to_lowercase();
    let has_form_markup = lower.contains("<form") || lower.contains("<input");
    if !has_form_markup {
        return false;
    }
    ["verification", "2-step", "two-factor", "mfa", "authenticator"]
        .iter()
        .any(|kw| lower.contains(*kw))
}
716
717/// Detect the type of MFA challenge from page content.
718fn detect_mfa_type(html: &str) -> MfaType {
719    let lower = html.to_lowercase();
720    if lower.contains("authenticator") || lower.contains("totp") || lower.contains("6-digit") {
721        MfaType::Totp
722    } else if lower.contains("sms") || lower.contains("text message") || lower.contains("phone") {
723        MfaType::Sms
724    } else if lower.contains("email") && lower.contains("code") {
725        MfaType::Email
726    } else if lower.contains("push") || lower.contains("notification") || lower.contains("duo") {
727        MfaType::Push
728    } else {
729        MfaType::Unknown
730    }
731}
732
733/// Extract OAuth scopes from a consent page.
734fn extract_oauth_scopes(html: &str) -> Vec<String> {
735    let mut scopes = Vec::new();
736
737    // Look for scope items in list elements.
738    let scope_re = Regex::new(r#"(?i)<li[^>]*class="[^"]*scope[^"]*"[^>]*>([^<]+)</li>"#).unwrap();
739    for caps in scope_re.captures_iter(html) {
740        if let Some(m) = caps.get(1) {
741            let scope = m.as_str().trim().to_string();
742            if !scope.is_empty() && !scopes.contains(&scope) {
743                scopes.push(scope);
744            }
745        }
746    }
747
748    // Fallback: look for common scope keywords.
749    if scopes.is_empty() {
750        let scope_keywords = ["email", "profile", "openid", "read", "write"];
751        let lower = html.to_lowercase();
752        for keyword in &scope_keywords {
753            if lower.contains(keyword) {
754                scopes.push(keyword.to_string());
755            }
756        }
757    }
758
759    scopes
760}
761
762/// Extract the application name from a consent page.
763fn extract_app_name(html: &str) -> String {
764    // Try to find app name in common patterns.
765    let app_re =
766        Regex::new(r#"(?i)(?:<strong>|<b>|class="[^"]*app[_-]?name[^"]*"[^>]*>)([^<]+)<"#).unwrap();
767
768    if let Some(caps) = app_re.captures(html) {
769        if let Some(m) = caps.get(1) {
770            return m.as_str().trim().to_string();
771        }
772    }
773
774    "Unknown Application".to_string()
775}
776
777/// Parse a consent form from an OAuth consent page.
778fn parse_consent_form(html: &str, base_url: &str, provider: &str) -> Option<ConsentForm> {
779    let form = parse_first_form(html, base_url)?;
780
781    Some(ConsentForm {
782        action_url: form.action_url,
783        hidden_fields: form.fields,
784        provider: provider.to_string(),
785    })
786}
787
788/// Parse the first `<form>` tag from HTML into an HtmlForm.
789fn parse_first_form(html: &str, base_url: &str) -> Option<HtmlForm> {
790    let form_re = Regex::new(r"(?is)<form\b([^>]*)>(.*?)</form>").ok()?;
791    let action_re = Regex::new(r#"(?i)action\s*=\s*["']([^"']+)["']"#).ok()?;
792    let method_re = Regex::new(r#"(?i)method\s*=\s*["']([^"']+)["']"#).ok()?;
793    let input_re = Regex::new(r#"(?i)<input\b([^>]*)>"#).ok()?;
794    let name_re = Regex::new(r#"(?i)name\s*=\s*["']([^"']+)["']"#).ok()?;
795    let value_re = Regex::new(r#"(?i)value\s*=\s*["']([^"']*?)["']"#).ok()?;
796
797    let form_caps = form_re.captures(html)?;
798    let form_attrs = form_caps.get(1).map_or("", |m| m.as_str());
799    let form_body = form_caps.get(2).map_or("", |m| m.as_str());
800
801    let action_url = action_re
802        .captures(form_attrs)
803        .and_then(|c| c.get(1))
804        .map(|m| resolve_url(base_url, m.as_str()))
805        .unwrap_or_else(|| base_url.to_string());
806
807    let method = method_re
808        .captures(form_attrs)
809        .and_then(|c| c.get(1))
810        .map(|m| m.as_str().to_uppercase())
811        .unwrap_or_else(|| "POST".to_string());
812
813    let mut fields = HashMap::new();
814    for input_caps in input_re.captures_iter(form_body) {
815        let input_attrs = input_caps.get(1).map_or("", |m| m.as_str());
816        if let Some(name_cap) = name_re.captures(input_attrs) {
817            let name = name_cap.get(1).map_or("", |m| m.as_str()).to_string();
818            let value = value_re
819                .captures(input_attrs)
820                .and_then(|c| c.get(1))
821                .map(|m| m.as_str().to_string())
822                .unwrap_or_default();
823            if !name.is_empty() {
824                fields.insert(name, value);
825            }
826        }
827    }
828
829    Some(HtmlForm {
830        action_url,
831        method,
832        fields,
833    })
834}
835
836// ---- Tests ------------------------------------------------------------------
837
838#[cfg(test)]
839mod tests {
840    use super::*;
841
842    #[test]
843    fn test_find_login_links() {
844        let html = r#"
845        <html><body>
846            <a href="/about">About</a>
847            <a href="/login">Log In</a>
848            <a href="/products">Products</a>
849            <a href="/account/login">My Account</a>
850        </body></html>
851        "#;
852
853        let links = find_login_links(html, "https://example.com");
854        assert_eq!(links.len(), 2);
855        assert!(links.contains(&"https://example.com/login".to_string()));
856        assert!(links.contains(&"https://example.com/account/login".to_string()));
857    }
858
859    #[test]
860    fn test_parse_login_form() {
861        let html = r#"
862        <html><body>
863            <form action="/auth/login" method="POST">
864                <input type="hidden" name="csrf_token" value="abc123" />
865                <input type="email" name="email" />
866                <input type="password" name="password" />
867                <button type="submit">Sign In</button>
868            </form>
869        </body></html>
870        "#;
871
872        let method = parse_login_form(html, "https://example.com");
873        assert!(method.is_some());
874
875        if let Some(LoginMethod::Password {
876            form_action,
877            method,
878            fields,
879            ..
880        }) = method
881        {
882            assert_eq!(form_action, "https://example.com/auth/login");
883            assert_eq!(method, "POST");
884            assert_eq!(fields.len(), 3);
885
886            let csrf = fields.iter().find(|f| f.name == "csrf_token").unwrap();
887            assert_eq!(csrf.field_type, "hidden");
888            assert_eq!(csrf.value.as_deref(), Some("abc123"));
889            assert!(!csrf.is_username);
890            assert!(!csrf.is_password);
891
892            let email = fields.iter().find(|f| f.name == "email").unwrap();
893            assert!(email.is_username);
894            assert!(!email.is_password);
895
896            let pw = fields.iter().find(|f| f.name == "password").unwrap();
897            assert!(!pw.is_username);
898            assert!(pw.is_password);
899        } else {
900            panic!("expected LoginMethod::Password");
901        }
902    }
903
904    #[test]
905    fn test_parse_login_form_oauth() {
906        let html = r#"
907        <html><body>
908            <a href="https://accounts.google.com/o/oauth2/auth?client_id=123">
909                Sign in with Google
910            </a>
911            <a href="https://github.com/login/oauth/authorize?client_id=456">
912                Sign in with GitHub
913            </a>
914        </body></html>
915        "#;
916
917        let method = parse_login_form(html, "https://example.com");
918        assert!(method.is_some());
919
920        if let Some(LoginMethod::OAuth { providers }) = method {
921            assert!(providers.contains(&"google".to_string()));
922            assert!(providers.contains(&"github".to_string()));
923        } else {
924            panic!("expected LoginMethod::OAuth");
925        }
926    }
927
928    #[test]
929    fn test_login_api_key() {
930        let session = login_api_key("api.example.com", "my-secret-key", "X-Api-Key");
931
932        assert_eq!(session.domain, "api.example.com");
933        assert_eq!(session.auth_type, AuthType::ApiKey);
934        assert_eq!(
935            session.auth_headers.get("X-Api-Key").unwrap(),
936            "my-secret-key"
937        );
938        assert!(session.cookies.is_empty());
939    }
940
941    #[test]
942    fn test_login_bearer() {
943        let session = login_bearer("api.example.com", "tok_abc123");
944
945        assert_eq!(session.domain, "api.example.com");
946        assert_eq!(session.auth_type, AuthType::Bearer);
947        assert_eq!(
948            session.auth_headers.get("Authorization").unwrap(),
949            "Bearer tok_abc123"
950        );
951        assert!(session.cookies.is_empty());
952    }
953
954    #[test]
955    fn test_parse_set_cookies() {
956        let headers = vec![
957            ("content-type".to_string(), "text/html".to_string()),
958            (
959                "set-cookie".to_string(),
960                "session_id=abc123; Path=/; HttpOnly".to_string(),
961            ),
962            (
963                "set-cookie".to_string(),
964                "csrftoken=xyz789; Secure; SameSite=Strict".to_string(),
965            ),
966            (
967                "set-cookie".to_string(),
968                "pref=dark; Max-Age=3600".to_string(),
969            ),
970        ];
971
972        let cookies = parse_set_cookies(&headers);
973        assert_eq!(cookies.len(), 3);
974        assert_eq!(cookies.get("session_id").unwrap(), "abc123");
975        assert_eq!(cookies.get("csrftoken").unwrap(), "xyz789");
976        assert_eq!(cookies.get("pref").unwrap(), "dark");
977    }
978
979    #[test]
980    fn test_find_login_links_absolute_url() {
981        let html = r#"
982        <html><body>
983            <a href="https://auth.example.com/signin">Sign In</a>
984        </body></html>
985        "#;
986
987        let links = find_login_links(html, "https://example.com");
988        assert_eq!(links.len(), 1);
989        assert_eq!(links[0], "https://auth.example.com/signin");
990    }
991
992    #[test]
993    fn test_find_login_links_no_matches() {
994        let html = r#"
995        <html><body>
996            <a href="/about">About</a>
997            <a href="/products">Products</a>
998        </body></html>
999        "#;
1000
1001        let links = find_login_links(html, "https://example.com");
1002        assert!(links.is_empty());
1003    }
1004
1005    #[test]
1006    fn test_extract_auth_code_from_url() {
1007        let url = "https://example.com/callback?code=abc123&state=xyz";
1008        assert_eq!(extract_auth_code_from_url(url), Some("abc123".to_string()));
1009
1010        let no_code = "https://example.com/callback?error=denied";
1011        assert_eq!(extract_auth_code_from_url(no_code), None);
1012    }
1013
1014    #[test]
1015    fn test_is_consent_page() {
1016        let consent_html = r#"
1017        <html><body>
1018            <h1>ExampleApp wants to access your account</h1>
1019            <p>This app is requesting the following permissions (scope):</p>
1020            <ul><li>View your email</li><li>View your profile</li></ul>
1021            <form action="/consent" method="POST">
1022                <input type="hidden" name="state" value="abc" />
1023                <button name="submit_access" value="true">Grant access</button>
1024            </form>
1025        </body></html>
1026        "#;
1027        assert!(is_consent_page(consent_html));
1028
1029        let normal_html = "<html><body><h1>Welcome</h1></body></html>";
1030        assert!(!is_consent_page(normal_html));
1031    }
1032
1033    #[test]
1034    fn test_is_mfa_page() {
1035        let mfa_html = r#"
1036        <html><body>
1037            <h1>2-Step Verification</h1>
1038            <p>Enter the 6-digit code from your authenticator app</p>
1039            <form action="/verify" method="POST">
1040                <input type="text" name="code" />
1041                <button type="submit">Verify</button>
1042            </form>
1043        </body></html>
1044        "#;
1045        assert!(is_mfa_page(mfa_html));
1046
1047        let normal = "<html><body><h1>Login</h1></body></html>";
1048        assert!(!is_mfa_page(normal));
1049    }
1050
1051    #[test]
1052    fn test_detect_mfa_type() {
1053        assert!(matches!(
1054            detect_mfa_type("Enter the 6-digit code from your authenticator app"),
1055            MfaType::Totp
1056        ));
1057        assert!(matches!(
1058            detect_mfa_type("We sent a code via SMS to your phone"),
1059            MfaType::Sms
1060        ));
1061        assert!(matches!(
1062            detect_mfa_type("Check your email for a verification code"),
1063            MfaType::Email
1064        ));
1065        assert!(matches!(
1066            detect_mfa_type("Approve the push notification on your Duo app"),
1067            MfaType::Push
1068        ));
1069    }
1070
1071    #[test]
1072    fn test_parse_consent_form() {
1073        let html = r#"
1074        <html><body>
1075            <form action="/oauth/approve" method="POST">
1076                <input type="hidden" name="state" value="xyz789" />
1077                <input type="hidden" name="client_id" value="app123" />
1078                <input type="hidden" name="scope" value="email profile" />
1079                <button name="submit_access" value="true">Allow</button>
1080            </form>
1081        </body></html>
1082        "#;
1083
1084        let form = parse_consent_form(html, "https://accounts.example.com", "example");
1085        assert!(form.is_some());
1086
1087        let f = form.unwrap();
1088        assert_eq!(f.action_url, "https://accounts.example.com/oauth/approve");
1089        assert_eq!(f.provider, "example");
1090        assert_eq!(f.hidden_fields.get("state").unwrap(), "xyz789");
1091        assert_eq!(f.hidden_fields.get("client_id").unwrap(), "app123");
1092    }
1093
1094    #[test]
1095    fn test_parse_first_form() {
1096        let html = r#"
1097        <form action="/submit" method="POST">
1098            <input type="hidden" name="token" value="abc" />
1099            <input type="text" name="code" value="" />
1100        </form>
1101        "#;
1102
1103        let form = parse_first_form(html, "https://example.com").unwrap();
1104        assert_eq!(form.action_url, "https://example.com/submit");
1105        assert_eq!(form.method, "POST");
1106        assert_eq!(form.fields.get("token").unwrap(), "abc");
1107        assert!(form.fields.contains_key("code"));
1108    }
1109
1110    #[test]
1111    fn test_parse_login_form_no_password_field() {
1112        let html = r#"
1113        <html><body>
1114            <form action="/search" method="GET">
1115                <input type="text" name="q" />
1116                <button type="submit">Search</button>
1117            </form>
1118        </body></html>
1119        "#;
1120
1121        let method = parse_login_form(html, "https://example.com");
1122        // No password field, so no login form detected. May return OAuth or None.
1123        if let Some(LoginMethod::Password { .. }) = method {
1124            panic!("should not detect a password login form without a password field");
1125        }
1126    }
1127}