// lonkero 3.6.2 - Docs.rs

// Copyright (c) 2026 Bountyy Oy. All rights reserved.
// This software is proprietary and confidential.

//! Intelligent Scan Orchestrator
//!
//! This module provides context-aware scanning that replaces the old mode-based approach.
//! Instead of --mode fast|normal|thorough|insane, it uses:
//!
//! 1. Technology detection to select relevant scanners
//! 2. Endpoint deduplication to avoid testing the same form 50+ times
//! 3. Parameter risk scoring to determine payload intensity per-parameter
//!
//! The key insight: when tech detection fails, we run MORE tests, not fewer.

use std::collections::HashSet;
use tracing::info;

use crate::crawler::CrawlResults;

use super::attack_surface::{
    AttackSurface, DeduplicatedTargets, ParameterSource as AttackSurfaceParamSource,
};
use super::parameter_prioritizer::{
    ParameterInfo, ParameterPrioritizer, ParameterRisk, ParameterSource as PrioritizerParamSource,
};
use super::registry::{PayloadIntensity, ScannerRegistry, ScannerType, TechCategory};

/// Statistics from intelligent scan orchestration
///
/// Populated by `IntelligentScanOrchestrator::generate_scan_plan` and returned as the
/// `stats` field of the resulting `IntelligentScanPlan`. The derived `Default` starts
/// every counter at zero with an empty technology list.
#[derive(Debug, Clone, Default)]
pub struct OrchestrationStats {
    /// Total targets before deduplication
    pub total_original: usize,
    /// Total targets after deduplication
    pub total_deduplicated: usize,
    /// Overall reduction percentage (copied from the deduplicated targets)
    pub reduction_percent: f32,
    /// Number of prioritized parameters whose risk score is above 66
    pub high_risk_params: usize,
    /// Number of prioritized parameters whose risk score is in 34..=66
    pub medium_risk_params: usize,
    /// Number of prioritized parameters whose risk score is in 0..=33
    pub low_risk_params: usize,
    /// Number of scanners selected for the plan
    pub scanners_selected: usize,
    /// Technologies detected, one `Debug`-formatted string per technology
    pub technologies_detected: Vec<String>,
}

/// A prioritized parameter ready for testing
///
/// One value is produced per unique parameter of the deduplicated targets; it bundles
/// the parameter's risk score with the payload intensity and scanners derived from it.
#[derive(Debug, Clone)]
pub struct PrioritizedParameter {
    /// Parameter name
    pub name: String,
    /// Risk score (0-100), the prioritizer's score cast to `u32`
    pub risk_score: u32,
    /// Payload intensity, obtained from `PayloadIntensity::from_risk_score(risk_score)`
    pub intensity: PayloadIntensity,
    /// Suggested scanners for this parameter
    pub suggested_scanners: Vec<ScannerType>,
    /// Risk factors detected, each stored as its `Debug`-formatted string
    pub risk_factors: Vec<String>,
}

/// Intelligent scan plan generated by the orchestrator
///
/// This is the return value of `IntelligentScanOrchestrator::generate_scan_plan`.
#[derive(Debug, Clone)]
pub struct IntelligentScanPlan {
    /// Scanners to run (in priority order), as returned by the scanner registry
    pub scanners: Vec<ScannerType>,
    /// Deduplicated targets
    pub targets: DeduplicatedTargets,
    /// Prioritized parameters with risk scores, sorted highest score first
    pub prioritized_params: Vec<PrioritizedParameter>,
    /// Detected technologies (a copy of the slice passed to the orchestrator)
    pub technologies: Vec<TechCategory>,
    /// Orchestration statistics
    pub stats: OrchestrationStats,
}

/// Intelligent Scan Orchestrator
///
/// Replaces the old mode-based scanning with context-aware orchestration.
/// It owns the two collaborators every plan is built from: a scanner registry
/// and a parameter prioritizer. Both are private and exposed read-only through
/// the `registry()` and `prioritizer()` accessors.
pub struct IntelligentScanOrchestrator {
    /// Scanner registry for tech-aware routing
    registry: ScannerRegistry,
    /// Parameter prioritizer for risk scoring
    prioritizer: ParameterPrioritizer,
}

impl Default for IntelligentScanOrchestrator {
    fn default() -> Self {
        Self::new()
    }
}

impl IntelligentScanOrchestrator {
    /// Build an orchestrator backed by a freshly constructed scanner registry
    /// and parameter prioritizer.
    pub fn new() -> Self {
        let registry = ScannerRegistry::new();
        let prioritizer = ParameterPrioritizer::new();

        Self {
            registry,
            prioritizer,
        }
    }

    /// Generate an intelligent scan plan from crawl results and tech detection
    ///
    /// This replaces the old mode-based approach. Instead of:
    /// - --mode fast: 50 payloads globally
    /// - --mode normal: 500 payloads globally
    /// - --mode thorough: 5000 payloads globally
    /// - --mode insane: all payloads globally
    ///
    /// We now use:
    /// - Context-aware scanner selection based on detected tech
    /// - Per-parameter payload intensity based on risk score
    /// - Endpoint/form deduplication to avoid redundant testing
    pub fn generate_scan_plan(
        &self,
        crawl_results: &CrawlResults,
        detected_technologies: &[TechCategory],
        target_url: &str,
    ) -> IntelligentScanPlan {
        info!("[Orchestrator] Generating intelligent scan plan");

        let mut stats = OrchestrationStats::default();

        // Step 1: Build attack surface from crawl results
        let mut attack_surface = AttackSurface::new();

        // Add crawled endpoints with their parameters
        for endpoint in &crawl_results.api_endpoints {
            // Extract parameters from endpoint URL
            let params: Vec<String> = if let Ok(parsed) = url::Url::parse(endpoint) {
                parsed.query_pairs().map(|(k, _)| k.to_string()).collect()
            } else {
                Vec::new()
            };
            attack_surface.add_endpoint(endpoint, "GET", &params);
        }

        // Add crawled forms (DiscoveredForm already has Vec<FormInput>)
        for form in &crawl_results.forms {
            attack_surface.add_form(&form.action, &form.method, &form.inputs);
        }

        // Add parameters from crawl results
        for (endpoint, params) in &crawl_results.parameters {
            for param in params {
                attack_surface.add_parameter(param, AttackSurfaceParamSource::Url, endpoint);
            }
        }

        // Add parameters from target URL
        attack_surface.add_url_parameters(target_url);

        // Step 2: Deduplicate the attack surface
        let deduplicated = attack_surface.build();

        stats.total_original = deduplicated.total_original;
        stats.total_deduplicated = deduplicated.total_deduplicated;
        stats.reduction_percent = deduplicated.reduction_percent;

        info!(
            "[Orchestrator] Deduplication: {}/{} targets ({:.1}% reduction)",
            stats.total_deduplicated, stats.total_original, stats.reduction_percent
        );

        // Step 3: Get intelligent scanner configuration
        let (scanners, _base_intensity) = self.registry.get_intelligent_scan_config(
            detected_technologies,
            50, // Base intensity, will be overridden per-parameter
        );

        stats.scanners_selected = scanners.len();
        stats.technologies_detected = detected_technologies
            .iter()
            .map(|t| format!("{:?}", t))
            .collect();

        info!(
            "[Orchestrator] Selected {} scanners for {} technologies",
            stats.scanners_selected,
            stats.technologies_detected.len()
        );

        // Step 4: Prioritize parameters
        let prioritized_params = self.prioritize_parameters(&deduplicated, detected_technologies);

        // Count risk levels
        for param in &prioritized_params {
            match param.risk_score {
                0..=33 => stats.low_risk_params += 1,
                34..=66 => stats.medium_risk_params += 1,
                _ => stats.high_risk_params += 1,
            }
        }

        info!(
            "[Orchestrator] Parameter risk distribution: {} high, {} medium, {} low",
            stats.high_risk_params, stats.medium_risk_params, stats.low_risk_params
        );

        IntelligentScanPlan {
            scanners,
            targets: deduplicated,
            prioritized_params,
            technologies: detected_technologies.to_vec(),
            stats,
        }
    }

    /// Prioritize all unique parameters from the deduplicated targets
    fn prioritize_parameters(
        &self,
        deduplicated: &DeduplicatedTargets,
        technologies: &[TechCategory],
    ) -> Vec<PrioritizedParameter> {
        let mut prioritized = Vec::new();

        for test_param in &deduplicated.unique_parameters {
            // Determine primary source
            let source = if test_param
                .context
                .sources
                .contains(&AttackSurfaceParamSource::Form)
            {
                PrioritizerParamSource::Form
            } else if test_param
                .context
                .sources
                .contains(&AttackSurfaceParamSource::Url)
            {
                PrioritizerParamSource::URL
            } else if test_param
                .context
                .sources
                .contains(&AttackSurfaceParamSource::JsonBody)
            {
                PrioritizerParamSource::JSON
            } else if test_param
                .context
                .sources
                .contains(&AttackSurfaceParamSource::Header)
            {
                PrioritizerParamSource::Header
            } else if test_param
                .context
                .sources
                .contains(&AttackSurfaceParamSource::Cookie)
            {
                PrioritizerParamSource::Cookie
            } else if test_param
                .context
                .sources
                .contains(&AttackSurfaceParamSource::PathSegment)
            {
                PrioritizerParamSource::Path
            } else {
                PrioritizerParamSource::Unknown
            };

            // Get endpoint for context
            let endpoint = test_param
                .context
                .endpoints_seen
                .first()
                .cloned()
                .unwrap_or_default();

            let param_info = ParameterInfo {
                name: test_param.name.clone(),
                value: test_param.context.sample_values.first().cloned(),
                input_type: "text".to_string(),
                source,
                endpoint_url: endpoint,
                form_context: None,
            };

            let risk = self.prioritizer.score_parameter(&param_info);
            let risk_score_u32 = risk.score as u32;
            let intensity = PayloadIntensity::from_risk_score(risk_score_u32);
            let suggested = self.get_suggested_scanners(&risk, technologies);

            prioritized.push(PrioritizedParameter {
                name: test_param.name.clone(),
                risk_score: risk_score_u32,
                intensity,
                suggested_scanners: suggested,
                risk_factors: risk
                    .risk_factors
                    .iter()
                    .map(|f| format!("{:?}", f))
                    .collect(),
            });
        }

        // Sort by risk (highest first)
        prioritized.sort_by(|a, b| b.risk_score.cmp(&a.risk_score));
        prioritized
    }

    /// Get suggested scanners based on parameter risk factors
    ///
    /// Each risk factor in `risk.risk_factors` maps to a fixed group of scanners;
    /// factors without a mapping contribute nothing. Two extra rules are applied
    /// afterwards:
    ///
    /// - if any entry of `technologies` is a `TechCategory::JavaScript(_)`,
    ///   prototype-pollution scanning is suggested;
    /// - if `risk.score` is greater than 20, XSS scanning is suggested.
    ///
    /// The result contains each scanner at most once, in the order it was first
    /// suggested. That order follows the order of `risk.risk_factors`, so the
    /// same input always yields the same output (collecting straight from a
    /// `HashSet` would yield an arbitrary order that can differ between runs).
    fn get_suggested_scanners(
        &self,
        risk: &ParameterRisk,
        technologies: &[TechCategory],
    ) -> Vec<ScannerType> {
        use super::parameter_prioritizer::RiskFactor;

        // `seen` de-duplicates; `ordered` remembers first-insertion order so the
        // returned vector is deterministic.
        let mut seen = HashSet::new();
        let mut ordered = Vec::new();
        let mut suggest = |scanner: ScannerType| {
            if seen.insert(scanner.clone()) {
                ordered.push(scanner);
            }
        };

        // Map risk factors to scanner types
        for factor in &risk.risk_factors {
            match factor {
                RiskFactor::CommandParameter => {
                    suggest(ScannerType::CommandInjection);
                    suggest(ScannerType::CodeInjection);
                }
                RiskFactor::FileParameter => {
                    suggest(ScannerType::PathTraversal);
                    suggest(ScannerType::FileUpload);
                }
                RiskFactor::UrlParameter => {
                    suggest(ScannerType::Ssrf);
                    suggest(ScannerType::SsrfBlind);
                    suggest(ScannerType::OpenRedirect);
                }
                RiskFactor::IdParameter => {
                    suggest(ScannerType::Idor);
                    suggest(ScannerType::Bola);
                    suggest(ScannerType::SqlI);
                }
                RiskFactor::AuthRelated => {
                    suggest(ScannerType::AuthBypass);
                    suggest(ScannerType::Jwt);
                }
                RiskFactor::EmailParameter => {
                    suggest(ScannerType::EmailHeaderInjection);
                }
                RiskFactor::SearchParameter => {
                    suggest(ScannerType::Xss);
                    suggest(ScannerType::SqlI);
                }
                RiskFactor::AdminIndicator => {
                    suggest(ScannerType::AuthBypass);
                    suggest(ScannerType::Idor);
                }
                RiskFactor::DebugParameter => {
                    suggest(ScannerType::InformationDisclosure);
                }
                // Remaining risk factors have no dedicated scanner mapping.
                _ => {}
            }
        }

        // Prototype pollution is only suggested when a JavaScript technology
        // was detected.
        if technologies
            .iter()
            .any(|t| matches!(t, TechCategory::JavaScript(_)))
        {
            suggest(ScannerType::PrototypePollution);
        }

        // XSS is suggested for any parameter scoring above 20, whether or not a
        // risk factor already suggested it.
        if risk.score > 20 {
            suggest(ScannerType::Xss);
        }

        ordered
    }

    /// Get the scanner registry for direct access
    ///
    /// Returns a shared borrow of the registry owned by this orchestrator; the
    /// borrow lives no longer than `&self`.
    pub fn registry(&self) -> &ScannerRegistry {
        &self.registry
    }

    /// Get the parameter prioritizer for direct access
    ///
    /// Returns a shared borrow of the prioritizer owned by this orchestrator; the
    /// borrow lives no longer than `&self`.
    pub fn prioritizer(&self) -> &ParameterPrioritizer {
        &self.prioritizer
    }

    /// Decide whether `scanner` should run given the detected `technologies`.
    ///
    /// The checks are applied in order and the first that succeeds wins:
    ///
    /// 1. Scanners the registry classifies as universal or core always run.
    /// 2. The scanner runs if the registry does not ask to skip it for at least
    ///    one of the given technologies.
    /// 3. When no technology was identified (the slice is empty or holds only
    ///    `TechCategory::Unknown`), the scanner runs if it is among the
    ///    registry's fallback scanners.
    ///
    /// Otherwise the scanner is not run.
    pub fn should_run_scanner(&self, scanner: &ScannerType, technologies: &[TechCategory]) -> bool {
        let registry = &self.registry;

        let always_on = registry.is_universal(scanner) || registry.is_core(scanner);
        if always_on {
            return true;
        }

        let allowed_by_some_tech = technologies
            .iter()
            .any(|tech| !registry.should_skip(scanner, tech));
        if allowed_by_some_tech {
            return true;
        }

        // `all` is true for an empty slice, so this single test covers both
        // "nothing detected" and "only unknown technologies detected".
        let nothing_identified = technologies
            .iter()
            .all(|tech| matches!(tech, TechCategory::Unknown));

        nothing_identified && registry.get_fallback_scanners().contains(scanner)
    }

    /// Compute the payload intensity for a parameter known only by its name.
    ///
    /// The parameter is scored with no sample value, a `"text"` input type, an
    /// unknown source, an empty endpoint URL and no form context; the resulting
    /// risk score is then converted with `PayloadIntensity::from_risk_score`.
    pub fn get_parameter_intensity(&self, param_name: &str) -> PayloadIntensity {
        let name_only_probe = ParameterInfo {
            name: param_name.to_owned(),
            value: None,
            input_type: String::from("text"),
            source: PrioritizerParamSource::Unknown,
            endpoint_url: String::new(),
            form_context: None,
        };

        let score = self.prioritizer.score_parameter(&name_only_probe).score as u32;
        PayloadIntensity::from_risk_score(score)
    }
}

// Unit tests for the orchestrator's public helpers. They exercise a
// default-constructed orchestrator only; no crawl results are involved.
#[cfg(test)]
mod tests {
    use super::*;

    // A new orchestrator exposes a registry that knows at least one universal
    // and one core scanner.
    #[test]
    fn test_orchestrator_creation() {
        let orchestrator = IntelligentScanOrchestrator::new();
        assert!(!orchestrator.registry().get_universal_scanners().is_empty());
        assert!(!orchestrator.registry().get_core_scanners().is_empty());
    }

    // Expects CORS and security-header scanners to run with no detected tech.
    #[test]
    fn test_should_run_scanner_universal() {
        let orchestrator = IntelligentScanOrchestrator::new();

        // Universal scanners should always run
        assert!(orchestrator.should_run_scanner(&ScannerType::Cors, &[]));
        assert!(orchestrator.should_run_scanner(&ScannerType::SecurityHeaders, &[]));
    }

    // Expects XSS, SQLi and SSRF scanners to run with no detected tech.
    #[test]
    fn test_should_run_scanner_core() {
        let orchestrator = IntelligentScanOrchestrator::new();

        // Core scanners should always run
        assert!(orchestrator.should_run_scanner(&ScannerType::Xss, &[]));
        assert!(orchestrator.should_run_scanner(&ScannerType::SqlI, &[]));
        assert!(orchestrator.should_run_scanner(&ScannerType::Ssrf, &[]));
    }

    // Sensitive-looking names must land in one of the two heaviest tiers.
    #[test]
    fn test_parameter_intensity_high_risk() {
        let orchestrator = IntelligentScanOrchestrator::new();

        // High-risk parameter names should get Extended or Maximum intensity
        let intensity = orchestrator.get_parameter_intensity("password");
        assert!(matches!(
            intensity,
            PayloadIntensity::Extended | PayloadIntensity::Maximum
        ));

        let intensity = orchestrator.get_parameter_intensity("cmd");
        assert!(matches!(
            intensity,
            PayloadIntensity::Extended | PayloadIntensity::Maximum
        ));
    }

    // Benign-looking names must land in one of the two lightest tiers.
    #[test]
    fn test_parameter_intensity_low_risk() {
        let orchestrator = IntelligentScanOrchestrator::new();

        // Low-risk parameter names should get Minimal or Standard intensity
        let intensity = orchestrator.get_parameter_intensity("page");
        assert!(matches!(
            intensity,
            PayloadIntensity::Minimal | PayloadIntensity::Standard
        ));
    }
}