Skip to main content

lonkero_scanner/
types.rs

1// Copyright (c) 2026 Bountyy Oy. All rights reserved.
2// This software is proprietary and confidential.
3
4use crate::signing::ReportSignature;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8/// Scan mode determines the intensity and scope of the security scan
9#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
10#[serde(rename_all = "lowercase")]
11pub enum ScanMode {
12    /// Legacy mode: 50 payloads globally
13    Fast,
14    /// Legacy mode: 500 payloads globally
15    Normal,
16    /// Legacy mode: 5000 payloads globally
17    Thorough,
18    /// Legacy mode: unlimited payloads globally
19    Insane,
20    /// Intelligent context-aware mode (v3.0 default)
21    /// Uses tech detection, endpoint deduplication, and per-parameter risk scoring
22    Intelligent,
23}
24
25impl Default for ScanMode {
26    fn default() -> Self {
27        // v3.0: Intelligent mode is now the default
28        ScanMode::Intelligent
29    }
30}
31
32impl std::fmt::Display for ScanMode {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            ScanMode::Fast => write!(f, "fast"),
36            ScanMode::Normal => write!(f, "normal"),
37            ScanMode::Thorough => write!(f, "thorough"),
38            ScanMode::Insane => write!(f, "insane"),
39            ScanMode::Intelligent => write!(f, "intelligent"),
40        }
41    }
42}
43
44impl ScanMode {
45    pub fn as_str(&self) -> &'static str {
46        match self {
47            ScanMode::Fast => "fast",
48            ScanMode::Normal => "normal",
49            ScanMode::Thorough => "thorough",
50            ScanMode::Insane => "insane",
51            ScanMode::Intelligent => "intelligent",
52        }
53    }
54
55    /// Returns true if this is the intelligent context-aware mode
56    pub fn is_intelligent(&self) -> bool {
57        matches!(self, ScanMode::Intelligent)
58    }
59
60    /// Returns true if this is a legacy mode (fast/normal/thorough/insane)
61    pub fn is_legacy(&self) -> bool {
62        !self.is_intelligent()
63    }
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct ScanJob {
68    pub scan_id: String,
69    pub target: String,
70    pub config: ScanConfig,
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
74#[serde(rename_all = "camelCase")]
75pub struct ScanConfig {
76    #[serde(default)]
77    pub scan_mode: ScanMode,
78
79    #[serde(default)]
80    pub enable_crawler: bool,
81
82    #[serde(default = "default_max_depth")]
83    pub max_depth: u32,
84
85    #[serde(default = "default_max_pages")]
86    pub max_pages: u32,
87
88    #[serde(default)]
89    pub enum_subdomains: bool,
90
91    #[serde(default)]
92    pub auth_cookie: Option<String>,
93
94    #[serde(default)]
95    pub auth_token: Option<String>,
96
97    #[serde(default)]
98    pub auth_basic: Option<String>,
99
100    #[serde(default)]
101    pub custom_headers: Option<HashMap<String, String>>,
102
103    /// Only run these specific scanner modules (empty = run all)
104    #[serde(default)]
105    pub only_modules: Vec<String>,
106
107    /// Skip these specific scanner modules
108    #[serde(default)]
109    pub skip_modules: Vec<String>,
110}
111
112impl ScanConfig {
113    /// Check if a module should run based on --only and --skip filters.
114    /// Returns true if the module is allowed to run.
115    pub fn should_run_module(&self, module_id: &str) -> bool {
116        // If --only is specified, only run modules in the list
117        if !self.only_modules.is_empty() {
118            if !self.only_modules.iter().any(|m| m == module_id) {
119                return false;
120            }
121        }
122        // If --skip is specified, skip modules in the list
123        if self.skip_modules.iter().any(|m| m == module_id) {
124            return false;
125        }
126        true
127    }
128
129    /// Check if ANY module from a list should run (for phase-level gating).
130    pub fn should_run_any_module(&self, module_ids: &[&str]) -> bool {
131        if self.only_modules.is_empty() {
132            // No filter, check skip list
133            return module_ids.iter().any(|id| !self.skip_modules.contains(&id.to_string()));
134        }
135        // Check if any of the given modules are in the only list
136        module_ids.iter().any(|id| self.should_run_module(id))
137    }
138}
139
/// Serde fallback for `ScanConfig::max_depth` when the key is missing: 3.
fn default_max_depth() -> u32 {
    const DEFAULT_MAX_DEPTH: u32 = 3;
    DEFAULT_MAX_DEPTH
}
143
/// Serde fallback for `ScanConfig::max_pages` when the key is missing: 1000.
fn default_max_pages() -> u32 {
    const DEFAULT_MAX_PAGES: u32 = 1000;
    DEFAULT_MAX_PAGES
}
147
148impl Default for ScanConfig {
149    fn default() -> Self {
150        Self {
151            scan_mode: ScanMode::Fast,
152            enable_crawler: false,
153            max_depth: 3,
154            max_pages: 1000,
155            enum_subdomains: false,
156            auth_cookie: None,
157            auth_token: None,
158            auth_basic: None,
159            custom_headers: None,
160            only_modules: Vec::new(),
161            skip_modules: Vec::new(),
162        }
163    }
164}
165
166impl ScanConfig {
167    /// Get the global payload count limit for legacy modes.
168    /// For Intelligent mode, this returns 0 as payload intensity is determined per-parameter.
169    pub fn payload_count(&self) -> usize {
170        match self.scan_mode {
171            ScanMode::Fast => 50,
172            ScanMode::Normal => 500,
173            ScanMode::Thorough => 5000,
174            ScanMode::Insane => usize::MAX, // All payloads
175            // Intelligent mode uses per-parameter payload intensity, not global count
176            ScanMode::Intelligent => 0,
177        }
178    }
179
180    /// Determine if cloud/container security scanning should run
181    /// Enabled for Thorough, Insane, and Intelligent modes
182    pub fn enable_cloud_scanning(&self) -> bool {
183        matches!(
184            self.scan_mode,
185            ScanMode::Thorough | ScanMode::Insane | ScanMode::Intelligent
186        )
187    }
188
189    /// Determine if extended subdomain enumeration should be used
190    /// Enabled for Thorough, Insane, and Intelligent modes
191    pub fn subdomain_extended(&self) -> bool {
192        matches!(
193            self.scan_mode,
194            ScanMode::Thorough | ScanMode::Insane | ScanMode::Intelligent
195        )
196    }
197}
198
199#[derive(Debug, Clone, Serialize, Deserialize)]
200#[serde(rename_all = "camelCase")]
201pub struct ScanResults {
202    pub scan_id: String,
203    pub target: String,
204    pub tests_run: u64,
205    pub vulnerabilities: Vec<Vulnerability>,
206    pub started_at: String,
207    pub completed_at: String,
208    pub duration_seconds: f64,
209    #[serde(default)]
210    pub early_terminated: bool,
211    #[serde(default)]
212    pub termination_reason: Option<String>,
213    /// Scanner version and build info
214    #[serde(default)]
215    pub scanner_version: Option<String>,
216    /// License signature watermark (for audit trail) - DEPRECATED: Use quantum_signature
217    #[serde(default, skip_serializing_if = "Option::is_none")]
218    pub license_signature: Option<String>,
219    /// Quantum-safe cryptographic signature for result verification
220    /// This signature is created by the Bountyy signing service and proves:
221    /// 1. The scan was authorized before execution
222    /// 2. The results have not been tampered with
223    /// 3. The scan was performed by a legitimate Lonkero scanner
224    #[serde(default, skip_serializing_if = "Option::is_none")]
225    pub quantum_signature: Option<ReportSignature>,
226    /// Scan authorization token ID (for audit correlation)
227    #[serde(default, skip_serializing_if = "Option::is_none")]
228    pub authorization_token_id: Option<String>,
229}
230
231/// Response data captured for ML learning (GDPR-compliant)
232/// Stores only extracted features, NOT raw response bodies
233/// This is embedded in vulnerabilities when ML is enabled
234#[derive(Debug, Clone)]
235pub struct MlResponseData {
236    /// Extracted features from the response (GDPR-safe - no raw data)
237    pub features: crate::ml::VulnFeatures,
238    /// The payload type/category (not the actual payload content for privacy)
239    pub payload_category: Option<String>,
240}
241
242/// Simplified HTTP response metadata for ML (GDPR-compliant)
243/// Only stores metadata, not actual response bodies
244#[derive(Debug, Clone)]
245pub struct MlHttpResponse {
246    pub status_code: u16,
247    pub body_length: usize,
248    pub duration_ms: u64,
249    pub content_type: Option<String>,
250}
251
252impl MlHttpResponse {
253    /// Create from an http_client::HttpResponse (stores metadata only)
254    pub fn from_http_response(resp: &crate::http_client::HttpResponse) -> Self {
255        Self {
256            status_code: resp.status_code,
257            body_length: resp.body.len(),
258            duration_ms: resp.duration_ms,
259            content_type: resp.headers.get("content-type").cloned(),
260        }
261    }
262}
263
264#[derive(Debug, Clone, Serialize, Deserialize, Default)]
265#[serde(rename_all = "camelCase", default)]
266pub struct Vulnerability {
267    pub id: String,
268    #[serde(rename = "type")]
269    pub vuln_type: String,
270    #[serde(default)]
271    pub severity: Severity,
272    #[serde(default)]
273    pub confidence: Confidence,
274    pub category: String,
275    pub url: String,
276    pub parameter: Option<String>,
277    pub payload: String,
278    pub description: String,
279    pub evidence: Option<String>,
280    pub cwe: String,
281    pub cvss: f32,
282    pub verified: bool,
283    pub false_positive: bool,
284    pub remediation: String,
285    pub discovered_at: String,
286    /// ML model confidence score (0.0-1.0), set by MlEnhancer after scoring
287    #[serde(skip_serializing_if = "Option::is_none")]
288    pub ml_confidence: Option<f64>,
289    /// ML response data for learning (not serialized to reports)
290    /// This field is skipped during serialization and defaults to None
291    #[serde(skip)]
292    pub ml_data: Option<MlResponseData>,
293}
294
295impl Vulnerability {
296    /// Attach ML response data to this vulnerability for learning (GDPR-compliant)
297    /// Extracts features immediately - no raw data is stored
298    /// Call this when creating a vulnerability to enable ML learning
299    pub fn with_ml_data(
300        mut self,
301        response: &crate::http_client::HttpResponse,
302        baseline: Option<&crate::http_client::HttpResponse>,
303        payload: Option<&str>,
304    ) -> Self {
305        // Extract features immediately - don't store raw response data
306        let extractor = crate::ml::FeatureExtractor::new();
307        let features = extractor.extract(response, baseline, payload);
308
309        // Categorize payload without storing actual content
310        let payload_category = payload.map(|p| Self::categorize_payload(p));
311
312        self.ml_data = Some(MlResponseData {
313            features,
314            payload_category,
315        });
316        self
317    }
318
319    /// Categorize a payload into a privacy-safe category
320    fn categorize_payload(payload: &str) -> String {
321        let p = payload.to_lowercase();
322        if p.contains("select") || p.contains("union") || p.contains("'--") {
323            "sqli".to_string()
324        } else if p.contains("<script") || p.contains("javascript:") || p.contains("onerror") {
325            "xss".to_string()
326        } else if p.contains("http://") || p.contains("https://") || p.contains("file://") {
327            "ssrf".to_string()
328        } else if p.contains(";") && (p.contains("ls") || p.contains("cat") || p.contains("id")) {
329            "cmdi".to_string()
330        } else if p.contains("../") || p.contains("..\\") {
331            "path_traversal".to_string()
332        } else if p.contains("sleep") || p.contains("waitfor") || p.contains("benchmark") {
333            "time_based".to_string()
334        } else {
335            "other".to_string()
336        }
337    }
338
339    /// Check if this vulnerability has ML data attached
340    pub fn has_ml_data(&self) -> bool {
341        self.ml_data.is_some()
342    }
343
344    /// Get extracted ML features (GDPR-safe - no raw data)
345    pub fn get_ml_features(&self) -> Option<&crate::ml::VulnFeatures> {
346        self.ml_data.as_ref().map(|ml| &ml.features)
347    }
348}
349
350#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
351#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
352pub enum Severity {
353    Critical,
354    High,
355    #[default]
356    Medium,
357    Low,
358    Info,
359}
360
361impl std::fmt::Display for Severity {
362    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
363        match self {
364            Severity::Critical => write!(f, "CRITICAL"),
365            Severity::High => write!(f, "HIGH"),
366            Severity::Medium => write!(f, "MEDIUM"),
367            Severity::Low => write!(f, "LOW"),
368            Severity::Info => write!(f, "INFO"),
369        }
370    }
371}
372
373#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
374#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
375pub enum Confidence {
376    High,
377    #[default]
378    Medium,
379    Low,
380}
381
382impl std::fmt::Display for Confidence {
383    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
384        match self {
385            Confidence::High => write!(f, "HIGH"),
386            Confidence::Medium => write!(f, "MEDIUM"),
387            Confidence::Low => write!(f, "LOW"),
388        }
389    }
390}
391
392#[derive(Debug, Clone)]
393pub struct ScanProgress {
394    pub scan_id: String,
395    pub progress: u8,
396    pub phase: String,
397    pub message: String,
398}
399
400impl Serialize for ScanProgress {
401    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
402    where
403        S: serde::Serializer,
404    {
405        use serde::ser::SerializeStruct;
406        let mut state = serializer.serialize_struct("ScanProgress", 4)?;
407        state.serialize_field("scanId", &self.scan_id)?;
408        state.serialize_field("progress", &self.progress)?;
409        state.serialize_field("phase", &self.phase)?;
410        state.serialize_field("message", &self.message)?;
411        state.end()
412    }
413}
414
415/// Source of a discovered parameter
416#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
417pub enum ParameterSource {
418    HtmlForm,
419    UrlQueryString,
420    JavaScriptMined,
421    ApiEndpoint,
422    GraphQL,
423    RequestHeader,
424    Cookie,
425    Unknown,
426}
427
428/// Type of endpoint being tested
429#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
430pub enum EndpointType {
431    FormSubmission,
432    RestApi,
433    GraphQlApi,
434    JsonRpc,
435    StaticContent,
436    Unknown,
437}
438
439/// Context passed to scanners for intelligent testing
440#[derive(Debug, Clone)]
441pub struct ScanContext {
442    /// Where this parameter was discovered
443    pub parameter_source: ParameterSource,
444
445    /// Type of endpoint
446    pub endpoint_type: EndpointType,
447
448    /// Detected technologies (framework, server, language)
449    pub detected_tech: Vec<String>,
450
451    /// Primary framework if detected (e.g., "Django", "Laravel", "Next.js")
452    pub framework: Option<String>,
453
454    /// Server type (e.g., "nginx", "Apache")
455    pub server: Option<String>,
456
457    /// Other parameters discovered on this endpoint
458    pub other_parameters: Vec<String>,
459
460    /// Is this a JSON API endpoint
461    pub is_json_api: bool,
462
463    /// Is this a GraphQL endpoint
464    pub is_graphql: bool,
465
466    /// Form structure if from a form
467    pub form_fields: Vec<String>,
468
469    /// Content-Type of responses
470    pub content_type: Option<String>,
471}
472
473impl Default for ScanContext {
474    fn default() -> Self {
475        Self {
476            parameter_source: ParameterSource::Unknown,
477            endpoint_type: EndpointType::Unknown,
478            detected_tech: Vec::new(),
479            framework: None,
480            server: None,
481            other_parameters: Vec::new(),
482            is_json_api: false,
483            is_graphql: false,
484            form_fields: Vec::new(),
485            content_type: None,
486        }
487    }
488}
489
490impl ScanContext {
491    pub fn new() -> Self {
492        Self::default()
493    }
494
495    /// Check if a specific technology is detected
496    pub fn has_tech(&self, tech: &str) -> bool {
497        self.detected_tech
498            .iter()
499            .any(|t| t.to_lowercase().contains(&tech.to_lowercase()))
500    }
501
502    /// Check if framework matches
503    pub fn is_framework(&self, name: &str) -> bool {
504        self.framework
505            .as_ref()
506            .map(|f| f.to_lowercase().contains(&name.to_lowercase()))
507            .unwrap_or(false)
508    }
509}