lonkero 3.6.2

Web scanner built for actual pentests. Fast, modular, Rust.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
// Copyright (c) 2026 Bountyy Oy. All rights reserved.
// This software is proprietary and confidential.

/**
 * Bountyy Oy - Feature Extraction
 * Extracts ML features from HTTP responses and vulnerability findings
 *
 * @copyright 2026 Bountyy Oy
 * @license Proprietary
 */
use crate::http_client::HttpResponse;
use serde::{Deserialize, Serialize};

/// Extracted features from a vulnerability finding.
///
/// Instances are produced either by `FeatureExtractor::extract` (from an HTTP
/// response) or by `VulnFeatures::from_vulnerability` (heuristically, from the
/// text fields of a finding). `to_vector` flattens the fields, in the order
/// they are declared here, into the numeric vector named by `feature_names`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnFeatures {
    // Response characteristics
    /// HTTP status code (`from_vulnerability` fills in a placeholder).
    pub status_code: u16,
    /// Length of the response body in bytes (`from_vulnerability` uses the
    /// evidence length instead).
    pub response_length: usize,
    /// Response duration in milliseconds (`from_vulnerability` fills in a
    /// placeholder).
    pub response_time_ms: u64,

    // Content analysis
    /// The response looks like HTML.
    pub has_html: bool,
    /// The response looks like JSON.
    pub has_json: bool,
    /// The response looks like XML.
    pub has_xml: bool,
    /// The response is, or embeds, JavaScript.
    pub has_javascript: bool,

    // Error detection
    /// A database error signature was found.
    pub has_sql_error: bool,
    /// A stack-trace signature was found.
    pub has_stack_trace: bool,
    /// Debug output markers were found.
    pub has_debug_info: bool,
    /// A server-side filesystem path fragment was found.
    pub has_path_disclosure: bool,

    // Reflection analysis
    /// The injected payload appears in the response.
    pub payload_reflected: bool,
    /// Number of times the payload appears (`to_vector` caps this at 10).
    pub reflection_count: usize,
    /// The payload appears directly after `="` or `='`.
    pub reflection_in_attribute: bool,
    /// The payload appears in a script context.
    pub reflection_in_script: bool,
    /// The payload appears in an encoded form.
    pub reflection_encoded: bool,

    // Behavioral
    /// The response differs from the baseline response.
    pub differs_from_baseline: bool,
    /// The response was unusually slow.
    pub timing_anomaly: bool,
    /// The status code differs from the baseline's.
    pub status_changed: bool,
    /// The body length differs from the baseline's by a large factor.
    pub length_changed_significantly: bool,

    // Context
    /// The target looks like an API endpoint.
    pub is_api_endpoint: bool,
    /// Authentication-related headers were observed.
    pub has_auth_headers: bool,
    /// A session cookie was observed.
    pub has_session_cookie: bool,
}

impl VulnFeatures {
    /// Flatten the features into a numeric vector for the ML model.
    ///
    /// The element order matches [`VulnFeatures::feature_names`]. Booleans map
    /// to `0.0` / `1.0`; the status code is divided by 600; length and time are
    /// natural-log scaled (clamped at zero) and divided by 20 and 10
    /// respectively; the reflection count is capped at 10 and divided by 10.
    pub fn to_vector(&self) -> Vec<f32> {
        // Booleans become 0.0 or 1.0.
        let flag = |set: bool| f32::from(u8::from(set));
        // ln(0) is -inf and ln(1) is 0, so the clamp maps both to 0.0.
        let log_scaled = |raw: f32, scale: f32| raw.ln().max(0.0) / scale;

        let status_norm = f32::from(self.status_code) / 600.0;
        let length_log = log_scaled(self.response_length as f32, 20.0);
        let time_log = log_scaled(self.response_time_ms as f32, 10.0);
        let reflection_norm = (self.reflection_count as f32).min(10.0) / 10.0;

        Vec::from([
            // Response characteristics
            status_norm,
            length_log,
            time_log,
            // Content type
            flag(self.has_html),
            flag(self.has_json),
            flag(self.has_xml),
            flag(self.has_javascript),
            // Errors
            flag(self.has_sql_error),
            flag(self.has_stack_trace),
            flag(self.has_debug_info),
            flag(self.has_path_disclosure),
            // Reflection
            flag(self.payload_reflected),
            reflection_norm,
            flag(self.reflection_in_attribute),
            flag(self.reflection_in_script),
            flag(self.reflection_encoded),
            // Behaviour
            flag(self.differs_from_baseline),
            flag(self.timing_anomaly),
            flag(self.status_changed),
            flag(self.length_changed_significantly),
            // Context
            flag(self.is_api_endpoint),
            flag(self.has_auth_headers),
            flag(self.has_session_cookie),
        ])
    }

    /// Names of the vector elements returned by `to_vector`, in the same order.
    ///
    /// Intended for interpretability (e.g. labelling feature importances).
    pub fn feature_names() -> Vec<&'static str> {
        const NAMES: [&str; 23] = [
            "status_code_norm",
            "response_length_log",
            "response_time_log",
            "has_html",
            "has_json",
            "has_xml",
            "has_javascript",
            "has_sql_error",
            "has_stack_trace",
            "has_debug_info",
            "has_path_disclosure",
            "payload_reflected",
            "reflection_count_norm",
            "reflection_in_attribute",
            "reflection_in_script",
            "reflection_encoded",
            "differs_from_baseline",
            "timing_anomaly",
            "status_changed",
            "length_changed_significantly",
            "is_api_endpoint",
            "has_auth_headers",
            "has_session_cookie",
        ];
        NAMES.to_vec()
    }
}

/// Feature extractor for ML pipeline.
///
/// Holds the substring signatures that `extract` searches for in a response
/// body; the lists are filled in by `new`.
pub struct FeatureExtractor {
    /// SQL error signatures; searched for in the lowercased response body.
    sql_patterns: Vec<&'static str>,
    /// Stack-trace signatures; searched for in the lowercased response body.
    stack_patterns: Vec<&'static str>,
    /// Filesystem path fragments; searched for in the response body as-is.
    path_patterns: Vec<&'static str>,
}

impl FeatureExtractor {
    /// Build an extractor with the built-in SQL-error, stack-trace and path
    /// signatures.
    ///
    /// `sql_patterns` and `stack_patterns` are searched for in a *lowercased*
    /// copy of the response body, so every entry in those two lists must be
    /// lowercase; an entry containing an uppercase letter can never match.
    /// `path_patterns` are searched for in the body as-is.
    pub fn new() -> Self {
        Self {
            sql_patterns: vec![
                "sql syntax",
                "mysql_",
                "ora-",
                "postgresql",
                "sqlite",
                "sqlstate",
                "syntax error",
                "unclosed quotation",
                "microsoft ole db",
                "odbc drivers",
            ],
            stack_patterns: vec![
                "at line",
                "stack trace",
                "traceback",
                "exception in",
                "error at",
                ".java:",
                ".py:",
                ".php:",
                ".rb:",
                "at object.",
            ],
            path_patterns: vec![
                "/var/www",
                "/home/",
                "C:\\",
                "D:\\",
                "/usr/",
                "/opt/",
                "wwwroot",
                "htdocs",
                "public_html",
            ],
        }
    }

    /// Extract features from an HTTP response.
    ///
    /// * `response` - the response to analyse.
    /// * `baseline` - an optional reference response; without it all
    ///   baseline-comparison flags are `false`.
    /// * `payload` - the injected payload, if any; without it (or when it is
    ///   empty) all reflection features are `false` / `0`.
    ///
    /// Headers are looked up under lowercase keys (`content-type`,
    /// `set-cookie`, ...).
    pub fn extract(
        &self,
        response: &HttpResponse,
        baseline: Option<&HttpResponse>,
        payload: Option<&str>,
    ) -> VulnFeatures {
        let body = &response.body;
        let body_lower = body.to_lowercase();

        // Content type detection (case-insensitive on the header value)
        let content_type = response
            .headers
            .get("content-type")
            .map(|s| s.to_lowercase())
            .unwrap_or_default();

        let looks_like_json =
            content_type.contains("json") || body.starts_with('{') || body.starts_with('[');

        let has_html = content_type.contains("html") || body.contains("<html");
        let has_json = looks_like_json;
        let has_xml = content_type.contains("xml") || body.starts_with("<?xml");
        let has_javascript = content_type.contains("javascript") || body.contains("<script");

        // Error detection: the signature lists are lowercase, see `new`.
        let has_sql_error = self.sql_patterns.iter().any(|p| body_lower.contains(p));
        let has_stack_trace = self.stack_patterns.iter().any(|p| body_lower.contains(p));
        let has_debug_info = body_lower.contains("debug") || body_lower.contains("verbose");
        let has_path_disclosure = self.path_patterns.iter().any(|p| body.contains(p));

        // Reflection analysis
        let (
            payload_reflected,
            reflection_count,
            reflection_in_attribute,
            reflection_in_script,
            reflection_encoded,
        ) = self.analyze_reflection(body, payload);

        // Baseline comparison
        let (differs_from_baseline, status_changed, length_changed_significantly) =
            self.compare_to_baseline(response, baseline);

        // Timing analysis (> 5 seconds is anomaly)
        let timing_anomaly = response.duration_ms > 5000;

        // Context detection. Uses the same lowercased content type as
        // `has_json`, so `application/JSON` is recognised here as well.
        let is_api_endpoint = looks_like_json;

        let has_auth_headers = ["authorization", "x-auth-token", "x-api-key"]
            .iter()
            .any(|name| response.headers.contains_key(*name));

        let has_session_cookie = response
            .headers
            .get("set-cookie")
            .map(|cookie| {
                let cookie = cookie.to_lowercase();
                cookie.contains("session") || cookie.contains("sid")
            })
            .unwrap_or(false);

        VulnFeatures {
            status_code: response.status_code,
            response_length: body.len(),
            response_time_ms: response.duration_ms,
            has_html,
            has_json,
            has_xml,
            has_javascript,
            has_sql_error,
            has_stack_trace,
            has_debug_info,
            has_path_disclosure,
            payload_reflected,
            reflection_count,
            reflection_in_attribute,
            reflection_in_script,
            reflection_encoded,
            differs_from_baseline,
            timing_anomaly,
            status_changed,
            length_changed_significantly,
            is_api_endpoint,
            has_auth_headers,
            has_session_cookie,
        }
    }

    /// Analyze payload reflection in the response body.
    ///
    /// Returns `(reflected, count, in_attribute, in_script, encoded)`:
    ///
    /// * `reflected` - the payload occurs in the body, ignoring case.
    /// * `count` - the larger of the exact and case-insensitive match counts.
    /// * `in_attribute` - the payload occurs directly after `="` or `='`.
    /// * `in_script` - the payload occurs inside a `<script>...</script>` block.
    /// * `encoded` - the HTML- or URL-encoded payload occurs in the body. Only
    ///   counted when encoding actually changes the payload; otherwise a plain
    ///   reflection would be reported as an encoded one.
    ///
    /// A missing or empty payload yields `(false, 0, false, false, false)`.
    fn analyze_reflection(
        &self,
        body: &str,
        payload: Option<&str>,
    ) -> (bool, usize, bool, bool, bool) {
        // Compiled once per process rather than on every call.
        static SCRIPT_BLOCK: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();

        let Some(payload) = payload.filter(|p| !p.is_empty()) else {
            return (false, 0, false, false, false);
        };

        let payload_lower = payload.to_lowercase();
        let body_lower = body.to_lowercase();

        // Check for direct reflection
        let direct_count = body.matches(payload).count();
        let lower_count = body_lower.matches(&payload_lower).count();
        let reflected = direct_count > 0 || lower_count > 0;
        let count = direct_count.max(lower_count);

        // Both context checks need a verbatim occurrence of the payload, so
        // they are skipped (and no regex is run) when there is none.
        let (in_attribute, in_script) = if direct_count > 0 {
            let in_attribute = body.contains(&format!("=\"{}", payload))
                || body.contains(&format!("='{}", payload));

            let script_block = SCRIPT_BLOCK.get_or_init(|| {
                regex::Regex::new(r"<script[^>]*>([\s\S]*?)</script>")
                    .expect("script-block pattern is a valid regex")
            });
            let in_script = script_block.captures_iter(body).any(|cap| {
                cap.get(1)
                    .map(|m| m.as_str().contains(payload))
                    .unwrap_or(false)
            });

            (in_attribute, in_script)
        } else {
            (false, false)
        };

        // Check for encoded versions
        let html_form = html_escape::encode_text(payload).to_string();
        let url_form = urlencoding::encode(payload).to_string();
        let encoded = [html_form, url_form]
            .iter()
            .any(|form| form.as_str() != payload && body.contains(form.as_str()));

        (reflected, count, in_attribute, in_script, encoded)
    }

    /// Compare a response to the baseline response.
    ///
    /// Returns `(differs, status_changed, length_changed_significantly)`.
    /// The length counts as significantly changed when the body is less than
    /// half or more than twice the baseline's length. `differs` is true when
    /// either of the other flags is set or the bodies are not identical.
    /// Without a baseline all three are `false`.
    fn compare_to_baseline(
        &self,
        response: &HttpResponse,
        baseline: Option<&HttpResponse>,
    ) -> (bool, bool, bool) {
        let Some(baseline) = baseline else {
            return (false, false, false);
        };

        let status_changed = response.status_code != baseline.status_code;

        // `max(1)` keeps the ratio finite for an empty baseline body.
        let length_ratio = response.body.len() as f64 / baseline.body.len().max(1) as f64;
        let length_changed_significantly = !(0.5..=2.0).contains(&length_ratio);

        let differs =
            status_changed || length_changed_significantly || response.body != baseline.body;

        (differs, status_changed, length_changed_significantly)
    }
}

impl Default for FeatureExtractor {
    fn default() -> Self {
        Self::new()
    }
}

impl VulnFeatures {
    /// Create GDPR-compliant features from vulnerability metadata only.
    ///
    /// No raw response data is stored - only statistical features derived from
    /// the finding's existing `evidence`, `description`, `vuln_type`,
    /// `category`, `url` and `payload` fields. All text matching is done on
    /// lowercased copies, so it is case-insensitive.
    ///
    /// Fields that need a real response are filled with placeholders:
    /// `status_code` is 200 for verified findings and 0 otherwise,
    /// `response_length` is the evidence length, `response_time_ms` is 5000
    /// when a timing anomaly is inferred and 100 otherwise, and the
    /// baseline-dependent `status_changed` / `length_changed_significantly`
    /// are always `false`.
    ///
    /// This method is safe to use without storing any PII or response data.
    pub fn from_vulnerability(vuln: &crate::types::Vulnerability) -> Self {
        // All lowercase: they are matched against lowercased text.
        const SQL_PATTERNS: [&str; 10] = [
            "sql syntax",
            "mysql",
            "ora-",
            "postgresql",
            "sqlite",
            "sqlstate",
            "syntax error",
            "quotation",
            "ole db",
            "odbc",
        ];
        const STACK_PATTERNS: [&str; 9] = [
            "at line",
            "stack trace",
            "traceback",
            "exception in",
            ".java:",
            ".py:",
            ".php:",
            ".rb:",
            "at object.",
        ];
        // None of these contain a space, so searching the space-joined text
        // below is the same as searching evidence and description separately.
        const PATH_PATTERNS: [&str; 9] = [
            "/var/www",
            "/home/",
            "c:\\",
            "d:\\",
            "/usr/",
            "/opt/",
            "wwwroot",
            "htdocs",
            "public_html",
        ];

        let evidence = vuln.evidence.as_deref().unwrap_or("");
        let evidence_lower = evidence.to_lowercase();
        let desc_lower = vuln.description.to_lowercase();
        let combined_lower = format!("{} {}", evidence_lower, desc_lower);

        let mentions_any =
            |patterns: &[&str]| patterns.iter().any(|p| combined_lower.contains(*p));

        // Error indicators
        let has_sql_error = mentions_any(&SQL_PATTERNS);
        let has_stack_trace = mentions_any(&STACK_PATTERNS);
        let has_path_disclosure = mentions_any(&PATH_PATTERNS);
        let has_debug_info = desc_lower.contains("debug") || evidence_lower.contains("debug");

        // Content type detection from vuln type/category
        let vuln_type_lower = vuln.vuln_type.to_lowercase();
        let category_lower = vuln.category.to_lowercase();

        let has_html = vuln_type_lower.contains("xss") || vuln_type_lower.contains("html");
        let has_json = category_lower.contains("api") || vuln_type_lower.contains("json");
        let has_xml = vuln_type_lower.contains("xml") || vuln_type_lower.contains("xxe");
        let has_javascript = vuln_type_lower.contains("xss") || vuln_type_lower.contains("dom");

        // Reflection detection from evidence. An empty payload is skipped:
        // `contains("")` is always true and would mark every finding without
        // a payload as reflected.
        let payload: &str = vuln.payload.as_str();
        let payload_reflected = evidence_lower.contains("reflected")
            || evidence_lower.contains("payload")
            || (!payload.is_empty() && evidence.contains(payload));

        let reflection_in_attribute = evidence_lower.contains("attribute")
            || evidence_lower.contains("=\"")
            || evidence_lower.contains("='");

        let reflection_in_script =
            evidence_lower.contains("<script") || evidence_lower.contains("javascript:");

        let reflection_encoded =
            evidence_lower.contains("encoded") || evidence_lower.contains("&lt;");

        // Behavioral indicators from description
        let timing_anomaly = desc_lower.contains("time")
            && (desc_lower.contains("delay")
                || desc_lower.contains("slow")
                || desc_lower.contains("ms"));

        let differs_from_baseline = desc_lower.contains("different")
            || desc_lower.contains("changed")
            || desc_lower.contains("baseline");

        // API endpoint detection from URL
        let url_lower = vuln.url.to_lowercase();
        let is_api_endpoint = ["/api/", "/graphql", "/v1/", "/v2/", "/rest/"]
            .iter()
            .any(|segment| url_lower.contains(*segment));

        // Auth detection from description
        let has_auth_headers = desc_lower.contains("authorization")
            || desc_lower.contains("auth header")
            || desc_lower.contains("bearer");

        let has_session_cookie = desc_lower.contains("session") || desc_lower.contains("cookie");

        Self {
            // Use placeholder values since we don't have raw response
            status_code: if vuln.verified { 200 } else { 0 },
            response_length: evidence.len(),
            response_time_ms: if timing_anomaly { 5000 } else { 100 },
            has_html,
            has_json,
            has_xml,
            has_javascript,
            has_sql_error,
            has_stack_trace,
            has_debug_info,
            has_path_disclosure,
            payload_reflected,
            reflection_count: usize::from(payload_reflected),
            reflection_in_attribute,
            reflection_in_script,
            reflection_encoded,
            differs_from_baseline,
            timing_anomaly,
            status_changed: false, // Can't determine without baseline
            length_changed_significantly: false, // Can't determine without baseline
            is_api_endpoint,
            has_auth_headers,
            has_session_cookie,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Build a header-less response with the given body and status and a
    /// fixed 100 ms duration.
    fn create_test_response(body: &str, status: u16) -> HttpResponse {
        let headers = HashMap::new();
        HttpResponse {
            status_code: status,
            headers,
            body: body.to_string(),
            duration_ms: 100,
        }
    }

    /// A MySQL-style error message in the body sets `has_sql_error`.
    #[test]
    fn test_sql_error_detection() {
        let resp = create_test_response("Error: You have an error in your SQL syntax", 500);

        let extracted = FeatureExtractor::new().extract(&resp, None, None);

        assert!(extracted.has_sql_error);
    }

    /// A payload echoed into an attribute value is reported as reflected and
    /// as reflected inside an attribute.
    #[test]
    fn test_reflection_detection() {
        let injected = "<script>alert(1)</script>";
        let resp = create_test_response("<input value=\"<script>alert(1)</script>\">", 200);

        let extracted = FeatureExtractor::new().extract(&resp, None, Some(injected));

        assert!(extracted.payload_reflected);
        assert!(extracted.reflection_in_attribute);
    }

    /// The feature vector and the feature-name list have the same length.
    #[test]
    fn test_feature_vector_length() {
        let sample = VulnFeatures {
            // Response characteristics
            status_code: 200,
            response_length: 1000,
            response_time_ms: 100,
            // Content analysis
            has_html: true,
            has_json: false,
            has_xml: false,
            has_javascript: true,
            // Error detection
            has_sql_error: false,
            has_stack_trace: false,
            has_debug_info: false,
            has_path_disclosure: false,
            // Reflection analysis
            payload_reflected: true,
            reflection_count: 1,
            reflection_in_attribute: true,
            reflection_in_script: false,
            reflection_encoded: false,
            // Behavioral
            differs_from_baseline: true,
            timing_anomaly: false,
            status_changed: false,
            length_changed_significantly: false,
            // Context
            is_api_endpoint: false,
            has_auth_headers: false,
            has_session_cookie: false,
        };

        let names = VulnFeatures::feature_names();
        let values = sample.to_vector();

        assert_eq!(values.len(), names.len());
    }
}