web_analyzer/
react_honeypot.rs

//! # React2Shell Honeypot — Attack Vector Detection & Attacker Intelligence
//!
//! A realistic React Server Components (RSC) honeypot that detects **45+ attack
//! vectors** while silently collecting comprehensive attacker intelligence.
//!
//! ## Capabilities
//!
//! - **Attack Detection** — SQLi, XSS, SSRF, SSTI, LFI/RFI, command injection,
//!   NoSQLi, XXE, deserialization, JWT attacks, GraphQL injection, CRLF, path
//!   traversal, prototype pollution, and 30+ more categories
//! - **Attacker Profiling** — IP, GeoIP, User-Agent, OS/browser fingerprint,
//!   request cadence, technique enumeration, session correlation
//! - **Realistic RSC Simulation** — Fake Server Action endpoints, plausible error
//!   messages, timing jitter, progressive response sizes
//! - **Structured Intelligence** — JSON-serializable event logs, severity
//!   scoring, risk classification, MITRE ATT&CK mapping
17
18use chrono::Utc;
19use regex::Regex;
20use serde::{Deserialize, Serialize};
21use std::collections::HashMap;
22use std::time::{Duration, Instant};
23
// ═════════════════════════════════════════════════════════════════════════════
// Core Types
// ═════════════════════════════════════════════════════════════════════════════
27
28/// Severity level for a detected attack.
29#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
30pub enum Severity {
31    #[serde(rename = "info")]
32    Info,
33    #[serde(rename = "low")]
34    Low,
35    #[serde(rename = "medium")]
36    Medium,
37    #[serde(rename = "high")]
38    High,
39    #[serde(rename = "critical")]
40    Critical,
41}
42
43/// An individual detected attack event.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct AttackEvent {
46    /// Unique event ID (UUIDv4-style timestamp-based).
47    pub event_id: String,
48    /// ISO-8601 timestamp.
49    pub timestamp: String,
50    /// Attack category (e.g. "sqli", "xss", "ssrf").
51    pub category: String,
52    /// Sub-category or specific technique.
53    pub subcategory: String,
54    /// The matched payload/pattern excerpt.
55    pub matched_payload: String,
56    /// Full incoming payload (truncated for storage).
57    pub full_payload: String,
58    /// HTTP method used.
59    pub method: String,
60    /// Request path/endpoint targeted.
61    pub path: String,
62    /// Severity assessment.
63    pub severity: Severity,
64    /// MITRE ATT&CK technique ID.
65    pub mitre_id: Option<String>,
66    /// The honeypot's simulated response code.
67    pub simulated_response: u16,
68    /// IP address of the attacker.
69    pub attacker_ip: String,
70    /// Raw User-Agent header.
71    pub user_agent: String,
72    /// All captured headers (sanitized).
73    pub headers: HashMap<String, String>,
74    /// Session tracking ID (cookie or fingerprint).
75    pub session_id: Option<String>,
76    /// Confidence score (0.0–1.0) that this is an actual attack.
77    pub confidence: f64,
78}
79
80/// Accumulated profile of an attacker across multiple requests.
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct AttackerProfile {
83    /// Unique profile ID (derived from IP + fingerprint).
84    pub profile_id: String,
85    /// IP address.
86    pub ip: String,
87    /// GeoIP country code (if resolved).
88    pub country: Option<String>,
89    /// GeoIP ASN/organization.
90    pub asn: Option<String>,
91    /// Whether the IP is a known Tor exit node.
92    pub is_tor: bool,
93    /// Whether the IP belongs to a known cloud provider.
94    pub is_cloud: bool,
95    /// Whether the connection came through a proxy.
96    pub is_proxy: bool,
97    /// User-Agent string from the first request.
98    pub user_agent: String,
99    /// Parsed browser/OS fingerprint.
100    pub browser_fingerprint: Option<BrowserFingerprint>,
101    /// First-seen timestamp.
102    pub first_seen: String,
103    /// Last-seen timestamp.
104    pub last_seen: String,
105    /// Total requests sent.
106    pub total_requests: u64,
107    /// Count per attack category.
108    pub attack_categories: HashMap<String, u64>,
109    /// Techniques observed.
110    pub techniques_used: Vec<String>,
111    /// Average request interval (seconds).
112    pub avg_request_interval: f64,
113    /// Whether the attacker appears automated (bot/script).
114    pub is_automated: bool,
115    /// Cumulative risk score (0–100).
116    pub risk_score: f64,
117    /// List of targeted endpoints.
118    pub targets: Vec<String>,
119    /// Timeline of detected events.
120    pub event_timeline: Vec<String>,
121}
122
123/// Parsed browser and OS information from User-Agent.
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct BrowserFingerprint {
126    pub browser: String,
127    pub browser_version: String,
128    pub os: String,
129    pub os_version: String,
130    pub engine: String,
131    pub device_type: String,
132    pub is_headless: bool,
133}
134
135/// Configuration for the honeypot engine.
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct HoneypotConfig {
138    /// Maximum payload size to store (bytes).
139    pub max_payload_store: usize,
140    /// Whether to simulate realistic RSC timing delays.
141    pub realistic_timing: bool,
142    /// Minimum simulated delay (ms).
143    pub min_delay_ms: u64,
144    /// Maximum simulated delay (ms).
145    pub max_delay_ms: u64,
146    /// Whether to respond with fake RSC content.
147    pub fake_rsc_responses: bool,
148    /// Whether to track sessions via cookie/fingerprint.
149    pub session_tracking: bool,
150    /// Session cookie name to set.
151    pub session_cookie: String,
152    /// Whether to log all requests (not just attacks).
153    pub log_all_requests: bool,
154    /// Confidence threshold for considering a detection as an attack.
155    pub detection_threshold: f64,
156    /// Whether to simulate progressive response sizes (keep attackers engaged).
157    pub progressive_sizing: bool,
158}
159
160impl Default for HoneypotConfig {
161    fn default() -> Self {
162        Self {
163            max_payload_store: 8192,
164            realistic_timing: true,
165            min_delay_ms: 20,
166            max_delay_ms: 180,
167            fake_rsc_responses: true,
168            session_tracking: true,
169            session_cookie: "__Host-RSC-ID".to_string(),
170            log_all_requests: false,
171            detection_threshold: 0.5,
172            progressive_sizing: true,
173        }
174    }
175}
176
177/// The complete honeypot state.
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct HoneypotState {
180    pub config: HoneypotConfig,
181    pub total_requests: u64,
182    pub total_attacks_detected: u64,
183    pub unique_attackers: usize,
184    pub attack_events: Vec<AttackEvent>,
185    pub attacker_profiles: HashMap<String, AttackerProfile>,
186    pub uptime_seconds: f64,
187    pub requests_per_minute: f64,
188}
189
190/// Result of analyzing a single request.
191#[derive(Debug, Clone, Serialize, Deserialize)]
192pub struct DetectionResult {
193    /// All attack vectors detected in the request.
194    pub detections: Vec<AttackEvent>,
195    /// The simulated HTTP response status code.
196    pub simulated_status: u16,
197    /// The simulated response body.
198    pub simulated_body: String,
199    /// Recommended content-type for the response.
200    pub content_type: String,
201    /// Whether the request should be blocked.
202    pub should_block: bool,
203    /// Suggested delay before responding (ms).
204    pub suggested_delay_ms: u64,
205}
206
207/// Raw request input for analysis.
208#[derive(Debug, Clone)]
209pub struct RawRequest {
210    pub method: String,
211    pub path: String,
212    pub query_string: String,
213    pub body: String,
214    pub headers: HashMap<String, String>,
215    pub ip: String,
216    pub timestamp: chrono::DateTime<Utc>,
217}
218
// ═════════════════════════════════════════════════════════════════════════════
// Attack Vector Definitions (45+ Categories)
// ═════════════════════════════════════════════════════════════════════════════
222
223/// Definition of a single attack vector detector.
224struct AttackVector {
225    category: &'static str,
226    subcategory: &'static str,
227    patterns: &'static [&'static str],
228    severity: Severity,
229    mitre_id: &'static str,
230    /// Where to search: "body", "query", "path", "headers", "all"
231    search_location: &'static str,
232    /// Additional context keywords that must also be present (AND logic).
233    context_keywords: &'static [&'static str],
234}
235
236/// A pre-compiled attack vector with compiled regex patterns.
237struct CompiledVector {
238    category: &'static str,
239    subcategory: &'static str,
240    patterns: Vec<Regex>,
241    severity: Severity,
242    mitre_id: &'static str,
243    search_location: &'static str,
244    context_keywords: &'static [&'static str],
245}
246
247/// All 45+ attack vector definitions.
248fn attack_vectors() -> &'static [AttackVector] {
249    use Severity::*;
250    static VECTORS: std::sync::OnceLock<Vec<AttackVector>> = std::sync::OnceLock::new();
251    VECTORS.get_or_init(|| vec![
252            // ── SQL Injection (Classic, Union, Blind, Error, Stacked, Time) ──
253            AttackVector {
254                category: "sqli", subcategory: "classic_tautology", severity: Critical,
255                mitre_id: "T1190",
256                search_location: "all",
257                context_keywords: &[],
258                patterns: &[
259                    r"(?i)('|\%27)\s*(OR|AND)\s*('|\%27)?\s*\d+\s*=\s*\d+",
260                    r"(?i)('|\%27)\s*(OR|AND)\s*('|\%27)?\s*'\d+'\s*=\s*'\d+'",
261                    r"(?i)(OR|AND)\s+\d+\s*=\s*\d+\s*--",
262                    r"(?i)admin'\s*(--|#|/\*)",
263                    r#"(?i)['"]\s*OR\s+1\s*=\s*1\s*--"#,
264                    r"(?i)'\s*OR\s+'1'\s*=\s*'1",
265                ],
266            },
267            AttackVector {
268                category: "sqli", subcategory: "union_select", severity: Critical,
269                mitre_id: "T1190",
270                search_location: "all",
271                context_keywords: &[],
272                patterns: &[
273                    r"(?i)UNION\s+(ALL\s+)?SELECT\s+(NULL|@@|\d+|user\b|database\b)",
274                    r"(?i)UNION\s+(ALL\s+)?SELECT\s+(NULL|@@|\d+|user\b|database\b).*--",
275                    r"(?i)'\s*UNION\s+SELECT\s+.*FROM\s+",
276                ],
277            },
278            AttackVector {
279                category: "sqli", subcategory: "blind_time", severity: Critical,
280                mitre_id: "T1190",
281                search_location: "all",
282                context_keywords: &[],
283                patterns: &[
284                    r"(?i)(SLEEP|pg_sleep|WAITFOR\s+DELAY|dbms_lock\.sleep|benchmark)\s*\(.*\d+",
285                    r"(?i)AND\s+(SLEEP|pg_sleep|WAITFOR)\s*\(\s*\d+\s*\)",
286                    r"(?i)'\s*AND\s+(SELECT\s+.*FROM\s+.*SLEEP)",
287                ],
288            },
289            AttackVector {
290                category: "sqli", subcategory: "error_based", severity: Critical,
291                mitre_id: "T1190",
292                search_location: "all",
293                context_keywords: &[],
294                patterns: &[
295                    r"(?i)extractvalue\s*\(\s*\d+\s*,\s*concat\s*\(",
296                    r"(?i)updatexml\s*\(\s*\d+\s*,\s*concat\s*\(",
297                    r"(?i)convert\s*\(.*using\s+",
298                    r"(?i)AND\s+1\s*=\s*CONVERT\s*\(int",
299                ],
300            },
301            AttackVector {
302                category: "sqli", subcategory: "stacked", severity: Critical,
303                mitre_id: "T1190",
304                search_location: "all",
305                context_keywords: &[],
306                patterns: &[
307                    r"(?i);\s*(DROP|INSERT|UPDATE|DELETE|ALTER|CREATE|EXEC|TRUNCATE|SHUTDOWN)\s+",
308                    r"(?i)';\s*(DROP|INSERT|UPDATE|DELETE)\s+",
309                    r"(?i);\s*EXEC\s+(sp_|xp_)",
310                ],
311            },
312            // ── NoSQL Injection ──
313            AttackVector {
314                category: "nosqli", subcategory: "mongodb", severity: Critical,
315                mitre_id: "T1190",
316                search_location: "all",
317                context_keywords: &[],
318                patterns: &[
319                    r#"(?i)\{\s*"\$ne"\s*:\s*""#,
320                    r#"(?i)\{\s*"\$gt"\s*:\s*""#,
321                    r#"(?i)\{\s*"\$regex"\s*:\s*".*"\s*\}"#,
322                    r#"(?i)\{\s*"\$where"\s*:\s*""#,
323                    r#"(?i)"\$(eq|ne|gt|gte|lt|lte|in|nin|regex|exists|type|mod|text|search|where)"\s*:"#,
324                    r"(?i)\{\s*'\$ne'\s*:\s*",
325                ],
326            },
327            AttackVector {
328                category: "nosqli", subcategory: "redis_injection", severity: High,
329                mitre_id: "T1190",
330                search_location: "all",
331                context_keywords: &[],
332                patterns: &[
333                    r"(?i)(\r\n|\n)\s*(CONFIG|SET|GET|FLUSHALL|KEYS|SAVE|SHUTDOWN|SLAVEOF)\s",
334                    r"(?i)%0[dD]%0[aA]\s*(CONFIG|SET|FLUSHALL)",
335                ],
336            },
337            // ── Cross-Site Scripting (XSS) ──
338            AttackVector {
339                category: "xss", subcategory: "reflected", severity: High,
340                mitre_id: "T1059.007",
341                search_location: "all",
342                context_keywords: &[],
343                patterns: &[
344                    r"(?i)<script[^>]*>.*</script>",
345                    r"(?i)<script[^>]*>.*",
346                    r#"(?i)javascript\s*:\s*(alert|prompt|confirm)\s*\("#,
347                    r#"(?i)"><script[^>]*>alert\("#,
348                    r#"(?i)<img[^>]+onerror\s*=\s*[`'\"]?\w+"#,
349                    r#"(?i)<svg[^>]+onload\s*=\s*[`'\"]?\w+"#,
350                ],
351            },
352            AttackVector {
353                category: "xss", subcategory: "polyglot", severity: High,
354                mitre_id: "T1059.007",
355                search_location: "all",
356                context_keywords: &[],
357                patterns: &[
358                    r#"(?i)jaVasCript:/*-/*`/*\`/*'/*"/**/(\s*/\*\s*/.*\)\s*;)"#,
359                    r#"(?i)"\s*;\s*alert\s*\(.*\)\s*//"#,
360                ],
361            },
362            AttackVector {
363                category: "xss", subcategory: "stored_payload", severity: High,
364                mitre_id: "T1059.007",
365                search_location: "all",
366                context_keywords: &[],
367                patterns: &[
368                    r#"(?i)<iframe[^>]*srcdoc\s*=\s*[`'\"]?\s*<script"#,
369                    r#"(?i)<object[^>]*data\s*=\s*[`'\"]?data:text/html"#,
370                    r#"(?i)<embed[^>]*src\s*=\s*[`'\"]?data:text/html"#,
371                ],
372            },
373            // ── Command Injection ──
374            AttackVector {
375                category: "cmdi", subcategory: "unix_pipe", severity: Critical,
376                mitre_id: "T1059.004",
377                search_location: "all",
378                context_keywords: &[],
379                patterns: &[
380                    r"(?m)[\|\;`]\s*(id|whoami|ls|cat|pwd|uname|hostname)\s*$",
381                    r"(?m)\$\(\s*(id|whoami|ls|cat|wget|curl)\s*",
382                    r"(?m)`\s*(id|whoami|ls|cat)\s*`",
383                    r"(?m)\|\|\s*(id|whoami|ls|cat|ping)\s",
384                    r"(?m)&&\s*(id|whoami|ls|cat|ping)\s",
385                    r"(?m);\s*(id|whoami|ls|cat|ping|sleep)\s",
386                ],
387            },
388            AttackVector {
389                category: "cmdi", subcategory: "unix_advanced", severity: Critical,
390                mitre_id: "T1059.004",
391                search_location: "all",
392                context_keywords: &[],
393                patterns: &[
394                    r"(?m)(/usr/bin/|/bin/|/sbin/)(id|whoami|ls|cat|bash|sh|nc|wget|curl)",
395                    r"(?m)\|\s*(nc|ncat|netcat)\s",
396                    r"(?m)\|\s*(wget|curl)\s+http",
397                    r"(?m);\s*/bin/(bash|sh|dash)\s+-[ci]",
398                    r"(?m);\s*(chmod|chown)\s",
399                ],
400            },
401            AttackVector {
402                category: "cmdi", subcategory: "windows", severity: Critical,
403                mitre_id: "T1059.003",
404                search_location: "all",
405                context_keywords: &[],
406                patterns: &[
407                    r"(?i)[\|\;`]\s*(whoami|systeminfo|ipconfig|net\s+user|tasklist)\b",
408                    r"(?i)cmd\.exe\s+/[cCkK]\s+",
409                    r"(?i)powershell\.exe\s+-[eE][xX]",
410                    r"(?i)%(COMSPEC|SystemRoot|WINDIR)%",
411                    r"(?i)certutil\s+-urlcache\s+-split\s+-f\s+http",
412                ],
413            },
414            AttackVector {
415                category: "cmdi", subcategory: "blind_oob", severity: Critical,
416                mitre_id: "T1059.004",
417                search_location: "all",
418                context_keywords: &[],
419                patterns: &[
420                    r"(?m)\|\s*(nslookup|dig|host)\s+[a-zA-Z0-9]",
421                    r"(?m)ping\s+-[cnt]\s+\d+\s+[a-zA-Z0-9]",
422                    r"(?m);\s*(nslookup|dig|host|ping)\s+\$\{",
423                ],
424            },
425            // ── Path Traversal / Directory Traversal ──
426            AttackVector {
427                category: "path_traversal", subcategory: "dot_dot_slash", severity: High,
428                mitre_id: "T1083",
429                search_location: "all",
430                context_keywords: &[],
431                patterns: &[
432                    r"(\.\./){2,}(etc|var|proc|sys|home|root|tmp|windows|winnt)",
433                    r"(\.\.\\){2,}(windows|winnt|system32|boot\.ini)",
434                    r"\.\./\.\./\.\./.*(passwd|shadow|hosts|\.ini|\.conf)",
435                    r"\.%2e/\.%2e/",
436                    r"\.%252e/\.%252e/",
437                    r"\.\.%2f\.\.%2f",
438                    r"\.\.%5c\.\.%5c",
439                    r"file:///(etc|proc|sys|var|home)/",
440                ],
441            },
442            AttackVector {
443                category: "path_traversal", subcategory: "absolute_path", severity: High,
444                mitre_id: "T1083",
445                search_location: "all",
446                context_keywords: &[],
447                patterns: &[
448                    r"^/(etc|proc|sys|var|root|home)/.*(passwd|shadow|hosts|\.conf)",
449                    r"^(C:|D:)\\(windows|winnt|system32)\\.*",
450                ],
451            },
452            // ── LFI / RFI (Local/Remote File Inclusion) ──
453            AttackVector {
454                category: "lfi", subcategory: "local_include", severity: Critical,
455                mitre_id: "T1190",
456                search_location: "all",
457                context_keywords: &[],
458                patterns: &[
459                    r"(?i)(file|page|path|include|require|document|folder|dir|template|module|load)\s*=\s*(\.\./|/etc/|/proc/)",
460                    r"(?i)/etc/(passwd|shadow|hosts|group|sudoers|resolv\.conf)",
461                    r"(?i)/proc/(self|version|cpuinfo|meminfo|cmdline)/?",
462                    r"(?i)/var/log/(apache|nginx|syslog|messages|auth\.log)",
463                    r"(?i)C:\\windows\\(system32|win\.ini|boot\.ini|repair\\sam)",
464                    r"(?i)php://filter/convert\.base64-encode/resource=",
465                    r"(?i)php://filter/read=convert\.base64-encode/resource=",
466                    r"(?i)php://input",
467                    r"(?i)expect://(id|whoami|ls)",
468                    r"(?i)data://text/plain;base64,",
469                ],
470            },
471            AttackVector {
472                category: "rfi", subcategory: "remote_include", severity: Critical,
473                mitre_id: "T1190",
474                search_location: "all",
475                context_keywords: &[],
476                patterns: &[
477                    r"(?i)(https?|ftp)://[^/\s]+/[^?\s]+\.(php|txt|jpg|png|gif)\?",
478                    r"(?i)(https?|ftp)://.*(shell|backdoor|r57|c99|web-shell)",
479                    r"(?i)(https?|ftp)://.*/.*\.(txt|php|asp|jsp)\?",
480                ],
481            },
482            // ── SSRF (Server-Side Request Forgery) ──
483            AttackVector {
484                category: "ssrf", subcategory: "cloud_metadata", severity: Critical,
485                mitre_id: "T1190",
486                search_location: "all",
487                context_keywords: &[],
488                patterns: &[
489                    r"(?i)(169\.254\.\d+\.\d+|metadata\.google\.internal|100\.100\.\d+\.\d+)",
490                    r"(?i)(/latest/meta-data|/metadata/v1|/openstack)",
491                    r"(?i)instance-data\.ec2\.internal",
492                    r"(?i)/latest/(meta-data|dynamic|user-data)",
493                    r"(?i)kubernetes\.default\.svc",
494                    r"(?i)\.compute\.internal",
495                ],
496            },
497            AttackVector {
498                category: "ssrf", subcategory: "internal_ports", severity: High,
499                mitre_id: "T1190",
500                search_location: "all",
501                context_keywords: &[],
502                patterns: &[
503                    r"(?i)(http://|https://)(localhost|127\.\d+\.\d+\.\d+|0\.0\.0\.0|\[::1\])\s*[/:]",
504                    r"(?i)(http://|https://)(10\.\d+\.\d+\.\d+|172\.1[6-9]\.\d+\.\d+|172\.2\d\.\d+\.\d+|172\.3[01]\.\d+\.\d+|192\.168\.\d+\.\d+)",
505                ],
506            },
507            AttackVector {
508                category: "ssrf", subcategory: "dns_rebinding", severity: Medium,
509                mitre_id: "T1190",
510                search_location: "all",
511                context_keywords: &[],
512                patterns: &[
513                    r"(?i)([a-z0-9]+\.){2,}(1zero|rbndr|nip\.io|xip\.io|sslip\.io)",
514                    r"(?i)(nslookup|dig|host)\s+[a-z0-9]+\.[a-z]+\.[a-z]+",
515                ],
516            },
517            // ── XXE (XML External Entity) ──
518            AttackVector {
519                category: "xxe", subcategory: "external_entity", severity: Critical,
520                mitre_id: "T1190",
521                search_location: "body",
522                context_keywords: &[],
523                patterns: &[
524                    r#"<!ENTITY\s+\w+\s+(SYSTEM|PUBLIC)\s+['\"]"#,
525                    r#"<!ENTITY\s+%\s+\w+\s+SYSTEM\s+['\"]"#,
526                    r"<!DOCTYPE\s+\w+\s+\[\s*<!ENTITY",
527                    r"<xml[^>]*>\s*<!DOCTYPE",
528                    r#"<\?xml[^?]*\?>\s*<!DOCTYPE\s+\w+\s+\["#,
529                ],
530            },
531            AttackVector {
532                category: "xxe", subcategory: "billion_laughs", severity: Critical,
533                mitre_id: "T1499.002",
534                search_location: "body",
535                context_keywords: &[],
536                patterns: &[
537                    r#"<!ENTITY\s+\w+\s+['\"]<!ENTITY"#,
538                    r"&(lol|lolz|lol1|lol2|laugh|boom|ha|haha);",
539                ],
540            },
541            // ── SSTI (Server-Side Template Injection) ──
542            AttackVector {
543                category: "ssti", subcategory: "jinja2", severity: Critical,
544                mitre_id: "T1190",
545                search_location: "all",
546                context_keywords: &[],
547                patterns: &[
548                    r"\{\{\s*(\d+\s*[\*\+\-]\s*\d+|\w+\.\w+)",
549                    r"\{\{\s*config\s*\}\}",
550                    r"\{\{\s*self\s*\}\}",
551                    r"\{\{\s*''\.__class__\.__mro__",
552                    r"\{\{\s*lipsum\.__globals__",
553                    r"\{\{\s*request\.application\.__globals__",
554                    r"\{\%\s*(import|extends|include|set|for|if)\s+",
555                    r"\{\{\s*cycler\.__init__\.__globals__",
556                ],
557            },
558            AttackVector {
559                category: "ssti", subcategory: "twig", severity: Critical,
560                mitre_id: "T1190",
561                search_location: "all",
562                context_keywords: &[],
563                patterns: &[
564                    r"\{\{\s*_self\.env\.registerUndefinedFilterCallback",
565                    r#"\{\{\s*['\"].*['\"]\s*\|\s*map\("#,
566                    r#"\{\{\s*['\"].*['\"]\s*\|\s*filter\("#,
567                ],
568            },
569            AttackVector {
570                category: "ssti", subcategory: "freemarker", severity: Critical,
571                mitre_id: "T1190",
572                search_location: "all",
573                context_keywords: &[],
574                patterns: &[
575                    r"\$\{.*\.class\.forName\(",
576                    r#"<\#assign\s+ex\s*=\s*['\"]freemarker"#,
577                    r"\$\{(.*\?)?new\s+java\.\w+\(",
578                ],
579            },
580            // ── Deserialization Attacks ──
581            AttackVector {
582                category: "deserialization", subcategory: "java", severity: Critical,
583                mitre_id: "T1190",
584                search_location: "all",
585                context_keywords: &[],
586                patterns: &[
587                    r"(?i)(ac ed 00 05|rO0AB|aced0005)",
588                    r"(?i)(com\.sun\.org\.apache\.xalan|org\.apache\.commons\.collections)",
589                    r"(?i)(java\.lang\.Runtime|java\.lang\.ProcessBuilder)",
590                    r"(?i)(org\.springframework\.beans\.factory)",
591                ],
592            },
593            AttackVector {
594                category: "deserialization", subcategory: "php", severity: Critical,
595                mitre_id: "T1190",
596                search_location: "all",
597                context_keywords: &[],
598                patterns: &[
599                    r#"(?i)(O:\d+:['\"][A-Za-z0-9_\\]+['\"]:\d+:)"#,
600                    r"(?i)(a:\d+:\{.*s:\d+:)",
601                    r#"(?i)(C:\d+:['\"][A-Za-z0-9_\\]+['\"]:\d+:)"#,
602                ],
603            },
604            AttackVector {
605                category: "deserialization", subcategory: "python_pickle", severity: Critical,
606                mitre_id: "T1190",
607                search_location: "all",
608                context_keywords: &[],
609                patterns: &[
610                    r"(?i)(cos\\nsystem|c__builtin__\\neval|csubprocess\\nPopen)",
611                    r"(?i)(__reduce__|__reduce_ex__)",
612                    r"(?i)(S'((import|exec|eval)\b|__import__)",
613                ],
614            },
615            AttackVector {
616                category: "deserialization", subcategory: "nodejs", severity: Critical,
617                mitre_id: "T1190",
618                search_location: "all",
619                context_keywords: &[],
620                patterns: &[
621                    r#"(?i)\{"_bsontype":"Code","code":"[^"]*require\(['"]child_process"#,
622                    r#"(?i)\{"type":"Function","body":"[^"]*require\("#,
623                ],
624            },
625            // ── JWT Attacks ──
626            AttackVector {
627                category: "jwt", subcategory: "none_algorithm", severity: Critical,
628                mitre_id: "T1557",
629                search_location: "all",
630                context_keywords: &[],
631                patterns: &[
632                    r#"(?i)"alg"\s*:\s*"none""#,
633                    r"(?i)ey[A-Za-z0-9_-]+\.ey[A-Za-z0-9_-]+\.(?:$|\s|&)",
634                ],
635            },
636            AttackVector {
637                category: "jwt", subcategory: "key_confusion", severity: High,
638                mitre_id: "T1557",
639                search_location: "all",
640                context_keywords: &[],
641                patterns: &[
642                    r#"(?i)"alg"\s*:\s*"HS256"[^}]*"k"\s*:"#,
643                    r#"(?i)"jwk"\s*:\s*\{[^}]*"kty"\s*:"#,
644                ],
645            },
646            // ── GraphQL Attacks ──
647            AttackVector {
648                category: "graphql", subcategory: "introspection", severity: Medium,
649                mitre_id: "T1190",
650                search_location: "body",
651                context_keywords: &[],
652                patterns: &[
653                    r"__schema\s*\{\s*types\s*\{",
654                    r#"__type\s*\(\s*name\s*:\s*\"\""#,
655                    r"query\s*\{\s*__schema\{",
656                    r"fragment\s+FullType\s+on\s+__Type\s*\{",
657                ],
658            },
659            AttackVector {
660                category: "graphql", subcategory: "batch_attack", severity: High,
661                mitre_id: "T1190",
662                search_location: "body",
663                context_keywords: &[],
664                patterns: &[
665                    r#"\[\s*\{\s*\"query\""#,
666                    r#"\"batch\"\s*:\s*\["#,
667                ],
668            },
669            // ── Prototype Pollution ──
670            AttackVector {
671                category: "prototype_pollution", subcategory: "javascript", severity: High,
672                mitre_id: "T1059.007",
673                search_location: "all",
674                context_keywords: &[],
675                patterns: &[
676                    r#"(?i)(__proto__|constructor|prototype)\s*=\s*["']"#,
677                    r#"(?i)"__proto__"\s*:\s*\{[^}]*\}"#,
678                    r#"(?i)"constructor"\s*:\s*\{[^}]*"prototype"\s*:"#,
679                    r#"(?i)\[\[__proto__\]\]\s*=\s*"#,
680                ],
681            },
682            // ── CRLF Injection ──
683            AttackVector {
684                category: "crlf", subcategory: "response_splitting", severity: High,
685                mitre_id: "T1190",
686                search_location: "all",
687                context_keywords: &[],
688                patterns: &[
689                    r"(\r\n|\%0[dD]\%0[aA])\s*Content-(Type|Length|Disposition):",
690                    r"(\r\n|\%0[dD]\%0[aA])\s*Set-Cookie\s*:",
691                    r"(\r\n|\%0[dD]\%0[aA])\s*(HTTP/|Location\s*:)",
692                    r"(\r\n|\%0[dD]\%0[aA])\s*X-XSS-Protection\s*:",
693                ],
694            },
695            AttackVector {
696                category: "crlf", subcategory: "header_injection", severity: Medium,
697                mitre_id: "T1190",
698                search_location: "all",
699                context_keywords: &[],
700                patterns: &[
701                    r"(\r\n|\%0[dD]\%0[aA])\s*[A-Za-z0-9\-]+\s*:\s*[^\n]+\r?\n",
702                ],
703            },
704            // ── HTTP Host Header / Request Smuggling ──
705            AttackVector {
706                category: "http_smuggling", subcategory: "cl_te", severity: High,
707                mitre_id: "T1190",
708                search_location: "headers",
709                context_keywords: &[],
710                patterns: &[
711                    r"(?i)^\s*Transfer-Encoding\s*:\s*[\x0b]",
712                    r"(?i)^\s*Transfer-Encoding\s*:.*\x0b",
713                    r"(?i)^\s*Content-Length\s*:\s*\d+\s*\n\s*Content-Length",
714                ],
715            },
716            AttackVector {
717                category: "host_attack", subcategory: "host_injection", severity: High,
718                mitre_id: "T1190",
719                search_location: "headers",
720                context_keywords: &[],
721                patterns: &[
722                    r"(?i)^\s*Host\s*:\s*(evil|attacker|malware|hack|bugbounty|pwned)\.(com|net|org|io)",
723                    r"(?i)^\s*Host\s*:\s*(127\.0\.0\.1|localhost|0\.0\.0\.0)",
724                    r"(?i)^\s*X-Forwarded-Host\s*:\s*(evil|attacker|127\.0\.0\.1)",
725                ],
726            },
727            // ── File Upload Attacks ──
728            AttackVector {
729                category: "file_upload", subcategory: "malicious_extension", severity: Critical,
730                mitre_id: "T1190",
731                search_location: "all",
732                context_keywords: &[],
733                patterns: &[
734                    r#"(?i)filename\s*=\s*["'][^"']+\.(php|jsp|asp|aspx|phtml|php5|php7|shtml|cgi|pl|war|jspx)['"]#"#,
735                    r#"(?i)Content-Disposition:.*filename=\\*['"][^'"]+\.(php|jsp|asp)['"]#"#,
736                    r"(?i)\.php\d*\.(jpg|png|gif|pdf)",
737                    r"(?i)\.(php|jsp|asp)\s*%00",
738                ],
739            },
740            // ── Open Redirect ──
741            AttackVector {
742                category: "open_redirect", subcategory: "url_param", severity: Medium,
743                mitre_id: "T1204.001",
744                search_location: "all",
745                context_keywords: &[],
746                patterns: &[
747                    r"(?i)(redirect|url|next|return|goto|target|dest|continue|back)\s*=\s*(https?://|//)[^&\s]+",
748                    r"(?i)(redirect|url|next|return|goto)\s*=\s*(evil|attacker|phish|malw)",
749                    r#"(?i)"(redirect|url|next)"\s*:\s*"(https?://|//)"#,
750                ],
751            },
752            // ── Cookie Manipulation ──
753            AttackVector {
754                category: "cookie_attack", subcategory: "injection", severity: Medium,
755                mitre_id: "T1539",
756                search_location: "headers",
757                context_keywords: &[],
758                patterns: &[
759                    r"(?i)Cookie\s*:\s*.*(<script|alert|onerror|javascript:)",
760                    r"(?i)Cookie\s*:\s*.*(../|\.\.\\\\)",
761                    r"(?i)Cookie\s*:\s*.*(SELECT|UNION)",
762                ],
763            },
764            // ── Cache Poisoning ──
765            AttackVector {
766                category: "cache_poisoning", subcategory: "header_probe", severity: High,
767                mitre_id: "T1499",
768                search_location: "headers",
769                context_keywords: &[],
770                patterns: &[
771                    r"(?i)^\s*X-Forwarded-(Scheme|Proto|Host|Port|Prefix)\s*:\s*(https?://)?[a-z]+",
772                    r"(?i)^\s*X-Original-URL\s*:\s*",
773                    r"(?i)^\s*X-Rewrite-URL\s*:\s*",
774                    r"(?i)^\s*X-HTTP-Method-Override\s*:\s*",
775                    r"(?i)^\s*X-Method-Override\s*:\s*",
776                ],
777            },
778            // ── Authentication Bypass ──
779            AttackVector {
780                category: "auth_bypass", subcategory: "header_forgery", severity: Critical,
781                mitre_id: "T1548",
782                search_location: "headers",
783                context_keywords: &[],
784                patterns: &[
785                    r"(?i)^\s*X-Forwarded-For\s*:\s*(127\.0\.0\.1|localhost|::1)",
786                    r"(?i)^\s*X-Remote-IP\s*:\s*(127\.0\.0\.1|10\.\d+\.\d+\.\d+)",
787                    r"(?i)^\s*X-Originating-IP\s*:\s*(127\.0\.0\.1)",
788                    r"(?i)^\s*X-Real-IP\s*:\s*(127\.0\.0\.1|10\.\d+\.\d+\.\d+)",
789                    r"(?i)^\s*Authorization\s*:\s*Basic\s+[A-Za-z0-9+/=]+={0,2}",
790                ],
791            },
792            // ── HTTP Parameter Pollution ──
793            AttackVector {
794                category: "hpp", subcategory: "duplicate_params", severity: Medium,
795                mitre_id: "T1190",
796                search_location: "query",
797                context_keywords: &[],
798                patterns: &[
799                    r#"(?i)([?&])[^?&=]+=[^?&=]+&[^?&=]+=&"#,
800                    r"(?i)([?&])[^?&=]+=[^?&=]+&(same_param)=[^?&=]+",
801                ],
802            },
803            // ── HTTP Method Tampering ──
804            AttackVector {
805                category: "method_tamper", subcategory: "method_override", severity: Medium,
806                mitre_id: "T1190",
807                search_location: "all",
808                context_keywords: &[],
809                patterns: &[
810                    r"(?i)_method\s*=\s*(PUT|DELETE|PATCH|OPTIONS|TRACE|CONNECT)",
811                    r"(?i)X-HTTP-Method\s*:\s*(PUT|DELETE)",
812                ],
813            },
814            // ── Null Byte Injection ──
815            AttackVector {
816                category: "null_byte", subcategory: "termination", severity: High,
817                mitre_id: "T1190",
818                search_location: "all",
819                context_keywords: &[],
820                patterns: &[
821                    r"(?i)%00\.(php|jsp|asp|html|txt|conf)",
822                    r"(?i)\.php%00",
823                    r"\x00[^\x00]*\.(php|jsp|asp)",
824                ],
825            },
826            // ── CORS Misconfiguration Probe ──
827            AttackVector {
828                category: "cors", subcategory: "origin_spoof", severity: Medium,
829                mitre_id: "T1190",
830                search_location: "headers",
831                context_keywords: &[],
832                patterns: &[
833                    r"(?i)^\s*Origin\s*:\s*https?://(evil|attacker|null|127\.0\.0\.1)",
834                    r"(?i)^\s*Origin\s*:\s*null",
835                ],
836            },
837            // ── Brute Force / Credential Stuffing ──
838            AttackVector {
839                category: "brute_force", subcategory: "multi_attempt", severity: High,
840                mitre_id: "T1110",
841                search_location: "body",
842                context_keywords: &[],
843                patterns: &[
844                    r#"(?i)(password|passwd|pwd|pin|secret|token)\s*=\s*['\"][^'\"]{1,20}['\"]"#,
845                    r#"(?i)\{"(email|username|user|login)"\s*:\s*"[^"]+"\s*,\s*"(password|passwd|pwd)"\s*:\s*""#,
846                ],
847            },
848            // ── Format String ──
849            AttackVector {
850                category: "format_string", subcategory: "printf_injection", severity: High,
851                mitre_id: "T1190",
852                search_location: "all",
853                context_keywords: &[],
854                patterns: &[
855                    r"(%[0-9]*\$)?%([xXndsSph]|p[rd]){1,2}",
856                    r"%[0-9]{1,2}\$[xdspnXDSPN]",
857                ],
858            },
859            // ── Race Condition Probing ──
860            AttackVector {
861                category: "race_condition", subcategory: "concurrent", severity: Medium,
862                mitre_id: "T1499",
863                search_location: "all",
864                context_keywords: &[],
865                patterns: &[
866                    r"(?i)(race|parallel|concurrent|thread)\s*=\s*(true|1|yes)",
867                ],
868            },
869            // ── Clickjacking Frame Attempts ──
870            AttackVector {
871                category: "clickjacking", subcategory: "frame_probe", severity: Low,
872                mitre_id: "T1499",
873                search_location: "all",
874                context_keywords: &[],
875                patterns: &[
876                    r#"(?i)<iframe[^>]*style\s*=\s*['\"]opacity\s*:\s*0"#,
877                    r#"(?i)<iframe[^>]*width\s*=\s*['\"]\d+['\"][^>]*height\s*=\s*['\"]\d+"#,
878                ],
879            },
880            // ── Source Map Extraction ──
881            AttackVector {
882                category: "source_leak", subcategory: "sourcemap_probe", severity: Low,
883                mitre_id: "T1213",
884                search_location: "all",
885                context_keywords: &[],
886                patterns: &[
887                    r"(?i)(\.js\.map|\.css\.map|//#\s*sourceMappingURL)",
888                    r"(?i)/_next/static/.*\.map$",
889                ],
890            },
891            // ── React/Next.js Specific Attacks ──
892            AttackVector {
893                category: "rsc_attack", subcategory: "flight_injection", severity: Critical,
894                mitre_id: "T1190",
895                search_location: "body",
896                context_keywords: &[],
897                patterns: &[
898                    r#"(?i)\[\["\$","@\w+",null,\{"#,
899                    r#"(?i)"type"\s*:\s*"blob_handler""#,
900                    r#"(?i)"dispatch"\s*:\s*"dynamic""#,
901                    r#"(?i)"method"\s*:\s*"child_process\.exec""#,
902                ],
903            },
904            AttackVector {
905                category: "rsc_attack", subcategory: "server_action_probe", severity: High,
906                mitre_id: "T1190",
907                search_location: "headers",
908                context_keywords: &[],
909                patterns: &[
910                    r"(?i)^\s*Next-Action\s*:",
911                    r"(?i)^\s*RSC\s*:\s*1",
912                    r"(?i)^\s*Content-Type\s*:\s*text/x-component",
913                    r"(?i)^\s*Next-Router-State-Tree\s*:",
914                ],
915            },
916            AttackVector {
917                category: "nextjs_probe", subcategory: "internal_route", severity: Medium,
918                mitre_id: "T1190",
919                search_location: "path",
920                context_keywords: &[],
921                patterns: &[
922                    r"^/_next/.*(webpack-hmr|__nextjs_|middleware)",
923                    r"^/_next/data/",
924                    r"^/_next/image\?url=",
925                ],
926            },
927            // ── WebSocket Attack Probing ──
928            AttackVector {
929                category: "websocket", subcategory: "injection", severity: High,
930                mitre_id: "T1190",
931                search_location: "all",
932                context_keywords: &[],
933                patterns: &[
934                    r"(?i)ws://(evil|attacker|localhost|127\.0\.0\.1)",
935                    r"(?i)Sec-WebSocket-Key\s*:\s*[A-Za-z0-9+/=]+",
936                ],
937            },
938            // ── DNS Exfiltration Probing ──
939            AttackVector {
940                category: "dns_exfil", subcategory: "tunnel_probe", severity: High,
941                mitre_id: "T1048.001",
942                search_location: "all",
943                context_keywords: &[],
944                patterns: &[
945                    r"(?i)(nslookup|dig|host)\s+\w{20,}\.[a-z]+\.[a-z]+",
946                    r"(?i)\.(burpcollaborator|interact\.sh|canarytokens|oastify)\.(com|net|io|pro|live|site|online|fun)",
947                ],
948            },
949            // ── Content-Type Confusion ──
950            AttackVector {
951                category: "content_type", subcategory: "mismatch_attack", severity: Medium,
952                mitre_id: "T1190",
953                search_location: "all",
954                context_keywords: &[],
955                patterns: &[
956                    r"(?i)Content-Type\s*:\s*text/html.*\{.*\}.*Content-Type\s*:\s*application/json",
957                ],
958            },
959            // ── Charset / Encoding Attacks ──
960            AttackVector {
961                category: "encoding_attack", subcategory: "charset_confusion", severity: Medium,
962                mitre_id: "T1190",
963                search_location: "all",
964                context_keywords: &[],
965                patterns: &[
966                    r"(?i)%u[0-9a-fA-F]{4}",
967                    r"(?i)&#x[0-9a-fA-F]+;",
968                    r"(?i)&#\d{2,};",
969                    r"(?i)[\\]x[0-9a-fA-F]{2}",
970                ],
971            },
972            // ── User-Agent Probing / Fake Crawlers ──
973            AttackVector {
974                category: "user_agent", subcategory: "fake_crawler", severity: Low,
975                mitre_id: "T1592",
976                search_location: "headers",
977                context_keywords: &[],
978                patterns: &[
979                    r"(?i)User-Agent\s*:\s*.*(sqlmap|nikto|nmap|burp|nessus|wpscan|dirbuster|gobuster|hydra)",
980                    r"(?i)User-Agent\s*:\s*.*(curl|wget|python|go-http|libwww|axios|node-fetch)",
981                ],
982            },
983            // ── API Key / Token Brute Force ──
984            AttackVector {
985                category: "credential_probe", subcategory: "token_brute", severity: High,
986                mitre_id: "T1110.001",
987                search_location: "all",
988                context_keywords: &[],
989                patterns: &[
990                    r"(?i)(Authorization|X-API-Key|X-Auth-Token|Bearer)\s*:\s*[A-Za-z0-9\-_\.]{20,}",
991                ],
992            },
993            // ── Session Fixation ──
994            AttackVector {
995                category: "session_fixation", subcategory: "cookie_set", severity: Medium,
996                mitre_id: "T1539",
997                search_location: "headers",
998                context_keywords: &[],
999                patterns: &[
1000                    r"(?i)Cookie\s*:\s*(SESSID|JSESSIONID|PHPSESSID|session_id|sid|connect\.sid)\s*=\s*[A-Za-z0-9]+",
1001                ],
1002            },
1003            // ── CSS Injection (data exfil) ──
1004            AttackVector {
1005                category: "css_injection", subcategory: "data_exfil", severity: Medium,
1006                mitre_id: "T1213",
1007                search_location: "all",
1008                context_keywords: &[],
1009                patterns: &[
1010                    r#"(?i)@import\s+url\s*\(\s*['\"]?https?://"#,
1011                    r#"(?i)background(-image)?\s*:\s*url\s*\(\s*['\"]?https?://"#,
1012                    r#"input\[type\s*=\s*["']password["']\][^{]*\{[^}]*background"#,
1013                ],
1014            },
1015    ])
1016}
1017
1018/// Pre-compiled attack vectors for fast detection.
1019fn compiled_vectors() -> &'static [CompiledVector] {
1020    static COMPILED: std::sync::OnceLock<Vec<CompiledVector>> = std::sync::OnceLock::new();
1021    COMPILED.get_or_init(|| {
1022        attack_vectors()
1023            .iter()
1024            .map(|av| {
1025                let patterns: Vec<Regex> = av
1026                    .patterns
1027                    .iter()
1028                    .filter_map(|p| Regex::new(p).ok())
1029                    .collect();
1030                CompiledVector {
1031                    category: av.category,
1032                    subcategory: av.subcategory,
1033                    patterns,
1034                    severity: av.severity.clone(),
1035                    mitre_id: av.mitre_id,
1036                    search_location: av.search_location,
1037                    context_keywords: av.context_keywords,
1038                }
1039            })
1040            .collect()
1041    })
1042}
1043
1044// ═════════════════════════════════════════════════════════════════════════════
1045// Honeypot Engine
1046// ═════════════════════════════════════════════════════════════════════════════
1047
/// The core honeypot detection and intelligence engine.
///
/// Bundles the active configuration, the accumulated runtime state (counters,
/// stored events, attacker profiles) and the lookup tables used to make the
/// simulated responses look like a real React Server Components application.
pub struct HoneypotEngine {
    /// Configuration consulted by the engine's own methods. `with_config`
    /// also stores a clone of it in `state.config`.
    config: HoneypotConfig,
    /// Mutable runtime state: request/attack counters, the stored event log
    /// and the attacker profiles keyed by profile ID.
    state: HoneypotState,
    /// Per-profile request timestamps for cadence analysis.
    request_times: HashMap<String, Vec<Instant>>,
    /// Fake RSC endpoint list for realistic simulation. Request paths are
    /// compared against these entries as prefixes.
    rsc_endpoints: Vec<String>,
}
1057
1058impl HoneypotEngine {
1059    /// Create a new honeypot engine with default configuration.
1060    pub fn new() -> Self {
1061        Self::with_config(HoneypotConfig::default())
1062    }
1063
1064    /// Create a new honeypot engine with custom configuration.
1065    pub fn with_config(config: HoneypotConfig) -> Self {
1066        Self {
1067            config: config.clone(),
1068            state: HoneypotState {
1069                config,
1070                total_requests: 0,
1071                total_attacks_detected: 0,
1072                unique_attackers: 0,
1073                attack_events: Vec::new(),
1074                attacker_profiles: HashMap::new(),
1075                uptime_seconds: 0.0,
1076                requests_per_minute: 0.0,
1077            },
1078            request_times: HashMap::new(),
1079            rsc_endpoints: vec![
1080                "/_rsc/__PAGE__".to_string(),
1081                "/api/graphql".to_string(),
1082                "/api/auth/callback".to_string(),
1083                "/api/chat".to_string(),
1084                "/api/upload".to_string(),
1085                "/api/search".to_string(),
1086                "/api/admin/settings".to_string(),
1087                "/dashboard".to_string(),
1088            ],
1089        }
1090    }
1091
1092    /// Process a raw HTTP request and return detection results.
1093    pub fn process_request(&mut self, req: &RawRequest) -> DetectionResult {
1094        let _start = Instant::now();
1095        self.state.total_requests += 1;
1096
1097        // Update request rate
1098        self.state.uptime_seconds = (Utc::now().timestamp_millis() as f64) / 1000.0;
1099        if self.state.uptime_seconds > 0.0 {
1100            self.state.requests_per_minute =
1101                (self.state.total_requests as f64 / self.state.uptime_seconds) * 60.0;
1102        }
1103
1104        let profile_id = self.get_or_create_profile_id(req);
1105        self.update_request_times(&profile_id);
1106
1107        // Detect attacks
1108        let detections = self.detect_attacks(req, &profile_id);
1109
1110        let attack_count = detections.len() as u64;
1111        if attack_count > 0 {
1112            self.state.total_attacks_detected += attack_count;
1113            for det in &detections {
1114                self.state.attack_events.push(det.clone());
1115                // Limit stored events
1116                if self.state.attack_events.len() > 10000 {
1117                    self.state.attack_events.drain(0..1000);
1118                }
1119            }
1120        } else if self.config.log_all_requests {
1121            // Log clean requests too if configured
1122            let event = AttackEvent {
1123                event_id: Self::generate_event_id(),
1124                timestamp: Utc::now().to_rfc3339(),
1125                category: "clean".to_string(),
1126                subcategory: "passive".to_string(),
1127                matched_payload: String::new(),
1128                full_payload: String::new(),
1129                method: req.method.clone(),
1130                path: req.path.clone(),
1131                severity: Severity::Info,
1132                mitre_id: None,
1133                simulated_response: 200,
1134                attacker_ip: req.ip.clone(),
1135                user_agent: req.headers.get("user-agent").cloned().unwrap_or_default(),
1136                headers: req.headers.clone(),
1137                session_id: Some(profile_id.clone()),
1138                confidence: 0.0,
1139            };
1140            self.state.attack_events.push(event);
1141            if self.state.attack_events.len() > 10000 {
1142                self.state.attack_events.drain(0..1000);
1143            }
1144        }
1145
1146        // Update or create attacker profile
1147        self.update_attacker_profile(req, &profile_id, &detections);
1148
1149        // Evict stale profiles periodically (keep top 5000)
1150        if self.state.attacker_profiles.len() > 10000 {
1151            self.evict_profiles();
1152        }
1153
1154        // Generate simulated response
1155        let simulated_status = self.simulate_status(&detections);
1156        let simulated_body = self.simulate_body(req, &detections);
1157        let content_type = self.simulate_content_type(req);
1158        let should_block = self.should_block_request(&detections);
1159        let suggested_delay = self.calculate_delay(&detections);
1160
1161        DetectionResult {
1162            detections,
1163            simulated_status,
1164            simulated_body,
1165            content_type,
1166            should_block,
1167            suggested_delay_ms: suggested_delay.as_millis() as u64,
1168        }
1169    }
1170
1171    /// Analyze all 45+ attack vectors against a request.
1172    fn detect_attacks(&mut self, req: &RawRequest, profile_id: &str) -> Vec<AttackEvent> {
1173        let mut events = Vec::new();
1174
1175        for vector in compiled_vectors().iter() {
1176            let search_text = match vector.search_location {
1177                "body" => &req.body,
1178                "query" => &req.query_string,
1179                "path" => &req.path,
1180                "headers" => &self.headers_as_string(&req.headers),
1181                "all" => &self.all_request_text(req),
1182                _ => &self.all_request_text(req),
1183            };
1184
1185            if search_text.is_empty() {
1186                continue;
1187            }
1188
1189            // Check context keywords (AND logic)
1190            if !vector.context_keywords.is_empty() {
1191                let has_context = vector
1192                    .context_keywords
1193                    .iter()
1194                    .any(|kw| search_text.to_lowercase().contains(&kw.to_lowercase()));
1195                if !has_context {
1196                    continue;
1197                }
1198            }
1199
1200            for re in &vector.patterns {
1201                if let Some(m) = re.find(search_text) {
1202                    let matched = m.as_str().to_string();
1203                    let confidence = self.calculate_confidence(vector, &matched, search_text);
1204
1205                    if confidence >= self.config.detection_threshold {
1206                        events.push(AttackEvent {
1207                            event_id: Self::generate_event_id(),
1208                            timestamp: Utc::now().to_rfc3339(),
1209                            category: vector.category.to_string(),
1210                            subcategory: vector.subcategory.to_string(),
1211                            matched_payload: Self::truncate_str(&matched, 500),
1212                            full_payload: Self::truncate_str(
1213                                search_text,
1214                                self.config.max_payload_store,
1215                            ),
1216                            method: req.method.clone(),
1217                            path: req.path.clone(),
1218                            severity: vector.severity.clone(),
1219                            mitre_id: Some(vector.mitre_id.to_string()),
1220                            simulated_response: 0, // Filled later
1221                            attacker_ip: req.ip.clone(),
1222                            user_agent: req.headers.get("user-agent").cloned().unwrap_or_default(),
1223                            headers: req.headers.clone(),
1224                            session_id: Some(profile_id.to_string()),
1225                            confidence,
1226                        });
1227                        break; // One match per vector category is enough
1228                    }
1229                }
1230            }
1231        }
1232
1233        // Fill simulated_response after detection
1234        for event in &mut events {
1235            event.simulated_response = self.simulate_status_for_event(event);
1236        }
1237
1238        events
1239    }
1240
1241    /// Calculate detection confidence based on pattern specificity and context.
1242    fn calculate_confidence(
1243        &self,
1244        _vector: &CompiledVector,
1245        matched: &str,
1246        full_text: &str,
1247    ) -> f64 {
1248        let mut confidence = 0.5; // Base confidence
1249
1250        // Longer patterns are more specific
1251        let specificity_bonus = (matched.len() as f64 / 30.0).min(0.3);
1252        confidence += specificity_bonus;
1253
1254        // Multiple patterns from same category (already checked via context_keywords)
1255        // Count additional keyword signals
1256        let keyword_signals = [
1257            ("eval", 0.05),
1258            ("exec", 0.05),
1259            ("system", 0.05),
1260            ("import", 0.03),
1261            ("require", 0.03),
1262            ("base64", 0.04),
1263            ("fromCharCode", 0.05),
1264            ("String.fromCharCode", 0.06),
1265            ("atob", 0.03),
1266            ("charCodeAt", 0.03),
1267            ("document.cookie", 0.06),
1268            ("window.location", 0.04),
1269            ("XMLHttpRequest", 0.03),
1270            ("fetch(", 0.02),
1271            ("curl", 0.04),
1272            ("wget", 0.04),
1273            ("nc ", 0.05),
1274            ("/bin/bash", 0.06),
1275            ("/bin/sh", 0.06),
1276            ("cmd.exe", 0.06),
1277            ("powershell", 0.06),
1278            ("reverse", 0.04),
1279            ("shell", 0.05),
1280            ("backdoor", 0.06),
1281            ("trojan", 0.06),
1282            ("exploit", 0.05),
1283        ];
1284
1285        let lower = full_text.to_lowercase();
1286        for (signal, bonus) in keyword_signals {
1287            if lower.contains(signal) {
1288                confidence += bonus;
1289            }
1290        }
1291
1292        confidence.min(1.0)
1293    }
1294
1295    /// Determine the simulated HTTP status for a detection set.
1296    fn simulate_status(&self, detections: &[AttackEvent]) -> u16 {
1297        if detections.is_empty() {
1298            return 200;
1299        }
1300        let has_critical = detections.iter().any(|d| d.severity == Severity::Critical);
1301        let has_high = detections.iter().any(|d| d.severity == Severity::High);
1302
1303        if has_critical {
1304            500 // Internal error — don't tip off the attacker
1305        } else if has_high {
1306            400 // Bad request
1307        } else {
1308            200 // Appear normal for low/medium severity
1309        }
1310    }
1311
1312    fn simulate_status_for_event(&self, event: &AttackEvent) -> u16 {
1313        match event.severity {
1314            Severity::Critical => 500,
1315            Severity::High => 400,
1316            Severity::Medium | Severity::Low | Severity::Info => 200,
1317        }
1318    }
1319
1320    /// Check whether a request targets an RSC endpoint.
1321    fn is_rsc_request(&self, req: &RawRequest) -> bool {
1322        self.rsc_endpoints.iter().any(|ep| req.path.starts_with(ep))
1323            || req
1324                .headers
1325                .get("content-type")
1326                .map(|ct| ct.contains("text/x-component"))
1327                .unwrap_or(false)
1328            || req.headers.contains_key("next-action")
1329    }
1330
1331    /// Generate a realistic fake response body.
1332    fn simulate_body(&self, req: &RawRequest, _detections: &[AttackEvent]) -> String {
1333        if !self.config.fake_rsc_responses {
1334            return String::new();
1335        }
1336
1337        let is_rsc = self.is_rsc_request(req);
1338        let mut body = if is_rsc {
1339            self.generate_fake_rsc_response(req)
1340        } else if req.path.contains("/api/") {
1341            self.generate_fake_api_response(req)
1342        } else {
1343            self.generate_fake_html_response(req)
1344        };
1345
1346        // Progressive sizing: pad response with irrelevant data to simulate
1347        // a real app's variable response sizes and keep attackers engaged
1348        if self.config.progressive_sizing {
1349            let extra_bytes = 128 + (Utc::now().timestamp_millis() as usize % 1024);
1350            body.push_str(&" ".repeat(extra_bytes / 32));
1351        }
1352
1353        body
1354    }
1355
1356    /// Generate a fake React Server Components Flight-protocol response.
1357    fn generate_fake_rsc_response(&self, _req: &RawRequest) -> String {
1358        let responses = [
1359            r#"0:["$","@2",null,{"id":"__PAGE__","children":[["$","@3",null,{"name":"Page","props":{}}]]}]
13601:{"status":"resolved","data":{"pageProps":{"title":"Dashboard","user":{"name":"Admin User","role":"administrator","email":"admin@internal.local"}}}}
13612:["$","div",null,{"className":"page-wrapper","children":[["$","header",null,{"children":"Dashboard"}],["$","main",null,{"children":["$","p",null,{"children":"Welcome back, Admin User"},"$","@4",null,{}]}]]
13623:{"status":"pending","chunks":["@5","@6"]}"#,
1363            r#"0:["$","@2",null,{"id":"__PAGE__"}]
13641:{"status":"resolved","data":{"pageProps":{"items":[{"id":1,"name":"Project Alpha","status":"active","owner":"admin"},{"id":2,"name":"Project Beta","status":"inactive","owner":"user2"},{"id":3,"name":"API Gateway","status":"active","owner":"admin"}]}}}
13652:"$6eb96e9c8e4a3f1b2d5c7a8e9f0a1b2c""#,
1366            r#"0:["$","@2",null,{"id":"__PAGE__","children":[["$","@3",null,{"name":"ErrorBoundary"}]],"fallback":["$","@4",null,{"name":"SuspenseFallback"}]}]
13671:{"status":"pending","chunks":["@5"]}
13682:["$","div",null,{"className":"layout","children":["$","nav",null,{"children":[["$","a",null,{"href":"/dashboard","children":"Dashboard"}],["$","a",null,{"href":"/settings","children":"Settings"}]]}]}"#,
1369        ];
1370
1371        let idx = (Utc::now().timestamp_millis() as usize) % responses.len();
1372        responses[idx].to_string()
1373    }
1374
1375    /// Generate a fake JSON API response.
1376    fn generate_fake_api_response(&self, req: &RawRequest) -> String {
1377        if req.path.contains("/graphql") {
1378            r#"{"data":{"__typename":"Query","node":{"id":"UHJvamVjdDox","name":"Internal Project","owner":{"login":"admin","email":"admin@internal.local"}}}}"#.to_string()
1379        } else {
1380            r#"{"success":true,"data":{"id":"67f1a2b3c4d5","status":"ok","timestamp":"2026-05-06T12:00:00Z","message":"Operation completed"}}"#.to_string()
1381        }
1382    }
1383
1384    /// Generate a fake HTML page response.
1385    fn generate_fake_html_response(&self, _req: &RawRequest) -> String {
1386        format!(
1387            r#"<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="generator" content="Next.js 15.2.3"><meta name="viewport" content="width=device-width,initial-scale=1"><title>Internal Dashboard</title><link rel="preload" href="/_next/static/chunks/app/layout-{}.js" as="script"></head><body><div id="__next"><div class="app-shell"><header class="topbar"><nav><a href="/dashboard">Dashboard</a><a href="/admin">Admin</a><a href="/api/docs">API</a></nav><div class="user-menu">Signed in as <strong>Admin User</strong></div></header><main><!--$?--><template id="B:0"></template><div class="skeleton-loader"><div class="skeleton-card"></div><div class="skeleton-row"></div><div class="skeleton-row short"></div></div><!--/$--></main></div></div><script src="/_next/static/chunks/main-app-{}.js" async></script></body></html>"#,
1388            Self::random_hex(16),
1389            Self::random_hex(16)
1390        )
1391    }
1392
1393    fn simulate_content_type(&self, req: &RawRequest) -> String {
1394        let is_rsc = self.is_rsc_request(req);
1395
1396        if is_rsc {
1397            "text/x-component; charset=utf-8".to_string()
1398        } else if req.path.contains("/api/") {
1399            "application/json; charset=utf-8".to_string()
1400        } else {
1401            "text/html; charset=utf-8".to_string()
1402        }
1403    }
1404
1405    /// Decide whether to block based on severity.
1406    fn should_block_request(&self, detections: &[AttackEvent]) -> bool {
1407        detections
1408            .iter()
1409            .any(|d| d.severity == Severity::Critical || d.confidence > 0.9)
1410    }
1411
1412    /// Calculate realistic response delay.
1413    fn calculate_delay(&self, _detections: &[AttackEvent]) -> Duration {
1414        if !self.config.realistic_timing {
1415            return Duration::from_millis(0);
1416        }
1417        use std::collections::hash_map::DefaultHasher;
1418        use std::hash::{Hash, Hasher};
1419        let mut hasher = DefaultHasher::new();
1420        Utc::now()
1421            .timestamp_nanos_opt()
1422            .unwrap_or(0)
1423            .hash(&mut hasher);
1424        let hash = hasher.finish();
1425        let jitter = hash % (self.config.max_delay_ms - self.config.min_delay_ms + 1);
1426        Duration::from_millis(self.config.min_delay_ms + jitter)
1427    }
1428
1429    // ── Attacker Profiling ────────────────────────────────────────────────
1430
1431    /// Get or create a profile ID for an attacker.
1432    fn get_or_create_profile_id(&mut self, req: &RawRequest) -> String {
1433        let fingerprint = self.build_fingerprint(req);
1434        let hashed = Self::hash_str(&fingerprint);
1435        let profile_id = format!("prof_{}", &hashed[..16]);
1436
1437        if !self.state.attacker_profiles.contains_key(&profile_id) {
1438            self.state.attacker_profiles.insert(
1439                profile_id.clone(),
1440                AttackerProfile {
1441                    profile_id: profile_id.clone(),
1442                    ip: req.ip.clone(),
1443                    country: None,
1444                    asn: None,
1445                    is_tor: false,
1446                    is_cloud: false,
1447                    is_proxy: false,
1448                    user_agent: req.headers.get("user-agent").cloned().unwrap_or_default(),
1449                    browser_fingerprint: self.parse_user_agent(
1450                        req.headers
1451                            .get("user-agent")
1452                            .map(|s| s.as_str())
1453                            .unwrap_or(""),
1454                    ),
1455                    first_seen: Utc::now().to_rfc3339(),
1456                    last_seen: Utc::now().to_rfc3339(),
1457                    total_requests: 0,
1458                    attack_categories: HashMap::new(),
1459                    techniques_used: Vec::new(),
1460                    avg_request_interval: 0.0,
1461                    is_automated: false,
1462                    risk_score: 0.0,
1463                    targets: Vec::new(),
1464                    event_timeline: Vec::new(),
1465                },
1466            );
1467            self.state.unique_attackers = self.state.attacker_profiles.len();
1468        }
1469
1470        profile_id
1471    }
1472
1473    /// Build a fingerprint from request characteristics.
1474    fn build_fingerprint(&self, req: &RawRequest) -> String {
1475        let ua = req
1476            .headers
1477            .get("user-agent")
1478            .map(|s| s.as_str())
1479            .unwrap_or("");
1480        let accept = req.headers.get("accept").map(|s| s.as_str()).unwrap_or("");
1481        let accept_lang = req
1482            .headers
1483            .get("accept-language")
1484            .map(|s| s.as_str())
1485            .unwrap_or("");
1486        let accept_enc = req
1487            .headers
1488            .get("accept-encoding")
1489            .map(|s| s.as_str())
1490            .unwrap_or("");
1491
1492        format!(
1493            "{}|{}|{}|{}|{}",
1494            req.ip, ua, accept, accept_lang, accept_enc
1495        )
1496    }
1497
1498    /// Update request time tracking for cadence analysis.
1499    fn update_request_times(&mut self, profile_id: &str) {
1500        let times = self
1501            .request_times
1502            .entry(profile_id.to_string())
1503            .or_default();
1504        times.push(Instant::now());
1505        // Keep only last 100 timestamps
1506        if times.len() > 100 {
1507            times.drain(0..times.len() - 100);
1508        }
1509
1510        // Update automaton detection
1511        if let Some(profile) = self.state.attacker_profiles.get_mut(profile_id) {
1512            profile.total_requests += 1;
1513            profile.last_seen = Utc::now().to_rfc3339();
1514            if times.len() >= 3 {
1515                let intervals: Vec<f64> = times
1516                    .windows(2)
1517                    .map(|w| w[1].duration_since(w[0]).as_secs_f64())
1518                    .collect();
1519                profile.avg_request_interval =
1520                    intervals.iter().sum::<f64>() / intervals.len() as f64;
1521
1522                // Detect automation: very consistent timing or very fast
1523                if profile.total_requests >= 10 {
1524                    let std_dev = Self::std_dev(&intervals, profile.avg_request_interval);
1525                    profile.is_automated = profile.avg_request_interval < 0.1
1526                        || (profile.avg_request_interval < 0.5 && std_dev < 0.05);
1527                }
1528            }
1529        }
1530    }
1531
1532    /// Update the attacker profile with detection results.
1533    fn update_attacker_profile(
1534        &mut self,
1535        req: &RawRequest,
1536        profile_id: &str,
1537        detections: &[AttackEvent],
1538    ) {
1539        if let Some(profile) = self.state.attacker_profiles.get_mut(profile_id) {
1540            for det in detections {
1541                *profile
1542                    .attack_categories
1543                    .entry(det.category.clone())
1544                    .or_insert(0) += 1;
1545                if !profile.techniques_used.contains(&det.subcategory) {
1546                    profile.techniques_used.push(det.subcategory.clone());
1547                }
1548                profile.event_timeline.push(format!(
1549                    "{} | {}:{} | {} | conf={:.2}",
1550                    &det.timestamp[..19],
1551                    det.category,
1552                    det.subcategory,
1553                    det.severity.clone().serde_name().unwrap_or("unknown"),
1554                    det.confidence
1555                ));
1556            }
1557            if !profile.targets.contains(&req.path) {
1558                profile.targets.push(req.path.clone());
1559            }
1560
1561            // Calculate risk score (0-100)
1562            profile.risk_score = Self::calculate_risk_score(profile);
1563        }
1564    }
1565
1566    /// Evict low-activity profiles to prevent unbounded memory growth.
1567    /// Keeps the top 5000 profiles sorted by risk_score (descending).
1568    fn evict_profiles(&mut self) {
1569        let mut sorted: Vec<(String, f64)> = self
1570            .state
1571            .attacker_profiles
1572            .iter()
1573            .map(|(id, p)| (id.clone(), p.risk_score))
1574            .collect();
1575        sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1576        let keep_ids: std::collections::HashSet<String> =
1577            sorted.iter().take(5000).map(|(id, _)| id.clone()).collect();
1578        self.state
1579            .attacker_profiles
1580            .retain(|id, _| keep_ids.contains(id));
1581        self.state.unique_attackers = self.state.attacker_profiles.len();
1582    }
1583
1584    /// Calculate a cumulative risk score for an attacker profile.
1585    fn calculate_risk_score(profile: &AttackerProfile) -> f64 {
1586        let mut score = 0.0;
1587
1588        // Severity-based scoring
1589        let severity_weights: HashMap<&str, f64> = [
1590            ("sqli", 15.0),
1591            ("cmdi", 15.0),
1592            ("rce", 15.0),
1593            ("deserialization", 15.0),
1594            ("xxe", 14.0),
1595            ("ssti", 14.0),
1596            ("lfi", 13.0),
1597            ("rfi", 13.0),
1598            ("ssrf", 12.0),
1599            ("nosqli", 12.0),
1600            ("xss", 8.0),
1601            ("path_traversal", 8.0),
1602            ("file_upload", 10.0),
1603            ("crlf", 7.0),
1604            ("http_smuggling", 9.0),
1605            ("jwt", 9.0),
1606            ("auth_bypass", 10.0),
1607            ("prototype_pollution", 8.0),
1608            ("rsc_attack", 14.0),
1609            ("dns_exfil", 6.0),
1610        ]
1611        .into_iter()
1612        .collect();
1613
1614        for (cat, count) in &profile.attack_categories {
1615            let weight = severity_weights.get(cat.as_str()).copied().unwrap_or(3.0);
1616            score += weight * (*count as f64).min(3.0); // Cap at 3x per category
1617        }
1618
1619        // Multiplier for diverse techniques
1620        let technique_bonus = (profile.techniques_used.len() as f64 * 2.0).min(20.0);
1621        score += technique_bonus;
1622
1623        // Automation bonus
1624        if profile.is_automated {
1625            score += 10.0;
1626        }
1627
1628        // Request volume bonus
1629        if profile.total_requests > 100 {
1630            score += 5.0;
1631        }
1632        if profile.total_requests > 500 {
1633            score += 5.0;
1634        }
1635
1636        score.min(100.0)
1637    }
1638
1639    // ── User-Agent Parsing ─────────────────────────────────────────────────
1640
1641    /// Parse User-Agent string into structured browser/OS fingerprint.
1642    fn parse_user_agent(&self, ua: &str) -> Option<BrowserFingerprint> {
1643        if ua.is_empty() {
1644            return None;
1645        }
1646
1647        let ua_lower = ua.to_lowercase();
1648
1649        // Browser detection
1650        let (browser, browser_version) = if ua_lower.contains("firefox") {
1651            ("Firefox", Self::extract_version(ua, "Firefox/"))
1652        } else if ua_lower.contains("edg") {
1653            ("Edge", Self::extract_version(ua, "Edg/"))
1654        } else if ua_lower.contains("chrome") && !ua_lower.contains("chromium") {
1655            ("Chrome", Self::extract_version(ua, "Chrome/"))
1656        } else if ua_lower.contains("safari") && !ua_lower.contains("chrome") {
1657            ("Safari", Self::extract_version(ua, "Version/"))
1658        } else if ua_lower.contains("opera") || ua_lower.contains("opr") {
1659            ("Opera", Self::extract_version(ua, "OPR/"))
1660        } else if ua_lower.contains("msie") || ua_lower.contains("trident") {
1661            ("Internet Explorer", Self::extract_version(ua, "MSIE "))
1662        } else {
1663            ("Unknown", "0.0".to_string())
1664        };
1665
1666        // OS detection
1667        let (os, os_version) = if ua_lower.contains("windows nt 10") {
1668            ("Windows", "10/11".to_string())
1669        } else if ua_lower.contains("windows nt 6.3") {
1670            ("Windows", "8.1".to_string())
1671        } else if ua_lower.contains("windows nt 6.1") {
1672            ("Windows", "7".to_string())
1673        } else if ua_lower.contains("mac os x") {
1674            ("macOS", Self::extract_version(ua, "Mac OS X "))
1675        } else if ua_lower.contains("android") {
1676            ("Android", Self::extract_version(ua, "Android "))
1677        } else if ua_lower.contains("iphone") || ua_lower.contains("ipad") {
1678            ("iOS", Self::extract_version(ua, "OS "))
1679        } else if ua_lower.contains("linux") {
1680            ("Linux", "".to_string())
1681        } else {
1682            ("Unknown", "".to_string())
1683        };
1684
1685        let engine = if ua_lower.contains("webkit") {
1686            "WebKit"
1687        } else if ua_lower.contains("gecko") {
1688            "Gecko"
1689        } else if ua_lower.contains("trident") {
1690            "Trident"
1691        } else {
1692            "Unknown"
1693        };
1694
1695        let device_type = if ua_lower.contains("mobile") || ua_lower.contains("android") {
1696            "Mobile"
1697        } else if ua_lower.contains("tablet") || ua_lower.contains("ipad") {
1698            "Tablet"
1699        } else {
1700            "Desktop"
1701        };
1702
1703        let is_headless = ua_lower.contains("headless")
1704            || ua_lower.contains("phantom")
1705            || ua_lower.contains("puppeteer")
1706            || ua_lower.contains("playwright")
1707            || ua_lower.contains("selenium");
1708
1709        Some(BrowserFingerprint {
1710            browser: browser.to_string(),
1711            browser_version,
1712            os: os.to_string(),
1713            os_version,
1714            engine: engine.to_string(),
1715            device_type: device_type.to_string(),
1716            is_headless,
1717        })
1718    }
1719
/// Extract the `major.minor` version that follows `prefix` in `ua`.
///
/// The version token runs from the end of the first (case-sensitive)
/// occurrence of `prefix` up to the next space, `;` or `)`, or to the end
/// of the string. Its first two components are returned joined with `.`;
/// components may be separated by `.` or by `_`, the latter being what
/// Apple platforms use (`Mac OS X 10_15_7` yields `10.15`). A token with a
/// single component is returned as is.
///
/// Returns `"0.0"` when `prefix` does not occur in `ua`.
fn extract_version(ua: &str, prefix: &str) -> String {
    match ua.find(prefix) {
        None => "0.0".to_string(),
        Some(pos) => {
            let rest = &ua[pos + prefix.len()..];
            // `split` always yields at least one (possibly empty) piece.
            let token = rest.split([' ', ';', ')']).next().unwrap_or("");
            // Take only major.minor
            token
                .split(['.', '_'])
                .take(2)
                .collect::<Vec<_>>()
                .join(".")
        }
    }
}
1734
1735    // ── Helpers ────────────────────────────────────────────────────────────
1736
1737    fn all_request_text(&self, req: &RawRequest) -> String {
1738        format!(
1739            "{} {}?{} {} {}",
1740            req.method,
1741            req.path,
1742            req.query_string,
1743            req.body,
1744            self.headers_as_string(&req.headers)
1745        )
1746    }
1747
1748    fn headers_as_string(&self, headers: &HashMap<String, String>) -> String {
1749        headers
1750            .iter()
1751            .map(|(k, v)| format!("{}: {}", k, v))
1752            .collect::<Vec<_>>()
1753            .join("\n")
1754    }
1755
1756    fn generate_event_id() -> String {
1757        format!(
1758            "evt_{}_{}",
1759            Utc::now().timestamp_millis(),
1760            Self::random_hex(8)
1761        )
1762    }
1763
/// Shorten `s` to at most `max_len` bytes, noting how much was removed.
///
/// Strings of `max_len` bytes or fewer are returned unchanged. Longer
/// strings are cut and suffixed with `...[truncated N bytes]`, where `N`
/// is the number of bytes dropped.
///
/// The cut never lands inside a multi-byte UTF-8 character: if `max_len`
/// is not a character boundary the cut moves back to the previous one.
/// Slicing at the raw `max_len` would panic on such input, and the text
/// handled here is request data.
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }

    // Index 0 is always a boundary, so this loop terminates.
    let mut cut = max_len;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...[truncated {} bytes]", &s[..cut], s.len() - cut)
}
1775
/// Produce `len` lowercase hexadecimal characters for use in IDs and
/// fake asset names.
///
/// Output is built in 16-character blocks, each the hash of the current
/// wall-clock time and a block counter under a freshly keyed
/// `RandomState` hasher. Compared with hashing the timestamp under a
/// fixed-key hasher this:
/// - supports any `len` (slicing one 16-character hash panicked for
///   `len > 16`);
/// - yields different values for back-to-back calls even when the clock
///   has not advanced;
/// - is not reproducible from the timestamp alone.
///
/// Not a cryptographically secure generator.
fn random_hex(len: usize) -> String {
    use std::collections::hash_map::RandomState;
    use std::fmt::Write;
    use std::hash::{BuildHasher, Hash, Hasher};
    use std::time::{SystemTime, UNIX_EPOCH};

    // A clock set before the epoch just contributes a constant.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_nanos())
        .unwrap_or(0);

    let mut out = String::with_capacity(len + 16);
    let mut block: u64 = 0;
    while out.len() < len {
        let mut hasher = RandomState::new().build_hasher();
        nanos.hash(&mut hasher);
        block.hash(&mut hasher);
        // Writing into a String cannot fail.
        let _ = write!(out, "{:016x}", hasher.finish());
        block += 1;
    }
    // Hex digits are ASCII, so any byte index is a valid cut point.
    out.truncate(len);
    out
}
1786
/// Hash `s` with the standard library's default hasher and return the
/// 64-bit result as 16 zero-padded lowercase hex characters.
fn hash_str(s: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::default();
    Hash::hash(s, &mut state);
    let digest: u64 = state.finish();
    format!("{:016x}", digest)
}
1794
/// Sample standard deviation of `values` around the supplied `mean`.
///
/// Uses the `n - 1` denominator. Returns 0.0 when fewer than two values
/// are given.
fn std_dev(values: &[f64], mean: f64) -> f64 {
    let count = values.len();
    if count < 2 {
        return 0.0;
    }

    let mut squared_error = 0.0;
    for value in values {
        let delta = value - mean;
        squared_error += delta.powi(2);
    }

    let degrees_of_freedom = (count - 1) as f64;
    (squared_error / degrees_of_freedom).sqrt()
}
1803
1804    // ── Public API ─────────────────────────────────────────────────────────
1805
1806    /// Get the current honeypot state (for dashboard/export).
1807    pub fn get_state(&self) -> &HoneypotState {
1808        &self.state
1809    }
1810
1811    /// Get a snapshot of all attacker profiles.
1812    pub fn get_profiles(&self) -> Vec<&AttackerProfile> {
1813        self.state.attacker_profiles.values().collect()
1814    }
1815
1816    /// Get a specific attacker profile by ID.
1817    pub fn get_profile(&self, profile_id: &str) -> Option<&AttackerProfile> {
1818        self.state.attacker_profiles.get(profile_id)
1819    }
1820
1821    /// Get top-N most dangerous attacker profiles.
1822    pub fn get_top_threats(&self, n: usize) -> Vec<&AttackerProfile> {
1823        let mut profiles: Vec<&AttackerProfile> = self.state.attacker_profiles.values().collect();
1824        profiles.sort_by(|a, b| {
1825            b.risk_score
1826                .partial_cmp(&a.risk_score)
1827                .unwrap_or(std::cmp::Ordering::Equal)
1828        });
1829        profiles.truncate(n);
1830        profiles
1831    }
1832
1833    /// Export full state as JSON.
1834    pub fn export_json(&self) -> serde_json::Result<String> {
1835        serde_json::to_string_pretty(&self.state)
1836    }
1837
1838    /// Reset all state.
1839    pub fn reset(&mut self) {
1840        self.state = HoneypotState {
1841            config: self.config.clone(),
1842            total_requests: 0,
1843            total_attacks_detected: 0,
1844            unique_attackers: 0,
1845            attack_events: Vec::new(),
1846            attacker_profiles: HashMap::new(),
1847            uptime_seconds: 0.0,
1848            requests_per_minute: 0.0,
1849        };
1850        self.request_times.clear();
1851    }
1852}
1853
1854impl Default for HoneypotEngine {
1855    fn default() -> Self {
1856        Self::new()
1857    }
1858}
1859
1860impl Severity {
1861    fn serde_name(&self) -> Option<&str> {
1862        match self {
1863            Severity::Info => Some("info"),
1864            Severity::Low => Some("low"),
1865            Severity::Medium => Some("medium"),
1866            Severity::High => Some("high"),
1867            Severity::Critical => Some("critical"),
1868        }
1869    }
1870}
1871
1872// ═════════════════════════════════════════════════════════════════════════════
1873// Tests
1874// ═════════════════════════════════════════════════════════════════════════════
1875
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `RawRequest` from literals.
    ///
    /// Header names are lowercased, the query string is empty, and the
    /// client IP is fixed to `192.168.1.100`.
    fn make_request(
        method: &str,
        path: &str,
        body: &str,
        headers: Vec<(&str, &str)>,
    ) -> RawRequest {
        let headers = headers
            .into_iter()
            .map(|(name, value)| (name.to_lowercase(), value.to_string()))
            .collect();
        RawRequest {
            method: method.to_string(),
            path: path.to_string(),
            query_string: String::new(),
            body: body.to_string(),
            headers,
            ip: "192.168.1.100".to_string(),
            timestamp: Utc::now(),
        }
    }

    /// Run `req` through a fresh engine and report whether at least one
    /// detection of `category` was produced.
    fn detects(req: &RawRequest, category: &str) -> bool {
        let mut engine = HoneypotEngine::new();
        let result = engine.process_request(req);
        let found = result.detections.iter().any(|d| d.category == category);
        found
    }

    #[test]
    fn test_sqli_detection() {
        let req = make_request(
            "GET",
            "/login",
            "username=admin' OR '1'='1&password=test",
            vec![],
        );
        assert!(detects(&req, "sqli"), "Should detect SQL injection");
    }

    #[test]
    fn test_xss_detection() {
        let req = make_request("GET", "/search", "q=<script>alert('XSS')</script>", vec![]);
        assert!(detects(&req, "xss"), "Should detect XSS");
    }

    #[test]
    fn test_cmdi_detection() {
        let req = make_request("POST", "/api/exec", "cmd=;id", vec![]);
        assert!(detects(&req, "cmdi"), "Should detect command injection");
    }

    #[test]
    fn test_path_traversal_detection() {
        let req = make_request("GET", "/download", "file=../../../etc/passwd", vec![]);
        assert!(
            detects(&req, "path_traversal"),
            "Should detect path traversal"
        );
    }

    #[test]
    fn test_ssti_detection() {
        let req = make_request("POST", "/contact", "name={{7*7}}", vec![]);
        assert!(detects(&req, "ssti"), "Should detect SSTI ({{7*7}})");
    }

    #[test]
    fn test_lfi_detection() {
        let req = make_request("GET", "/view", "page=/etc/passwd", vec![]);
        assert!(detects(&req, "lfi"), "Should detect LFI (/etc/passwd)");
    }

    #[test]
    fn test_ssrf_metadata_detection() {
        let req = make_request(
            "POST",
            "/api/fetch",
            "url=http://169.254.169.254/latest/meta-data/",
            vec![],
        );
        assert!(
            detects(&req, "ssrf"),
            "Should detect SSRF cloud metadata probe"
        );
    }

    #[test]
    fn test_rsc_flight_detection() {
        let headers = vec![
            ("Content-Type", "text/x-component"),
            ("Next-Action", "exploit"),
        ];
        let req = make_request(
            "POST",
            "/",
            r#"0:[["$","@1",null,{"id":"malicious_component","chunks":[]}]]"#,
            headers,
        );
        assert!(
            detects(&req, "rsc_attack"),
            "Should detect RSC/Flight protocol attack"
        );
    }

    #[test]
    fn test_attacker_profiling() {
        let mut engine = HoneypotEngine::new();

        // Five attacks sharing one IP and User-Agent, hence one profile.
        for attempt in 0..5 {
            let payload = format!("cmd=;id_{}", attempt);
            let req = make_request(
                "POST",
                "/api/exec",
                &payload,
                vec![("User-Agent", "sqlmap/1.0")],
            );
            engine.process_request(&req);
        }

        let profiles = engine.get_profiles();
        assert!(!profiles.is_empty(), "Should have at least one profile");
        let first = &profiles[0];
        assert!(first.total_requests >= 5);
        assert!(!first.techniques_used.is_empty());
    }

    #[test]
    fn test_ua_parsing() {
        let engine = HoneypotEngine::new();
        let parsed = engine
            .parse_user_agent(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            )
            .unwrap();
        assert_eq!(parsed.browser, "Chrome");
        assert_eq!(parsed.os, "Windows");
        assert_eq!(parsed.engine, "WebKit");
    }

    #[test]
    fn test_no_detection_on_clean_request() {
        let mut engine = HoneypotEngine::new();
        let req = make_request("GET", "/", "", vec![("User-Agent", "Mozilla/5.0")]);
        let result = engine.process_request(&req);

        // A payload-free GET to / must not raise anything at High or above.
        let mut severe = 0;
        for detection in result.detections.iter() {
            if detection.severity >= Severity::High {
                severe += 1;
            }
        }
        assert_eq!(
            severe, 0,
            "Clean request should not trigger high-severity detections"
        );
    }

    #[test]
    fn test_risk_score() {
        let mut profile = AttackerProfile {
            profile_id: "test".to_string(),
            ip: "10.0.0.1".to_string(),
            country: None,
            asn: None,
            is_tor: false,
            is_cloud: false,
            is_proxy: false,
            user_agent: String::new(),
            browser_fingerprint: None,
            first_seen: String::new(),
            last_seen: String::new(),
            total_requests: 200,
            attack_categories: [
                ("sqli".to_string(), 5),
                ("xss".to_string(), 3),
                ("cmdi".to_string(), 2),
            ]
            .into_iter()
            .collect(),
            techniques_used: vec![
                "union_select".to_string(),
                "stacked".to_string(),
                "reflected".to_string(),
            ],
            avg_request_interval: 0.05,
            is_automated: true,
            risk_score: 0.0,
            targets: vec!["/login".to_string(), "/api/exec".to_string()],
            event_timeline: vec![],
        };

        let score = HoneypotEngine::calculate_risk_score(&profile);
        profile.risk_score = score;

        assert!(
            score > 60.0,
            "Risk score should be high for diverse attacks: got {}",
            score
        );
        assert!(score <= 100.0, "Risk score should not exceed 100");
    }

    #[test]
    fn test_export_json() {
        let mut engine = HoneypotEngine::new();
        let req = make_request(
            "POST",
            "/api/login",
            "user=admin'--",
            vec![("User-Agent", "Mozilla/5.0")],
        );
        engine.process_request(&req);

        let exported = engine.export_json().unwrap();
        for needle in ["sqli", "admin"] {
            assert!(exported.contains(needle));
        }
    }

    #[test]
    fn test_fake_rsc_response() {
        let engine = HoneypotEngine::new();
        let headers = vec![
            ("Content-Type", "text/x-component"),
            ("Next-Action", "test"),
        ];
        let req = make_request("POST", "/dashboard", "[]", headers);
        let body = engine.generate_fake_rsc_response(&req);
        assert!(!body.is_empty());
        assert!(body.contains("$") || body.contains("pageProps") || body.contains("status"));
    }

    #[test]
    fn test_fake_html_response() {
        let engine = HoneypotEngine::new();
        let req = make_request("GET", "/", "", vec![]);
        let body = engine.generate_fake_html_response(&req);
        for marker in ["<html", "Next.js", "__next"] {
            assert!(body.contains(marker));
        }
    }
}
web_analyzer/react_honeypot.rs

web_analyzer/
react_honeypot.rs