Skip to main content

web_analyzer/
react_honeypot.rs

1//! # React2Shell Honeypot — Attack Vector Detection & Attacker Intelligence
2//!
3//! A realistic React Server Components (RSC) honeypot that detects **45+ attack
4//! vectors** while silently collecting comprehensive attacker intelligence.
5//!
6//! ## Capabilities
7//!
8//! - **Attack Detection** — SQLi, XSS, SSRF, SSTI, LFI/RFI, command injection,
9//!   NoSQLi, XXE, deserialization, JWT attacks, GraphQL injection, CRLF, path
10//!   traversal, prototype pollution, and 30+ more categories
11//! - **Attacker Profiling** — IP, GeoIP, User-Agent, OS/browser fingerprint,
12//!   request cadence, technique enumeration, session correlation
13//! - **Realistic RSC Simulation** — Fake Server Action endpoints, plausible error
14//!   messages, timing jitter, progressive response sizes
15//! - **Structured Intelligence** — JSON-serializable event logs, severity
16//!   scoring, risk classification, MITRE ATT&CK mapping
17
18use chrono::Utc;
19use regex::Regex;
20use serde::{Deserialize, Serialize};
21use std::collections::HashMap;
22use std::time::{Duration, Instant};
23
24// ═════════════════════════════════════════════════════════════════════════════
25// Core Types
26// ═════════════════════════════════════════════════════════════════════════════
27
28/// Severity level for a detected attack.
29#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
30pub enum Severity {
31    #[serde(rename = "info")]
32    Info,
33    #[serde(rename = "low")]
34    Low,
35    #[serde(rename = "medium")]
36    Medium,
37    #[serde(rename = "high")]
38    High,
39    #[serde(rename = "critical")]
40    Critical,
41}
42
43/// An individual detected attack event.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct AttackEvent {
46    /// Unique event ID (UUIDv4-style timestamp-based).
47    pub event_id: String,
48    /// ISO-8601 timestamp.
49    pub timestamp: String,
50    /// Attack category (e.g. "sqli", "xss", "ssrf").
51    pub category: String,
52    /// Sub-category or specific technique.
53    pub subcategory: String,
54    /// The matched payload/pattern excerpt.
55    pub matched_payload: String,
56    /// Full incoming payload (truncated for storage).
57    pub full_payload: String,
58    /// HTTP method used.
59    pub method: String,
60    /// Request path/endpoint targeted.
61    pub path: String,
62    /// Severity assessment.
63    pub severity: Severity,
64    /// MITRE ATT&CK technique ID.
65    pub mitre_id: Option<String>,
66    /// The honeypot's simulated response code.
67    pub simulated_response: u16,
68    /// IP address of the attacker.
69    pub attacker_ip: String,
70    /// Raw User-Agent header.
71    pub user_agent: String,
72    /// All captured headers (sanitized).
73    pub headers: HashMap<String, String>,
74    /// Session tracking ID (cookie or fingerprint).
75    pub session_id: Option<String>,
76    /// Confidence score (0.0–1.0) that this is an actual attack.
77    pub confidence: f64,
78}
79
80/// Accumulated profile of an attacker across multiple requests.
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct AttackerProfile {
83    /// Unique profile ID (derived from IP + fingerprint).
84    pub profile_id: String,
85    /// IP address.
86    pub ip: String,
87    /// GeoIP country code (if resolved).
88    pub country: Option<String>,
89    /// GeoIP ASN/organization.
90    pub asn: Option<String>,
91    /// Whether the IP is a known Tor exit node.
92    pub is_tor: bool,
93    /// Whether the IP belongs to a known cloud provider.
94    pub is_cloud: bool,
95    /// Whether the connection came through a proxy.
96    pub is_proxy: bool,
97    /// User-Agent string from the first request.
98    pub user_agent: String,
99    /// Parsed browser/OS fingerprint.
100    pub browser_fingerprint: Option<BrowserFingerprint>,
101    /// First-seen timestamp.
102    pub first_seen: String,
103    /// Last-seen timestamp.
104    pub last_seen: String,
105    /// Total requests sent.
106    pub total_requests: u64,
107    /// Count per attack category.
108    pub attack_categories: HashMap<String, u64>,
109    /// Techniques observed.
110    pub techniques_used: Vec<String>,
111    /// Average request interval (seconds).
112    pub avg_request_interval: f64,
113    /// Whether the attacker appears automated (bot/script).
114    pub is_automated: bool,
115    /// Cumulative risk score (0–100).
116    pub risk_score: f64,
117    /// List of targeted endpoints.
118    pub targets: Vec<String>,
119    /// Timeline of detected events.
120    pub event_timeline: Vec<String>,
121}
122
123/// Parsed browser and OS information from User-Agent.
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct BrowserFingerprint {
126    pub browser: String,
127    pub browser_version: String,
128    pub os: String,
129    pub os_version: String,
130    pub engine: String,
131    pub device_type: String,
132    pub is_headless: bool,
133}
134
135/// Configuration for the honeypot engine.
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct HoneypotConfig {
138    /// Maximum payload size to store (bytes).
139    pub max_payload_store: usize,
140    /// Whether to simulate realistic RSC timing delays.
141    pub realistic_timing: bool,
142    /// Minimum simulated delay (ms).
143    pub min_delay_ms: u64,
144    /// Maximum simulated delay (ms).
145    pub max_delay_ms: u64,
146    /// Whether to respond with fake RSC content.
147    pub fake_rsc_responses: bool,
148    /// Whether to track sessions via cookie/fingerprint.
149    pub session_tracking: bool,
150    /// Session cookie name to set.
151    pub session_cookie: String,
152    /// Whether to log all requests (not just attacks).
153    pub log_all_requests: bool,
154    /// Confidence threshold for considering a detection as an attack.
155    pub detection_threshold: f64,
156    /// Whether to simulate progressive response sizes (keep attackers engaged).
157    pub progressive_sizing: bool,
158}
159
160impl Default for HoneypotConfig {
161    fn default() -> Self {
162        Self {
163            max_payload_store: 8192,
164            realistic_timing: true,
165            min_delay_ms: 20,
166            max_delay_ms: 180,
167            fake_rsc_responses: true,
168            session_tracking: true,
169            session_cookie: "__Host-RSC-ID".to_string(),
170            log_all_requests: false,
171            detection_threshold: 0.5,
172            progressive_sizing: true,
173        }
174    }
175}
176
177/// The complete honeypot state.
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct HoneypotState {
180    pub config: HoneypotConfig,
181    pub total_requests: u64,
182    pub total_attacks_detected: u64,
183    pub unique_attackers: usize,
184    pub attack_events: Vec<AttackEvent>,
185    pub attacker_profiles: HashMap<String, AttackerProfile>,
186    pub uptime_seconds: f64,
187    pub requests_per_minute: f64,
188}
189
190/// Result of analyzing a single request.
191#[derive(Debug, Clone, Serialize, Deserialize)]
192pub struct DetectionResult {
193    /// All attack vectors detected in the request.
194    pub detections: Vec<AttackEvent>,
195    /// The simulated HTTP response status code.
196    pub simulated_status: u16,
197    /// The simulated response body.
198    pub simulated_body: String,
199    /// Recommended content-type for the response.
200    pub content_type: String,
201    /// Whether the request should be blocked.
202    pub should_block: bool,
203    /// Suggested delay before responding (ms).
204    pub suggested_delay_ms: u64,
205}
206
207/// Raw request input for analysis.
208#[derive(Debug, Clone)]
209pub struct RawRequest {
210    pub method: String,
211    pub path: String,
212    pub query_string: String,
213    pub body: String,
214    pub headers: HashMap<String, String>,
215    pub ip: String,
216    pub timestamp: chrono::DateTime<Utc>,
217}
218
219// ═════════════════════════════════════════════════════════════════════════════
220// Attack Vector Definitions (45+ Categories)
221// ═════════════════════════════════════════════════════════════════════════════
222
223/// Definition of a single attack vector detector.
224struct AttackVector {
225    category: &'static str,
226    subcategory: &'static str,
227    patterns: &'static [&'static str],
228    severity: Severity,
229    mitre_id: &'static str,
230    /// Where to search: "body", "query", "path", "headers", "all"
231    search_location: &'static str,
232    /// Additional context keywords that must also be present (AND logic).
233    context_keywords: &'static [&'static str],
234}
235
236/// A pre-compiled attack vector with compiled regex patterns.
237struct CompiledVector {
238    category: &'static str,
239    subcategory: &'static str,
240    patterns: Vec<Regex>,
241    severity: Severity,
242    mitre_id: &'static str,
243    search_location: &'static str,
244    context_keywords: &'static [&'static str],
245}
246
247/// All 45+ attack vector definitions.
248fn attack_vectors() -> &'static [AttackVector] {
249    use Severity::*;
250    static VECTORS: std::sync::OnceLock<Vec<AttackVector>> = std::sync::OnceLock::new();
251    VECTORS.get_or_init(|| vec![
252            // ── SQL Injection (Classic, Union, Blind, Error, Stacked, Time) ──
253            AttackVector {
254                category: "sqli", subcategory: "classic_tautology", severity: Critical,
255                mitre_id: "T1190",
256                search_location: "all",
257                context_keywords: &[],
258                patterns: &[
259                    r"(?i)('|\%27)\s*(OR|AND)\s*('|\%27)?\s*\d+\s*=\s*\d+",
260                    r"(?i)('|\%27)\s*(OR|AND)\s*('|\%27)?\s*'\d+'\s*=\s*'\d+'",
261                    r"(?i)(OR|AND)\s+\d+\s*=\s*\d+\s*--",
262                    r"(?i)admin'\s*(--|#|/\*)",
263                    r#"(?i)['"]\s*OR\s+1\s*=\s*1\s*--"#,
264                    r"(?i)'\s*OR\s+'1'\s*=\s*'1",
265                ],
266            },
267            AttackVector {
268                category: "sqli", subcategory: "union_select", severity: Critical,
269                mitre_id: "T1190",
270                search_location: "all",
271                context_keywords: &[],
272                patterns: &[
273                    r"(?i)UNION\s+(ALL\s+)?SELECT\s+(NULL|@@|\d+|user\b|database\b)",
274                    r"(?i)UNION\s+(ALL\s+)?SELECT\s+(NULL|@@|\d+|user\b|database\b).*--",
275                    r"(?i)'\s*UNION\s+SELECT\s+.*FROM\s+",
276                ],
277            },
278            AttackVector {
279                category: "sqli", subcategory: "blind_time", severity: Critical,
280                mitre_id: "T1190",
281                search_location: "all",
282                context_keywords: &[],
283                patterns: &[
284                    r"(?i)(SLEEP|pg_sleep|WAITFOR\s+DELAY|dbms_lock\.sleep|benchmark)\s*\(.*\d+",
285                    r"(?i)AND\s+(SLEEP|pg_sleep|WAITFOR)\s*\(\s*\d+\s*\)",
286                    r"(?i)'\s*AND\s+(SELECT\s+.*FROM\s+.*SLEEP)",
287                ],
288            },
289            AttackVector {
290                category: "sqli", subcategory: "error_based", severity: Critical,
291                mitre_id: "T1190",
292                search_location: "all",
293                context_keywords: &[],
294                patterns: &[
295                    r"(?i)extractvalue\s*\(\s*\d+\s*,\s*concat\s*\(",
296                    r"(?i)updatexml\s*\(\s*\d+\s*,\s*concat\s*\(",
297                    r"(?i)convert\s*\(.*using\s+",
298                    r"(?i)AND\s+1\s*=\s*CONVERT\s*\(int",
299                ],
300            },
301            AttackVector {
302                category: "sqli", subcategory: "stacked", severity: Critical,
303                mitre_id: "T1190",
304                search_location: "all",
305                context_keywords: &[],
306                patterns: &[
307                    r"(?i);\s*(DROP|INSERT|UPDATE|DELETE|ALTER|CREATE|EXEC|TRUNCATE|SHUTDOWN)\s+",
308                    r"(?i)';\s*(DROP|INSERT|UPDATE|DELETE)\s+",
309                    r"(?i);\s*EXEC\s+(sp_|xp_)",
310                ],
311            },
312            // ── NoSQL Injection ──
313            AttackVector {
314                category: "nosqli", subcategory: "mongodb", severity: Critical,
315                mitre_id: "T1190",
316                search_location: "all",
317                context_keywords: &[],
318                patterns: &[
319                    r#"(?i)\{\s*"\$ne"\s*:\s*""#,
320                    r#"(?i)\{\s*"\$gt"\s*:\s*""#,
321                    r#"(?i)\{\s*"\$regex"\s*:\s*".*"\s*\}"#,
322                    r#"(?i)\{\s*"\$where"\s*:\s*""#,
323                    r#"(?i)"\$(eq|ne|gt|gte|lt|lte|in|nin|regex|exists|type|mod|text|search|where)"\s*:"#,
324                    r"(?i)\{\s*'\$ne'\s*:\s*",
325                ],
326            },
327            AttackVector {
328                category: "nosqli", subcategory: "redis_injection", severity: High,
329                mitre_id: "T1190",
330                search_location: "all",
331                context_keywords: &[],
332                patterns: &[
333                    r"(?i)(\r\n|\n)\s*(CONFIG|SET|GET|FLUSHALL|KEYS|SAVE|SHUTDOWN|SLAVEOF)\s",
334                    r"(?i)%0[dD]%0[aA]\s*(CONFIG|SET|FLUSHALL)",
335                ],
336            },
337            // ── Cross-Site Scripting (XSS) ──
338            AttackVector {
339                category: "xss", subcategory: "reflected", severity: High,
340                mitre_id: "T1059.007",
341                search_location: "all",
342                context_keywords: &[],
343                patterns: &[
344                    r"(?i)<script[^>]*>.*</script>",
345                    r"(?i)<script[^>]*>.*",
346                    r#"(?i)javascript\s*:\s*(alert|prompt|confirm)\s*\("#,
347                    r#"(?i)"><script[^>]*>alert\("#,
348                    r#"(?i)<img[^>]+onerror\s*=\s*[`'\"]?\w+"#,
349                    r#"(?i)<svg[^>]+onload\s*=\s*[`'\"]?\w+"#,
350                ],
351            },
352            AttackVector {
353                category: "xss", subcategory: "polyglot", severity: High,
354                mitre_id: "T1059.007",
355                search_location: "all",
356                context_keywords: &[],
357                patterns: &[
358                    r#"(?i)jaVasCript:/*-/*`/*\`/*'/*"/**/(\s*/\*\s*/.*\)\s*;)"#,
359                    r#"(?i)"\s*;\s*alert\s*\(.*\)\s*//"#,
360                ],
361            },
362            AttackVector {
363                category: "xss", subcategory: "stored_payload", severity: High,
364                mitre_id: "T1059.007",
365                search_location: "all",
366                context_keywords: &[],
367                patterns: &[
368                    r#"(?i)<iframe[^>]*srcdoc\s*=\s*[`'\"]?\s*<script"#,
369                    r#"(?i)<object[^>]*data\s*=\s*[`'\"]?data:text/html"#,
370                    r#"(?i)<embed[^>]*src\s*=\s*[`'\"]?data:text/html"#,
371                ],
372            },
373            // ── Command Injection ──
374            AttackVector {
375                category: "cmdi", subcategory: "unix_pipe", severity: Critical,
376                mitre_id: "T1059.004",
377                search_location: "all",
378                context_keywords: &[],
379                patterns: &[
380                    r"(?m)[\|\;`]\s*(id|whoami|ls|cat|pwd|uname|hostname)\s*$",
381                    r"(?m)\$\(\s*(id|whoami|ls|cat|wget|curl)\s*",
382                    r"(?m)`\s*(id|whoami|ls|cat)\s*`",
383                    r"(?m)\|\|\s*(id|whoami|ls|cat|ping)\s",
384                    r"(?m)&&\s*(id|whoami|ls|cat|ping)\s",
385                    r"(?m);\s*(id|whoami|ls|cat|ping|sleep)\s",
386                ],
387            },
388            AttackVector {
389                category: "cmdi", subcategory: "unix_advanced", severity: Critical,
390                mitre_id: "T1059.004",
391                search_location: "all",
392                context_keywords: &[],
393                patterns: &[
394                    r"(?m)(/usr/bin/|/bin/|/sbin/)(id|whoami|ls|cat|bash|sh|nc|wget|curl)",
395                    r"(?m)\|\s*(nc|ncat|netcat)\s",
396                    r"(?m)\|\s*(wget|curl)\s+http",
397                    r"(?m);\s*/bin/(bash|sh|dash)\s+-[ci]",
398                    r"(?m);\s*(chmod|chown)\s",
399                ],
400            },
401            AttackVector {
402                category: "cmdi", subcategory: "windows", severity: Critical,
403                mitre_id: "T1059.003",
404                search_location: "all",
405                context_keywords: &[],
406                patterns: &[
407                    r"(?i)[\|\;`]\s*(whoami|systeminfo|ipconfig|net\s+user|tasklist)\b",
408                    r"(?i)cmd\.exe\s+/[cCkK]\s+",
409                    r"(?i)powershell\.exe\s+-[eE][xX]",
410                    r"(?i)%(COMSPEC|SystemRoot|WINDIR)%",
411                    r"(?i)certutil\s+-urlcache\s+-split\s+-f\s+http",
412                ],
413            },
414            AttackVector {
415                category: "cmdi", subcategory: "blind_oob", severity: Critical,
416                mitre_id: "T1059.004",
417                search_location: "all",
418                context_keywords: &[],
419                patterns: &[
420                    r"(?m)\|\s*(nslookup|dig|host)\s+[a-zA-Z0-9]",
421                    r"(?m)ping\s+-[cnt]\s+\d+\s+[a-zA-Z0-9]",
422                    r"(?m);\s*(nslookup|dig|host|ping)\s+\$\{",
423                ],
424            },
425            // ── Path Traversal / Directory Traversal ──
426            AttackVector {
427                category: "path_traversal", subcategory: "dot_dot_slash", severity: High,
428                mitre_id: "T1083",
429                search_location: "all",
430                context_keywords: &[],
431                patterns: &[
432                    r"(\.\./){2,}(etc|var|proc|sys|home|root|tmp|windows|winnt)",
433                    r"(\.\.\\){2,}(windows|winnt|system32|boot\.ini)",
434                    r"\.\./\.\./\.\./.*(passwd|shadow|hosts|\.ini|\.conf)",
435                    r"\.%2e/\.%2e/",
436                    r"\.%252e/\.%252e/",
437                    r"\.\.%2f\.\.%2f",
438                    r"\.\.%5c\.\.%5c",
439                    r"file:///(etc|proc|sys|var|home)/",
440                ],
441            },
442            AttackVector {
443                category: "path_traversal", subcategory: "absolute_path", severity: High,
444                mitre_id: "T1083",
445                search_location: "all",
446                context_keywords: &[],
447                patterns: &[
448                    r"^/(etc|proc|sys|var|root|home)/.*(passwd|shadow|hosts|\.conf)",
449                    r"^(C:|D:)\\(windows|winnt|system32)\\.*",
450                ],
451            },
452            // ── LFI / RFI (Local/Remote File Inclusion) ──
453            AttackVector {
454                category: "lfi", subcategory: "local_include", severity: Critical,
455                mitre_id: "T1190",
456                search_location: "all",
457                context_keywords: &[],
458                patterns: &[
459                    r"(?i)(file|page|path|include|require|document|folder|dir|template|module|load)\s*=\s*(\.\./|/etc/|/proc/)",
460                    r"(?i)/etc/(passwd|shadow|hosts|group|sudoers|resolv\.conf)",
461                    r"(?i)/proc/(self|version|cpuinfo|meminfo|cmdline)/?",
462                    r"(?i)/var/log/(apache|nginx|syslog|messages|auth\.log)",
463                    r"(?i)C:\\windows\\(system32|win\.ini|boot\.ini|repair\\sam)",
464                    r"(?i)php://filter/convert\.base64-encode/resource=",
465                    r"(?i)php://filter/read=convert\.base64-encode/resource=",
466                    r"(?i)php://input",
467                    r"(?i)expect://(id|whoami|ls)",
468                    r"(?i)data://text/plain;base64,",
469                ],
470            },
471            AttackVector {
472                category: "rfi", subcategory: "remote_include", severity: Critical,
473                mitre_id: "T1190",
474                search_location: "all",
475                context_keywords: &[],
476                patterns: &[
477                    r"(?i)(https?|ftp)://[^/\s]+/[^?\s]+\.(php|txt|jpg|png|gif)\?",
478                    r"(?i)(https?|ftp)://.*(shell|backdoor|r57|c99|web-shell)",
479                    r"(?i)(https?|ftp)://.*/.*\.(txt|php|asp|jsp)\?",
480                ],
481            },
482            // ── SSRF (Server-Side Request Forgery) ──
483            AttackVector {
484                category: "ssrf", subcategory: "cloud_metadata", severity: Critical,
485                mitre_id: "T1190",
486                search_location: "all",
487                context_keywords: &[],
488                patterns: &[
489                    r"(?i)(169\.254\.\d+\.\d+|metadata\.google\.internal|100\.100\.\d+\.\d+)",
490                    r"(?i)(/latest/meta-data|/metadata/v1|/openstack)",
491                    r"(?i)instance-data\.ec2\.internal",
492                    r"(?i)/latest/(meta-data|dynamic|user-data)",
493                    r"(?i)kubernetes\.default\.svc",
494                    r"(?i)\.compute\.internal",
495                ],
496            },
497            AttackVector {
498                category: "ssrf", subcategory: "internal_ports", severity: High,
499                mitre_id: "T1190",
500                search_location: "all",
501                context_keywords: &[],
502                patterns: &[
503                    r"(?i)(http://|https://)(localhost|127\.\d+\.\d+\.\d+|0\.0\.0\.0|\[::1\])\s*[/:]",
504                    r"(?i)(http://|https://)(10\.\d+\.\d+\.\d+|172\.1[6-9]\.\d+\.\d+|172\.2\d\.\d+\.\d+|172\.3[01]\.\d+\.\d+|192\.168\.\d+\.\d+)",
505                ],
506            },
507            AttackVector {
508                category: "ssrf", subcategory: "dns_rebinding", severity: Medium,
509                mitre_id: "T1190",
510                search_location: "all",
511                context_keywords: &[],
512                patterns: &[
513                    r"(?i)([a-z0-9]+\.){2,}(1zero|rbndr|nip\.io|xip\.io|sslip\.io)",
514                    r"(?i)(nslookup|dig|host)\s+[a-z0-9]+\.[a-z]+\.[a-z]+",
515                ],
516            },
517            // ── XXE (XML External Entity) ──
518            AttackVector {
519                category: "xxe", subcategory: "external_entity", severity: Critical,
520                mitre_id: "T1190",
521                search_location: "body",
522                context_keywords: &[],
523                patterns: &[
524                    r#"<!ENTITY\s+\w+\s+(SYSTEM|PUBLIC)\s+['\"]"#,
525                    r#"<!ENTITY\s+%\s+\w+\s+SYSTEM\s+['\"]"#,
526                    r"<!DOCTYPE\s+\w+\s+\[\s*<!ENTITY",
527                    r"<xml[^>]*>\s*<!DOCTYPE",
528                    r#"<\?xml[^?]*\?>\s*<!DOCTYPE\s+\w+\s+\["#,
529                ],
530            },
531            AttackVector {
532                category: "xxe", subcategory: "billion_laughs", severity: Critical,
533                mitre_id: "T1499.002",
534                search_location: "body",
535                context_keywords: &[],
536                patterns: &[
537                    r#"<!ENTITY\s+\w+\s+['\"]<!ENTITY"#,
538                    r"&(lol|lolz|lol1|lol2|laugh|boom|ha|haha);",
539                ],
540            },
541            // ── SSTI (Server-Side Template Injection) ──
542            AttackVector {
543                category: "ssti", subcategory: "jinja2", severity: Critical,
544                mitre_id: "T1190",
545                search_location: "all",
546                context_keywords: &[],
547                patterns: &[
548                    r"\{\{\s*(\d+\s*[\*\+\-]\s*\d+|\w+\.\w+)",
549                    r"\{\{\s*config\s*\}\}",
550                    r"\{\{\s*self\s*\}\}",
551                    r"\{\{\s*''\.__class__\.__mro__",
552                    r"\{\{\s*lipsum\.__globals__",
553                    r"\{\{\s*request\.application\.__globals__",
554                    r"\{\%\s*(import|extends|include|set|for|if)\s+",
555                    r"\{\{\s*cycler\.__init__\.__globals__",
556                ],
557            },
558            AttackVector {
559                category: "ssti", subcategory: "twig", severity: Critical,
560                mitre_id: "T1190",
561                search_location: "all",
562                context_keywords: &[],
563                patterns: &[
564                    r"\{\{\s*_self\.env\.registerUndefinedFilterCallback",
565                    r#"\{\{\s*['\"].*['\"]\s*\|\s*map\("#,
566                    r#"\{\{\s*['\"].*['\"]\s*\|\s*filter\("#,
567                ],
568            },
569            AttackVector {
570                category: "ssti", subcategory: "freemarker", severity: Critical,
571                mitre_id: "T1190",
572                search_location: "all",
573                context_keywords: &[],
574                patterns: &[
575                    r"\$\{.*\.class\.forName\(",
576                    r#"<\#assign\s+ex\s*=\s*['\"]freemarker"#,
577                    r"\$\{(.*\?)?new\s+java\.\w+\(",
578                ],
579            },
580            // ── Deserialization Attacks ──
581            AttackVector {
582                category: "deserialization", subcategory: "java", severity: Critical,
583                mitre_id: "T1190",
584                search_location: "all",
585                context_keywords: &[],
586                patterns: &[
587                    r"(?i)(ac ed 00 05|rO0AB|aced0005)",
588                    r"(?i)(com\.sun\.org\.apache\.xalan|org\.apache\.commons\.collections)",
589                    r"(?i)(java\.lang\.Runtime|java\.lang\.ProcessBuilder)",
590                    r"(?i)(org\.springframework\.beans\.factory)",
591                ],
592            },
593            AttackVector {
594                category: "deserialization", subcategory: "php", severity: Critical,
595                mitre_id: "T1190",
596                search_location: "all",
597                context_keywords: &[],
598                patterns: &[
599                    r#"(?i)(O:\d+:['\"][A-Za-z0-9_\\]+['\"]:\d+:)"#,
600                    r"(?i)(a:\d+:\{.*s:\d+:)",
601                    r#"(?i)(C:\d+:['\"][A-Za-z0-9_\\]+['\"]:\d+:)"#,
602                ],
603            },
604            AttackVector {
605                category: "deserialization", subcategory: "python_pickle", severity: Critical,
606                mitre_id: "T1190",
607                search_location: "all",
608                context_keywords: &[],
609                patterns: &[
610                    r"(?i)(cos\\nsystem|c__builtin__\\neval|csubprocess\\nPopen)",
611                    r"(?i)(__reduce__|__reduce_ex__)",
612                    r"(?i)(S'((import|exec|eval)\b|__import__)",
613                ],
614            },
615            AttackVector {
616                category: "deserialization", subcategory: "nodejs", severity: Critical,
617                mitre_id: "T1190",
618                search_location: "all",
619                context_keywords: &[],
620                patterns: &[
621                    r#"(?i)\{"_bsontype":"Code","code":"[^"]*require\(['"]child_process"#,
622                    r#"(?i)\{"type":"Function","body":"[^"]*require\("#,
623                ],
624            },
625            // ── JWT Attacks ──
626            AttackVector {
627                category: "jwt", subcategory: "none_algorithm", severity: Critical,
628                mitre_id: "T1557",
629                search_location: "all",
630                context_keywords: &[],
631                patterns: &[
632                    r#"(?i)"alg"\s*:\s*"none""#,
633                    r"(?i)ey[A-Za-z0-9_-]+\.ey[A-Za-z0-9_-]+\.(?:$|\s|&)",
634                ],
635            },
636            AttackVector {
637                category: "jwt", subcategory: "key_confusion", severity: High,
638                mitre_id: "T1557",
639                search_location: "all",
640                context_keywords: &[],
641                patterns: &[
642                    r#"(?i)"alg"\s*:\s*"HS256"[^}]*"k"\s*:"#,
643                    r#"(?i)"jwk"\s*:\s*\{[^}]*"kty"\s*:"#,
644                ],
645            },
646            // ── GraphQL Attacks ──
647            AttackVector {
648                category: "graphql", subcategory: "introspection", severity: Medium,
649                mitre_id: "T1190",
650                search_location: "body",
651                context_keywords: &[],
652                patterns: &[
653                    r"__schema\s*\{\s*types\s*\{",
654                    r#"__type\s*\(\s*name\s*:\s*\"\""#,
655                    r"query\s*\{\s*__schema\{",
656                    r"fragment\s+FullType\s+on\s+__Type\s*\{",
657                ],
658            },
659            AttackVector {
660                category: "graphql", subcategory: "batch_attack", severity: High,
661                mitre_id: "T1190",
662                search_location: "body",
663                context_keywords: &[],
664                patterns: &[
665                    r#"\[\s*\{\s*\"query\""#,
666                    r#"\"batch\"\s*:\s*\["#,
667                ],
668            },
669            // ── Prototype Pollution ──
670            AttackVector {
671                category: "prototype_pollution", subcategory: "javascript", severity: High,
672                mitre_id: "T1059.007",
673                search_location: "all",
674                context_keywords: &[],
675                patterns: &[
676                    r#"(?i)(__proto__|constructor|prototype)\s*=\s*["']"#,
677                    r#"(?i)"__proto__"\s*:\s*\{[^}]*\}"#,
678                    r#"(?i)"constructor"\s*:\s*\{[^}]*"prototype"\s*:"#,
679                    r#"(?i)\[\[__proto__\]\]\s*=\s*"#,
680                ],
681            },
682            // ── CRLF Injection ──
683            AttackVector {
684                category: "crlf", subcategory: "response_splitting", severity: High,
685                mitre_id: "T1190",
686                search_location: "all",
687                context_keywords: &[],
688                patterns: &[
689                    r"(\r\n|\%0[dD]\%0[aA])\s*Content-(Type|Length|Disposition):",
690                    r"(\r\n|\%0[dD]\%0[aA])\s*Set-Cookie\s*:",
691                    r"(\r\n|\%0[dD]\%0[aA])\s*(HTTP/|Location\s*:)",
692                    r"(\r\n|\%0[dD]\%0[aA])\s*X-XSS-Protection\s*:",
693                ],
694            },
695            AttackVector {
696                category: "crlf", subcategory: "header_injection", severity: Medium,
697                mitre_id: "T1190",
698                search_location: "all",
699                context_keywords: &[],
700                patterns: &[
701                    r"(\r\n|\%0[dD]\%0[aA])\s*[A-Za-z0-9\-]+\s*:\s*[^\n]+\r?\n",
702                ],
703            },
704            // ── HTTP Host Header / Request Smuggling ──
705            AttackVector {
706                category: "http_smuggling", subcategory: "cl_te", severity: High,
707                mitre_id: "T1190",
708                search_location: "headers",
709                context_keywords: &[],
710                patterns: &[
711                    r"(?i)^\s*Transfer-Encoding\s*:\s*[\x0b]",
712                    r"(?i)^\s*Transfer-Encoding\s*:.*\x0b",
713                    r"(?i)^\s*Content-Length\s*:\s*\d+\s*\n\s*Content-Length",
714                ],
715            },
716            AttackVector {
717                category: "host_attack", subcategory: "host_injection", severity: High,
718                mitre_id: "T1190",
719                search_location: "headers",
720                context_keywords: &[],
721                patterns: &[
722                    r"(?i)^\s*Host\s*:\s*(evil|attacker|malware|hack|bugbounty|pwned)\.(com|net|org|io)",
723                    r"(?i)^\s*Host\s*:\s*(127\.0\.0\.1|localhost|0\.0\.0\.0)",
724                    r"(?i)^\s*X-Forwarded-Host\s*:\s*(evil|attacker|127\.0\.0\.1)",
725                ],
726            },
727            // ── File Upload Attacks ──
728            AttackVector {
729                category: "file_upload", subcategory: "malicious_extension", severity: Critical,
730                mitre_id: "T1190",
731                search_location: "all",
732                context_keywords: &[],
733                patterns: &[
734                    r#"(?i)filename\s*=\s*["'][^"']+\.(php|jsp|asp|aspx|phtml|php5|php7|shtml|cgi|pl|war|jspx)['"]#"#,
735                    r#"(?i)Content-Disposition:.*filename=\\*['"][^'"]+\.(php|jsp|asp)['"]#"#,
736                    r"(?i)\.php\d*\.(jpg|png|gif|pdf)",
737                    r"(?i)\.(php|jsp|asp)\s*%00",
738                ],
739            },
740            // ── Open Redirect ──
741            AttackVector {
742                category: "open_redirect", subcategory: "url_param", severity: Medium,
743                mitre_id: "T1204.001",
744                search_location: "all",
745                context_keywords: &[],
746                patterns: &[
747                    r"(?i)(redirect|url|next|return|goto|target|dest|continue|back)\s*=\s*(https?://|//)[^&\s]+",
748                    r"(?i)(redirect|url|next|return|goto)\s*=\s*(evil|attacker|phish|malw)",
749                    r#"(?i)"(redirect|url|next)"\s*:\s*"(https?://|//)"#,
750                ],
751            },
752            // ── Cookie Manipulation ──
753            AttackVector {
754                category: "cookie_attack", subcategory: "injection", severity: Medium,
755                mitre_id: "T1539",
756                search_location: "headers",
757                context_keywords: &[],
758                patterns: &[
759                    r"(?i)Cookie\s*:\s*.*(<script|alert|onerror|javascript:)",
760                    r"(?i)Cookie\s*:\s*.*(../|\.\.\\\\)",
761                    r"(?i)Cookie\s*:\s*.*(SELECT|UNION)",
762                ],
763            },
764            // ── Cache Poisoning ──
765            AttackVector {
766                category: "cache_poisoning", subcategory: "header_probe", severity: High,
767                mitre_id: "T1499",
768                search_location: "headers",
769                context_keywords: &[],
770                patterns: &[
771                    r"(?i)^\s*X-Forwarded-(Scheme|Proto|Host|Port|Prefix)\s*:\s*(https?://)?[a-z]+",
772                    r"(?i)^\s*X-Original-URL\s*:\s*",
773                    r"(?i)^\s*X-Rewrite-URL\s*:\s*",
774                    r"(?i)^\s*X-HTTP-Method-Override\s*:\s*",
775                    r"(?i)^\s*X-Method-Override\s*:\s*",
776                ],
777            },
778            // ── Authentication Bypass ──
779            AttackVector {
780                category: "auth_bypass", subcategory: "header_forgery", severity: Critical,
781                mitre_id: "T1548",
782                search_location: "headers",
783                context_keywords: &[],
784                patterns: &[
785                    r"(?i)^\s*X-Forwarded-For\s*:\s*(127\.0\.0\.1|localhost|::1)",
786                    r"(?i)^\s*X-Remote-IP\s*:\s*(127\.0\.0\.1|10\.\d+\.\d+\.\d+)",
787                    r"(?i)^\s*X-Originating-IP\s*:\s*(127\.0\.0\.1)",
788                    r"(?i)^\s*X-Real-IP\s*:\s*(127\.0\.0\.1|10\.\d+\.\d+\.\d+)",
789                    r"(?i)^\s*Authorization\s*:\s*Basic\s+[A-Za-z0-9+/=]+={0,2}",
790                ],
791            },
792            // ── HTTP Parameter Pollution ──
793            AttackVector {
794                category: "hpp", subcategory: "duplicate_params", severity: Medium,
795                mitre_id: "T1190",
796                search_location: "query",
797                context_keywords: &[],
798                patterns: &[
799                    r#"(?i)([?&])[^?&=]+=[^?&=]+&[^?&=]+=&"#,
800                    r"(?i)([?&])[^?&=]+=[^?&=]+&(same_param)=[^?&=]+",
801                ],
802            },
803            // ── HTTP Method Tampering ──
804            AttackVector {
805                category: "method_tamper", subcategory: "method_override", severity: Medium,
806                mitre_id: "T1190",
807                search_location: "all",
808                context_keywords: &[],
809                patterns: &[
810                    r"(?i)_method\s*=\s*(PUT|DELETE|PATCH|OPTIONS|TRACE|CONNECT)",
811                    r"(?i)X-HTTP-Method\s*:\s*(PUT|DELETE)",
812                ],
813            },
814            // ── Null Byte Injection ──
815            AttackVector {
816                category: "null_byte", subcategory: "termination", severity: High,
817                mitre_id: "T1190",
818                search_location: "all",
819                context_keywords: &[],
820                patterns: &[
821                    r"(?i)%00\.(php|jsp|asp|html|txt|conf)",
822                    r"(?i)\.php%00",
823                    r"\x00[^\x00]*\.(php|jsp|asp)",
824                ],
825            },
826            // ── CORS Misconfiguration Probe ──
827            AttackVector {
828                category: "cors", subcategory: "origin_spoof", severity: Medium,
829                mitre_id: "T1190",
830                search_location: "headers",
831                context_keywords: &[],
832                patterns: &[
833                    r"(?i)^\s*Origin\s*:\s*https?://(evil|attacker|null|127\.0\.0\.1)",
834                    r"(?i)^\s*Origin\s*:\s*null",
835                ],
836            },
837            // ── Brute Force / Credential Stuffing ──
838            AttackVector {
839                category: "brute_force", subcategory: "multi_attempt", severity: High,
840                mitre_id: "T1110",
841                search_location: "body",
842                context_keywords: &[],
843                patterns: &[
844                    r#"(?i)(password|passwd|pwd|pin|secret|token)\s*=\s*['\"][^'\"]{1,20}['\"]"#,
845                    r#"(?i)\{"(email|username|user|login)"\s*:\s*"[^"]+"\s*,\s*"(password|passwd|pwd)"\s*:\s*""#,
846                ],
847            },
848            // ── Format String ──
849            AttackVector {
850                category: "format_string", subcategory: "printf_injection", severity: High,
851                mitre_id: "T1190",
852                search_location: "all",
853                context_keywords: &[],
854                patterns: &[
855                    r"(%[0-9]*\$)?%([xXndsSph]|p[rd]){1,2}",
856                    r"%[0-9]{1,2}\$[xdspnXDSPN]",
857                ],
858            },
859            // ── Race Condition Probing ──
860            AttackVector {
861                category: "race_condition", subcategory: "concurrent", severity: Medium,
862                mitre_id: "T1499",
863                search_location: "all",
864                context_keywords: &[],
865                patterns: &[
866                    r"(?i)(race|parallel|concurrent|thread)\s*=\s*(true|1|yes)",
867                ],
868            },
869            // ── Clickjacking Frame Attempts ──
870            AttackVector {
871                category: "clickjacking", subcategory: "frame_probe", severity: Low,
872                mitre_id: "T1499",
873                search_location: "all",
874                context_keywords: &[],
875                patterns: &[
876                    r#"(?i)<iframe[^>]*style\s*=\s*['\"]opacity\s*:\s*0"#,
877                    r#"(?i)<iframe[^>]*width\s*=\s*['\"]\d+['\"][^>]*height\s*=\s*['\"]\d+"#,
878                ],
879            },
880            // ── Source Map Extraction ──
881            AttackVector {
882                category: "source_leak", subcategory: "sourcemap_probe", severity: Low,
883                mitre_id: "T1213",
884                search_location: "all",
885                context_keywords: &[],
886                patterns: &[
887                    r"(?i)(\.js\.map|\.css\.map|//#\s*sourceMappingURL)",
888                    r"(?i)/_next/static/.*\.map$",
889                ],
890            },
891            // ── React/Next.js Specific Attacks ──
892            AttackVector {
893                category: "rsc_attack", subcategory: "flight_injection", severity: Critical,
894                mitre_id: "T1190",
895                search_location: "body",
896                context_keywords: &[],
897                patterns: &[
898                    r#"(?i)\[\["\$","@\w+",null,\{"#,
899                    r#"(?i)"type"\s*:\s*"blob_handler""#,
900                    r#"(?i)"dispatch"\s*:\s*"dynamic""#,
901                    r#"(?i)"method"\s*:\s*"child_process\.exec""#,
902                ],
903            },
904            AttackVector {
905                category: "rsc_attack", subcategory: "server_action_probe", severity: High,
906                mitre_id: "T1190",
907                search_location: "headers",
908                context_keywords: &[],
909                patterns: &[
910                    r"(?i)^\s*Next-Action\s*:",
911                    r"(?i)^\s*RSC\s*:\s*1",
912                    r"(?i)^\s*Content-Type\s*:\s*text/x-component",
913                    r"(?i)^\s*Next-Router-State-Tree\s*:",
914                ],
915            },
916            AttackVector {
917                category: "nextjs_probe", subcategory: "internal_route", severity: Medium,
918                mitre_id: "T1190",
919                search_location: "path",
920                context_keywords: &[],
921                patterns: &[
922                    r"^/_next/.*(webpack-hmr|__nextjs_|middleware)",
923                    r"^/_next/data/",
924                    r"^/_next/image\?url=",
925                ],
926            },
927            // ── WebSocket Attack Probing ──
928            AttackVector {
929                category: "websocket", subcategory: "injection", severity: High,
930                mitre_id: "T1190",
931                search_location: "all",
932                context_keywords: &[],
933                patterns: &[
934                    r"(?i)ws://(evil|attacker|localhost|127\.0\.0\.1)",
935                    r"(?i)Sec-WebSocket-Key\s*:\s*[A-Za-z0-9+/=]+",
936                ],
937            },
938            // ── DNS Exfiltration Probing ──
939            AttackVector {
940                category: "dns_exfil", subcategory: "tunnel_probe", severity: High,
941                mitre_id: "T1048.001",
942                search_location: "all",
943                context_keywords: &[],
944                patterns: &[
945                    r"(?i)(nslookup|dig|host)\s+\w{20,}\.[a-z]+\.[a-z]+",
946                    r"(?i)\.(burpcollaborator|interact\.sh|canarytokens|oastify)\.(com|net|io|pro|live|site|online|fun)",
947                ],
948            },
949            // ── Content-Type Confusion ──
950            AttackVector {
951                category: "content_type", subcategory: "mismatch_attack", severity: Medium,
952                mitre_id: "T1190",
953                search_location: "all",
954                context_keywords: &[],
955                patterns: &[
956                    r"(?i)Content-Type\s*:\s*text/html.*\{.*\}.*Content-Type\s*:\s*application/json",
957                ],
958            },
959            // ── Charset / Encoding Attacks ──
960            AttackVector {
961                category: "encoding_attack", subcategory: "charset_confusion", severity: Medium,
962                mitre_id: "T1190",
963                search_location: "all",
964                context_keywords: &[],
965                patterns: &[
966                    r"(?i)%u[0-9a-fA-F]{4}",
967                    r"(?i)&#x[0-9a-fA-F]+;",
968                    r"(?i)&#\d{2,};",
969                    r"(?i)[\\]x[0-9a-fA-F]{2}",
970                ],
971            },
972            // ── User-Agent Probing / Fake Crawlers ──
973            AttackVector {
974                category: "user_agent", subcategory: "fake_crawler", severity: Low,
975                mitre_id: "T1592",
976                search_location: "headers",
977                context_keywords: &[],
978                patterns: &[
979                    r"(?i)User-Agent\s*:\s*.*(sqlmap|nikto|nmap|burp|nessus|wpscan|dirbuster|gobuster|hydra)",
980                    r"(?i)User-Agent\s*:\s*.*(curl|wget|python|go-http|libwww|axios|node-fetch)",
981                ],
982            },
983            // ── API Key / Token Brute Force ──
984            AttackVector {
985                category: "credential_probe", subcategory: "token_brute", severity: High,
986                mitre_id: "T1110.001",
987                search_location: "all",
988                context_keywords: &[],
989                patterns: &[
990                    r"(?i)(Authorization|X-API-Key|X-Auth-Token|Bearer)\s*:\s*[A-Za-z0-9\-_\.]{20,}",
991                ],
992            },
993            // ── Session Fixation ──
994            AttackVector {
995                category: "session_fixation", subcategory: "cookie_set", severity: Medium,
996                mitre_id: "T1539",
997                search_location: "headers",
998                context_keywords: &[],
999                patterns: &[
1000                    r"(?i)Cookie\s*:\s*(SESSID|JSESSIONID|PHPSESSID|session_id|sid|connect\.sid)\s*=\s*[A-Za-z0-9]+",
1001                ],
1002            },
1003            // ── CSS Injection (data exfil) ──
1004            AttackVector {
1005                category: "css_injection", subcategory: "data_exfil", severity: Medium,
1006                mitre_id: "T1213",
1007                search_location: "all",
1008                context_keywords: &[],
1009                patterns: &[
1010                    r#"(?i)@import\s+url\s*\(\s*['\"]?https?://"#,
1011                    r#"(?i)background(-image)?\s*:\s*url\s*\(\s*['\"]?https?://"#,
1012                    r#"input\[type\s*=\s*["']password["']\][^{]*\{[^}]*background"#,
1013                ],
1014            },
1015    ])
1016}
1017
1018/// Pre-compiled attack vectors for fast detection.
1019fn compiled_vectors() -> &'static [CompiledVector] {
1020    static COMPILED: std::sync::OnceLock<Vec<CompiledVector>> = std::sync::OnceLock::new();
1021    COMPILED.get_or_init(|| {
1022        attack_vectors()
1023            .iter()
1024            .map(|av| {
1025                let patterns: Vec<Regex> = av
1026                    .patterns
1027                    .iter()
1028                    .filter_map(|p| Regex::new(p).ok())
1029                    .collect();
1030                CompiledVector {
1031                    category: av.category,
1032                    subcategory: av.subcategory,
1033                    patterns,
1034                    severity: av.severity.clone(),
1035                    mitre_id: av.mitre_id,
1036                    search_location: av.search_location,
1037                    context_keywords: av.context_keywords,
1038                }
1039            })
1040            .collect()
1041    })
1042}
1043
1044// ═════════════════════════════════════════════════════════════════════════════
1045// Honeypot Engine
1046// ═════════════════════════════════════════════════════════════════════════════
1047
1048/// The core honeypot detection and intelligence engine.
1049pub struct HoneypotEngine {
1050    config: HoneypotConfig,
1051    state: HoneypotState,
1052    /// Per-profile request timestamps for cadence analysis.
1053    request_times: HashMap<String, Vec<Instant>>,
1054    /// Fake RSC endpoint list for realistic simulation.
1055    rsc_endpoints: Vec<String>,
1056}
1057
1058impl HoneypotEngine {
1059    /// Create a new honeypot engine with default configuration.
1060    pub fn new() -> Self {
1061        Self::with_config(HoneypotConfig::default())
1062    }
1063
1064    /// Create a new honeypot engine with custom configuration.
1065    pub fn with_config(config: HoneypotConfig) -> Self {
1066        Self {
1067            config: config.clone(),
1068            state: HoneypotState {
1069                config: config,
1070                total_requests: 0,
1071                total_attacks_detected: 0,
1072                unique_attackers: 0,
1073                attack_events: Vec::new(),
1074                attacker_profiles: HashMap::new(),
1075                uptime_seconds: 0.0,
1076                requests_per_minute: 0.0,
1077            },
1078            request_times: HashMap::new(),
1079            rsc_endpoints: vec![
1080                "/_rsc/__PAGE__".to_string(),
1081                "/api/graphql".to_string(),
1082                "/api/auth/callback".to_string(),
1083                "/api/chat".to_string(),
1084                "/api/upload".to_string(),
1085                "/api/search".to_string(),
1086                "/api/admin/settings".to_string(),
1087                "/dashboard".to_string(),
1088            ],
1089        }
1090    }
1091
1092    /// Process a raw HTTP request and return detection results.
1093    pub fn process_request(&mut self, req: &RawRequest) -> DetectionResult {
1094        let _start = Instant::now();
1095        self.state.total_requests += 1;
1096
1097        // Update request rate
1098        self.state.uptime_seconds = (Utc::now().timestamp_millis() as f64) / 1000.0;
1099        if self.state.uptime_seconds > 0.0 {
1100            self.state.requests_per_minute =
1101                (self.state.total_requests as f64 / self.state.uptime_seconds) * 60.0;
1102        }
1103
1104        let profile_id = self.get_or_create_profile_id(req);
1105        self.update_request_times(&profile_id);
1106
1107        // Detect attacks
1108        let detections = self.detect_attacks(req, &profile_id);
1109
1110        let attack_count = detections.len() as u64;
1111        if attack_count > 0 {
1112            self.state.total_attacks_detected += attack_count;
1113            for det in &detections {
1114                self.state.attack_events.push(det.clone());
1115                // Limit stored events
1116                if self.state.attack_events.len() > 10000 {
1117                    self.state.attack_events.drain(0..1000);
1118                }
1119            }
1120        } else if self.config.log_all_requests {
1121            // Log clean requests too if configured
1122            let event = AttackEvent {
1123                event_id: Self::generate_event_id(),
1124                timestamp: Utc::now().to_rfc3339(),
1125                category: "clean".to_string(),
1126                subcategory: "passive".to_string(),
1127                matched_payload: String::new(),
1128                full_payload: String::new(),
1129                method: req.method.clone(),
1130                path: req.path.clone(),
1131                severity: Severity::Info,
1132                mitre_id: None,
1133                simulated_response: 200,
1134                attacker_ip: req.ip.clone(),
1135                user_agent: req.headers.get("user-agent").cloned().unwrap_or_default(),
1136                headers: req.headers.clone(),
1137                session_id: Some(profile_id.clone()),
1138                confidence: 0.0,
1139            };
1140            self.state.attack_events.push(event);
1141            if self.state.attack_events.len() > 10000 {
1142                self.state.attack_events.drain(0..1000);
1143            }
1144        }
1145
1146        // Update or create attacker profile
1147        self.update_attacker_profile(req, &profile_id, &detections);
1148
1149        // Evict stale profiles periodically (keep top 5000)
1150        if self.state.attacker_profiles.len() > 10000 {
1151            self.evict_profiles();
1152        }
1153
1154        // Generate simulated response
1155        let simulated_status = self.simulate_status(&detections);
1156        let simulated_body = self.simulate_body(req, &detections);
1157        let content_type = self.simulate_content_type(req);
1158        let should_block = self.should_block_request(&detections);
1159        let suggested_delay = self.calculate_delay(&detections);
1160
1161        DetectionResult {
1162            detections,
1163            simulated_status,
1164            simulated_body,
1165            content_type,
1166            should_block,
1167            suggested_delay_ms: suggested_delay.as_millis() as u64,
1168        }
1169    }
1170
1171    /// Analyze all 45+ attack vectors against a request.
1172    fn detect_attacks(&mut self, req: &RawRequest, profile_id: &str) -> Vec<AttackEvent> {
1173        let mut events = Vec::new();
1174
1175        for vector in compiled_vectors().iter() {
1176            let search_text = match vector.search_location {
1177                "body" => &req.body,
1178                "query" => &req.query_string,
1179                "path" => &req.path,
1180                "headers" => &self.headers_as_string(&req.headers),
1181                "all" => &self.all_request_text(req),
1182                _ => &self.all_request_text(req),
1183            };
1184
1185            if search_text.is_empty() {
1186                continue;
1187            }
1188
1189            // Check context keywords (AND logic)
1190            if !vector.context_keywords.is_empty() {
1191                let has_context = vector.context_keywords.iter().any(|kw| {
1192                    search_text.to_lowercase().contains(&kw.to_lowercase())
1193                });
1194                if !has_context {
1195                    continue;
1196                }
1197            }
1198
1199            for re in &vector.patterns {
1200                if let Some(m) = re.find(search_text) {
1201                    let matched = m.as_str().to_string();
1202                    let confidence = self.calculate_confidence(vector, &matched, search_text);
1203
1204                    if confidence >= self.config.detection_threshold {
1205                        events.push(AttackEvent {
1206                            event_id: Self::generate_event_id(),
1207                            timestamp: Utc::now().to_rfc3339(),
1208                            category: vector.category.to_string(),
1209                            subcategory: vector.subcategory.to_string(),
1210                            matched_payload: Self::truncate_str(&matched, 500),
1211                            full_payload: Self::truncate_str(
1212                                search_text,
1213                                self.config.max_payload_store,
1214                            ),
1215                            method: req.method.clone(),
1216                            path: req.path.clone(),
1217                            severity: vector.severity.clone(),
1218                            mitre_id: Some(vector.mitre_id.to_string()),
1219                            simulated_response: 0, // Filled later
1220                            attacker_ip: req.ip.clone(),
1221                            user_agent: req
1222                                .headers
1223                                .get("user-agent")
1224                                .cloned()
1225                                .unwrap_or_default(),
1226                            headers: req.headers.clone(),
1227                            session_id: Some(profile_id.to_string()),
1228                            confidence,
1229                        });
1230                        break; // One match per vector category is enough
1231                    }
1232                }
1233            }
1234        }
1235
1236        // Fill simulated_response after detection
1237        for event in &mut events {
1238            event.simulated_response = self.simulate_status_for_event(event);
1239        }
1240
1241        events
1242    }
1243
1244    /// Calculate detection confidence based on pattern specificity and context.
1245    fn calculate_confidence(
1246        &self,
1247        _vector: &CompiledVector,
1248        matched: &str,
1249        full_text: &str,
1250    ) -> f64 {
1251        let mut confidence = 0.5; // Base confidence
1252
1253        // Longer patterns are more specific
1254        let specificity_bonus = (matched.len() as f64 / 30.0).min(0.3);
1255        confidence += specificity_bonus;
1256
1257        // Multiple patterns from same category (already checked via context_keywords)
1258        // Count additional keyword signals
1259        let keyword_signals = [
1260            ("eval", 0.05),
1261            ("exec", 0.05),
1262            ("system", 0.05),
1263            ("import", 0.03),
1264            ("require", 0.03),
1265            ("base64", 0.04),
1266            ("fromCharCode", 0.05),
1267            ("String.fromCharCode", 0.06),
1268            ("atob", 0.03),
1269            ("charCodeAt", 0.03),
1270            ("document.cookie", 0.06),
1271            ("window.location", 0.04),
1272            ("XMLHttpRequest", 0.03),
1273            ("fetch(", 0.02),
1274            ("curl", 0.04),
1275            ("wget", 0.04),
1276            ("nc ", 0.05),
1277            ("/bin/bash", 0.06),
1278            ("/bin/sh", 0.06),
1279            ("cmd.exe", 0.06),
1280            ("powershell", 0.06),
1281            ("reverse", 0.04),
1282            ("shell", 0.05),
1283            ("backdoor", 0.06),
1284            ("trojan", 0.06),
1285            ("exploit", 0.05),
1286        ];
1287
1288        let lower = full_text.to_lowercase();
1289        for (signal, bonus) in keyword_signals {
1290            if lower.contains(signal) {
1291                confidence += bonus;
1292            }
1293        }
1294
1295        confidence.min(1.0)
1296    }
1297
1298    /// Determine the simulated HTTP status for a detection set.
1299    fn simulate_status(&self, detections: &[AttackEvent]) -> u16 {
1300        if detections.is_empty() {
1301            return 200;
1302        }
1303        let has_critical = detections
1304            .iter()
1305            .any(|d| d.severity == Severity::Critical);
1306        let has_high = detections.iter().any(|d| d.severity == Severity::High);
1307
1308        if has_critical {
1309            500 // Internal error — don't tip off the attacker
1310        } else if has_high {
1311            400 // Bad request
1312        } else {
1313            200 // Appear normal for low/medium severity
1314        }
1315    }
1316
1317    fn simulate_status_for_event(&self, event: &AttackEvent) -> u16 {
1318        match event.severity {
1319            Severity::Critical => 500,
1320            Severity::High => 400,
1321            Severity::Medium | Severity::Low | Severity::Info => 200,
1322        }
1323    }
1324
1325    /// Check whether a request targets an RSC endpoint.
1326    fn is_rsc_request(&self, req: &RawRequest) -> bool {
1327        self.rsc_endpoints.iter().any(|ep| req.path.starts_with(ep))
1328            || req
1329                .headers
1330                .get("content-type")
1331                .map(|ct| ct.contains("text/x-component"))
1332                .unwrap_or(false)
1333            || req.headers.contains_key("next-action")
1334    }
1335
1336    /// Generate a realistic fake response body.
1337    fn simulate_body(&self, req: &RawRequest, _detections: &[AttackEvent]) -> String {
1338        if !self.config.fake_rsc_responses {
1339            return String::new();
1340        }
1341
1342        let is_rsc = self.is_rsc_request(req);
1343        let mut body = if is_rsc {
1344            self.generate_fake_rsc_response(req)
1345        } else if req.path.contains("/api/") {
1346            self.generate_fake_api_response(req)
1347        } else {
1348            self.generate_fake_html_response(req)
1349        };
1350
1351        // Progressive sizing: pad response with irrelevant data to simulate
1352        // a real app's variable response sizes and keep attackers engaged
1353        if self.config.progressive_sizing {
1354            let extra_bytes = 128 + (Utc::now().timestamp_millis() as usize % 1024);
1355            body.push_str(&" ".repeat(extra_bytes / 32));
1356        }
1357
1358        body
1359    }
1360
1361    /// Generate a fake React Server Components Flight-protocol response.
1362    fn generate_fake_rsc_response(&self, _req: &RawRequest) -> String {
1363        let responses = [
1364            r#"0:["$","@2",null,{"id":"__PAGE__","children":[["$","@3",null,{"name":"Page","props":{}}]]}]
13651:{"status":"resolved","data":{"pageProps":{"title":"Dashboard","user":{"name":"Admin User","role":"administrator","email":"admin@internal.local"}}}}
13662:["$","div",null,{"className":"page-wrapper","children":[["$","header",null,{"children":"Dashboard"}],["$","main",null,{"children":["$","p",null,{"children":"Welcome back, Admin User"},"$","@4",null,{}]}]]
13673:{"status":"pending","chunks":["@5","@6"]}"#,
1368            r#"0:["$","@2",null,{"id":"__PAGE__"}]
13691:{"status":"resolved","data":{"pageProps":{"items":[{"id":1,"name":"Project Alpha","status":"active","owner":"admin"},{"id":2,"name":"Project Beta","status":"inactive","owner":"user2"},{"id":3,"name":"API Gateway","status":"active","owner":"admin"}]}}}
13702:"$6eb96e9c8e4a3f1b2d5c7a8e9f0a1b2c""#,
1371            r#"0:["$","@2",null,{"id":"__PAGE__","children":[["$","@3",null,{"name":"ErrorBoundary"}]],"fallback":["$","@4",null,{"name":"SuspenseFallback"}]}]
13721:{"status":"pending","chunks":["@5"]}
13732:["$","div",null,{"className":"layout","children":["$","nav",null,{"children":[["$","a",null,{"href":"/dashboard","children":"Dashboard"}],["$","a",null,{"href":"/settings","children":"Settings"}]]}]}"#,
1374        ];
1375
1376        let idx = (Utc::now().timestamp_millis() as usize) % responses.len();
1377        responses[idx].to_string()
1378    }
1379
1380    /// Generate a fake JSON API response.
1381    fn generate_fake_api_response(&self, req: &RawRequest) -> String {
1382        if req.path.contains("/graphql") {
1383            r#"{"data":{"__typename":"Query","node":{"id":"UHJvamVjdDox","name":"Internal Project","owner":{"login":"admin","email":"admin@internal.local"}}}}"#.to_string()
1384        } else {
1385            r#"{"success":true,"data":{"id":"67f1a2b3c4d5","status":"ok","timestamp":"2026-05-06T12:00:00Z","message":"Operation completed"}}"#.to_string()
1386        }
1387    }
1388
1389    /// Generate a fake HTML page response.
1390    fn generate_fake_html_response(&self, _req: &RawRequest) -> String {
1391        format!(
1392            r#"<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="generator" content="Next.js 15.2.3"><meta name="viewport" content="width=device-width,initial-scale=1"><title>Internal Dashboard</title><link rel="preload" href="/_next/static/chunks/app/layout-{}.js" as="script"></head><body><div id="__next"><div class="app-shell"><header class="topbar"><nav><a href="/dashboard">Dashboard</a><a href="/admin">Admin</a><a href="/api/docs">API</a></nav><div class="user-menu">Signed in as <strong>Admin User</strong></div></header><main><!--$?--><template id="B:0"></template><div class="skeleton-loader"><div class="skeleton-card"></div><div class="skeleton-row"></div><div class="skeleton-row short"></div></div><!--/$--></main></div></div><script src="/_next/static/chunks/main-app-{}.js" async></script></body></html>"#,
1393            Self::random_hex(16),
1394            Self::random_hex(16)
1395        )
1396    }
1397
1398    fn simulate_content_type(&self, req: &RawRequest) -> String {
1399        let is_rsc = self.is_rsc_request(req);
1400
1401        if is_rsc {
1402            "text/x-component; charset=utf-8".to_string()
1403        } else if req.path.contains("/api/") {
1404            "application/json; charset=utf-8".to_string()
1405        } else {
1406            "text/html; charset=utf-8".to_string()
1407        }
1408    }
1409
1410    /// Decide whether to block based on severity.
1411    fn should_block_request(&self, detections: &[AttackEvent]) -> bool {
1412        detections
1413            .iter()
1414            .any(|d| d.severity == Severity::Critical || d.confidence > 0.9)
1415    }
1416
1417    /// Calculate realistic response delay.
1418    fn calculate_delay(&self, _detections: &[AttackEvent]) -> Duration {
1419        if !self.config.realistic_timing {
1420            return Duration::from_millis(0);
1421        }
1422        use std::collections::hash_map::DefaultHasher;
1423        use std::hash::{Hash, Hasher};
1424        let mut hasher = DefaultHasher::new();
1425        Utc::now().timestamp_nanos_opt().unwrap_or(0).hash(&mut hasher);
1426        let hash = hasher.finish();
1427        let jitter = (hash % (self.config.max_delay_ms - self.config.min_delay_ms + 1)) as u64;
1428        Duration::from_millis(self.config.min_delay_ms + jitter)
1429    }
1430
1431    // ── Attacker Profiling ────────────────────────────────────────────────
1432
1433    /// Get or create a profile ID for an attacker.
1434    fn get_or_create_profile_id(&mut self, req: &RawRequest) -> String {
1435        let fingerprint = self.build_fingerprint(req);
1436        let hashed = Self::hash_str(&fingerprint);
1437        let profile_id = format!("prof_{}", &hashed[..16]);
1438
1439        if !self.state.attacker_profiles.contains_key(&profile_id) {
1440            self.state.attacker_profiles.insert(
1441                profile_id.clone(),
1442                AttackerProfile {
1443                    profile_id: profile_id.clone(),
1444                    ip: req.ip.clone(),
1445                    country: None,
1446                    asn: None,
1447                    is_tor: false,
1448                    is_cloud: false,
1449                    is_proxy: false,
1450                    user_agent: req
1451                        .headers
1452                        .get("user-agent")
1453                        .cloned()
1454                        .unwrap_or_default(),
1455                    browser_fingerprint: self.parse_user_agent(
1456                        req.headers
1457                            .get("user-agent")
1458                            .map(|s| s.as_str())
1459                            .unwrap_or(""),
1460                    ),
1461                    first_seen: Utc::now().to_rfc3339(),
1462                    last_seen: Utc::now().to_rfc3339(),
1463                    total_requests: 0,
1464                    attack_categories: HashMap::new(),
1465                    techniques_used: Vec::new(),
1466                    avg_request_interval: 0.0,
1467                    is_automated: false,
1468                    risk_score: 0.0,
1469                    targets: Vec::new(),
1470                    event_timeline: Vec::new(),
1471                },
1472            );
1473            self.state.unique_attackers = self.state.attacker_profiles.len();
1474        }
1475
1476        profile_id
1477    }
1478
1479    /// Build a fingerprint from request characteristics.
1480    fn build_fingerprint(&self, req: &RawRequest) -> String {
1481        let ua = req.headers.get("user-agent").map(|s| s.as_str()).unwrap_or("");
1482        let accept = req
1483            .headers
1484            .get("accept")
1485            .map(|s| s.as_str())
1486            .unwrap_or("");
1487        let accept_lang = req
1488            .headers
1489            .get("accept-language")
1490            .map(|s| s.as_str())
1491            .unwrap_or("");
1492        let accept_enc = req
1493            .headers
1494            .get("accept-encoding")
1495            .map(|s| s.as_str())
1496            .unwrap_or("");
1497
1498        format!("{}|{}|{}|{}|{}", req.ip, ua, accept, accept_lang, accept_enc)
1499    }
1500
1501    /// Update request time tracking for cadence analysis.
1502    fn update_request_times(&mut self, profile_id: &str) {
1503        let times = self.request_times.entry(profile_id.to_string()).or_default();
1504        times.push(Instant::now());
1505        // Keep only last 100 timestamps
1506        if times.len() > 100 {
1507            times.drain(0..times.len() - 100);
1508        }
1509
1510        // Update automaton detection
1511        if let Some(profile) = self.state.attacker_profiles.get_mut(profile_id) {
1512            profile.total_requests += 1;
1513            profile.last_seen = Utc::now().to_rfc3339();
1514            if times.len() >= 3 {
1515                let intervals: Vec<f64> = times
1516                    .windows(2)
1517                    .map(|w| w[1].duration_since(w[0]).as_secs_f64())
1518                    .collect();
1519                profile.avg_request_interval =
1520                    intervals.iter().sum::<f64>() / intervals.len() as f64;
1521
1522                // Detect automation: very consistent timing or very fast
1523                if profile.total_requests >= 10 {
1524                    let std_dev = Self::std_dev(&intervals, profile.avg_request_interval);
1525                    profile.is_automated = profile.avg_request_interval < 0.1
1526                        || (profile.avg_request_interval < 0.5 && std_dev < 0.05);
1527                }
1528            }
1529        }
1530    }
1531
1532    /// Update the attacker profile with detection results.
1533    fn update_attacker_profile(
1534        &mut self,
1535        req: &RawRequest,
1536        profile_id: &str,
1537        detections: &[AttackEvent],
1538    ) {
1539        if let Some(profile) = self.state.attacker_profiles.get_mut(profile_id) {
1540            for det in detections {
1541                *profile
1542                    .attack_categories
1543                    .entry(det.category.clone())
1544                    .or_insert(0) += 1;
1545                if !profile.techniques_used.contains(&det.subcategory) {
1546                    profile.techniques_used.push(det.subcategory.clone());
1547                }
1548                profile.event_timeline.push(format!(
1549                    "{} | {}:{} | {} | conf={:.2}",
1550                    &det.timestamp[..19],
1551                    det.category,
1552                    det.subcategory,
1553                    det.severity
1554                        .clone()
1555                        .serde_name()
1556                        .unwrap_or("unknown"),
1557                    det.confidence
1558                ));
1559            }
1560            if !profile.targets.contains(&req.path) {
1561                profile.targets.push(req.path.clone());
1562            }
1563
1564            // Calculate risk score (0-100)
1565            profile.risk_score = Self::calculate_risk_score(profile);
1566        }
1567    }
1568
1569    /// Evict low-activity profiles to prevent unbounded memory growth.
1570    /// Keeps the top 5000 profiles sorted by risk_score (descending).
1571    fn evict_profiles(&mut self) {
1572        let mut sorted: Vec<(String, f64)> = self
1573            .state
1574            .attacker_profiles
1575            .iter()
1576            .map(|(id, p)| (id.clone(), p.risk_score))
1577            .collect();
1578        sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1579        let keep_ids: std::collections::HashSet<String> = sorted
1580            .iter()
1581            .take(5000)
1582            .map(|(id, _)| id.clone())
1583            .collect();
1584        self.state
1585            .attacker_profiles
1586            .retain(|id, _| keep_ids.contains(id));
1587        self.state.unique_attackers = self.state.attacker_profiles.len();
1588    }
1589
1590    /// Calculate a cumulative risk score for an attacker profile.
1591    fn calculate_risk_score(profile: &AttackerProfile) -> f64 {
1592        let mut score = 0.0;
1593
1594        // Severity-based scoring
1595        let severity_weights: HashMap<&str, f64> = [
1596            ("sqli", 15.0),
1597            ("cmdi", 15.0),
1598            ("rce", 15.0),
1599            ("deserialization", 15.0),
1600            ("xxe", 14.0),
1601            ("ssti", 14.0),
1602            ("lfi", 13.0),
1603            ("rfi", 13.0),
1604            ("ssrf", 12.0),
1605            ("nosqli", 12.0),
1606            ("xss", 8.0),
1607            ("path_traversal", 8.0),
1608            ("file_upload", 10.0),
1609            ("crlf", 7.0),
1610            ("http_smuggling", 9.0),
1611            ("jwt", 9.0),
1612            ("auth_bypass", 10.0),
1613            ("prototype_pollution", 8.0),
1614            ("rsc_attack", 14.0),
1615            ("dns_exfil", 6.0),
1616        ]
1617        .into_iter()
1618        .collect();
1619
1620        for (cat, count) in &profile.attack_categories {
1621            let weight = severity_weights.get(cat.as_str()).copied().unwrap_or(3.0);
1622            score += weight * (*count as f64).min(3.0); // Cap at 3x per category
1623        }
1624
1625        // Multiplier for diverse techniques
1626        let technique_bonus = (profile.techniques_used.len() as f64 * 2.0).min(20.0);
1627        score += technique_bonus;
1628
1629        // Automation bonus
1630        if profile.is_automated {
1631            score += 10.0;
1632        }
1633
1634        // Request volume bonus
1635        if profile.total_requests > 100 {
1636            score += 5.0;
1637        }
1638        if profile.total_requests > 500 {
1639            score += 5.0;
1640        }
1641
1642        score.min(100.0)
1643    }
1644
1645    // ── User-Agent Parsing ─────────────────────────────────────────────────
1646
1647    /// Parse User-Agent string into structured browser/OS fingerprint.
1648    fn parse_user_agent(&self, ua: &str) -> Option<BrowserFingerprint> {
1649        if ua.is_empty() {
1650            return None;
1651        }
1652
1653        let ua_lower = ua.to_lowercase();
1654
1655        // Browser detection
1656        let (browser, browser_version) = if ua_lower.contains("firefox") {
1657            ("Firefox", Self::extract_version(ua, "Firefox/"))
1658        } else if ua_lower.contains("edg") {
1659            ("Edge", Self::extract_version(ua, "Edg/"))
1660        } else if ua_lower.contains("chrome") && !ua_lower.contains("chromium") {
1661            ("Chrome", Self::extract_version(ua, "Chrome/"))
1662        } else if ua_lower.contains("safari") && !ua_lower.contains("chrome") {
1663            ("Safari", Self::extract_version(ua, "Version/"))
1664        } else if ua_lower.contains("opera") || ua_lower.contains("opr") {
1665            ("Opera", Self::extract_version(ua, "OPR/"))
1666        } else if ua_lower.contains("msie") || ua_lower.contains("trident") {
1667            ("Internet Explorer", Self::extract_version(ua, "MSIE "))
1668        } else {
1669            ("Unknown", "0.0".to_string())
1670        };
1671
1672        // OS detection
1673        let (os, os_version) = if ua_lower.contains("windows nt 10") {
1674            ("Windows", "10/11".to_string())
1675        } else if ua_lower.contains("windows nt 6.3") {
1676            ("Windows", "8.1".to_string())
1677        } else if ua_lower.contains("windows nt 6.1") {
1678            ("Windows", "7".to_string())
1679        } else if ua_lower.contains("mac os x") {
1680            ("macOS", Self::extract_version(ua, "Mac OS X "))
1681        } else if ua_lower.contains("android") {
1682            ("Android", Self::extract_version(ua, "Android "))
1683        } else if ua_lower.contains("iphone") || ua_lower.contains("ipad") {
1684            ("iOS", Self::extract_version(ua, "OS "))
1685        } else if ua_lower.contains("linux") {
1686            ("Linux", "".to_string())
1687        } else {
1688            ("Unknown", "".to_string())
1689        };
1690
1691        let engine = if ua_lower.contains("webkit") {
1692            "WebKit"
1693        } else if ua_lower.contains("gecko") {
1694            "Gecko"
1695        } else if ua_lower.contains("trident") {
1696            "Trident"
1697        } else {
1698            "Unknown"
1699        };
1700
1701        let device_type = if ua_lower.contains("mobile") || ua_lower.contains("android") {
1702            "Mobile"
1703        } else if ua_lower.contains("tablet") || ua_lower.contains("ipad") {
1704            "Tablet"
1705        } else {
1706            "Desktop"
1707        };
1708
1709        let is_headless = ua_lower.contains("headless")
1710            || ua_lower.contains("phantom")
1711            || ua_lower.contains("puppeteer")
1712            || ua_lower.contains("playwright")
1713            || ua_lower.contains("selenium");
1714
1715        Some(BrowserFingerprint {
1716            browser: browser.to_string(),
1717            browser_version,
1718            os: os.to_string(),
1719            os_version,
1720            engine: engine.to_string(),
1721            device_type: device_type.to_string(),
1722            is_headless,
1723        })
1724    }
1725
/// Extract the `major.minor` version that follows `prefix` in `ua`.
///
/// The version token starts right after the first occurrence of `prefix` and
/// ends at the next space, `;` or `)` (or at the end of the string). Its
/// components may be separated by `.` or by `_` (as in `Mac OS X 10_15_7` and
/// `iPhone OS 17_0`); the first two components are returned joined with `.`.
///
/// Returns `"0.0"` when `prefix` does not occur in `ua`.
fn extract_version(ua: &str, prefix: &str) -> String {
    let start = match ua.find(prefix) {
        Some(pos) => pos + prefix.len(),
        None => return "0.0".to_string(),
    };

    let rest = &ua[start..];
    let end = rest
        .find(|c: char| c == ' ' || c == ';' || c == ')')
        .unwrap_or(rest.len());

    // Take only major.minor; Apple platforms use '_' between components.
    rest[..end]
        .split(|c: char| c == '.' || c == '_')
        .take(2)
        .collect::<Vec<_>>()
        .join(".")
}
1744
1745    // ── Helpers ────────────────────────────────────────────────────────────
1746
1747    fn all_request_text(&self, req: &RawRequest) -> String {
1748        format!(
1749            "{} {}?{} {} {}",
1750            req.method,
1751            req.path,
1752            req.query_string,
1753            req.body,
1754            self.headers_as_string(&req.headers)
1755        )
1756    }
1757
1758    fn headers_as_string(&self, headers: &HashMap<String, String>) -> String {
1759        headers
1760            .iter()
1761            .map(|(k, v)| format!("{}: {}", k, v))
1762            .collect::<Vec<_>>()
1763            .join("\n")
1764    }
1765
1766    fn generate_event_id() -> String {
1767        format!(
1768            "evt_{}_{}",
1769            Utc::now().timestamp_millis(),
1770            Self::random_hex(8)
1771        )
1772    }
1773
/// Shorten `s` to at most `max_len` bytes, noting how much was removed.
///
/// Strings that already fit are returned unchanged. Longer strings are cut
/// and suffixed with `...[truncated N bytes]`, where `N` is the number of
/// bytes dropped.
///
/// The cut never lands inside a multi-byte UTF-8 character: if `max_len`
/// falls in the middle of one, the cut moves back to the previous character
/// boundary. Slicing at the raw byte offset would panic on such input.
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }

    // Offset 0 is always a boundary, so this loop terminates.
    let mut cut = max_len;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...[truncated {} bytes]", &s[..cut], s.len() - cut)
}
1781
1782    fn random_hex(len: usize) -> String {
1783        use std::collections::hash_map::DefaultHasher;
1784        use std::hash::{Hash, Hasher};
1785        let mut hasher = DefaultHasher::new();
1786        Utc::now().timestamp_nanos_opt().unwrap_or(0).hash(&mut hasher);
1787        format!("{:016x}", hasher.finish())[..len].to_string()
1788    }
1789
/// Hash `s` with `DefaultHasher` and return the 64-bit digest as 16
/// zero-padded lowercase hex characters.
fn hash_str(s: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(s, &mut state);
    let digest: u64 = state.finish();

    format!("{:016x}", digest)
}
1797
/// Sample standard deviation of `values` around the supplied `mean`.
///
/// Uses the `n - 1` denominator. Returns 0.0 when fewer than two values are
/// given.
fn std_dev(values: &[f64], mean: f64) -> f64 {
    let count = values.len();
    if count < 2 {
        return 0.0;
    }

    let mut squared_error = 0.0;
    for value in values {
        squared_error += (value - mean).powi(2);
    }

    (squared_error / (count - 1) as f64).sqrt()
}
1806
1807    // ── Public API ─────────────────────────────────────────────────────────
1808
1809    /// Get the current honeypot state (for dashboard/export).
1810    pub fn get_state(&self) -> &HoneypotState {
1811        &self.state
1812    }
1813
1814    /// Get a snapshot of all attacker profiles.
1815    pub fn get_profiles(&self) -> Vec<&AttackerProfile> {
1816        self.state.attacker_profiles.values().collect()
1817    }
1818
1819    /// Get a specific attacker profile by ID.
1820    pub fn get_profile(&self, profile_id: &str) -> Option<&AttackerProfile> {
1821        self.state.attacker_profiles.get(profile_id)
1822    }
1823
1824    /// Get top-N most dangerous attacker profiles.
1825    pub fn get_top_threats(&self, n: usize) -> Vec<&AttackerProfile> {
1826        let mut profiles: Vec<&AttackerProfile> =
1827            self.state.attacker_profiles.values().collect();
1828        profiles.sort_by(|a, b| {
1829            b.risk_score
1830                .partial_cmp(&a.risk_score)
1831                .unwrap_or(std::cmp::Ordering::Equal)
1832        });
1833        profiles.truncate(n);
1834        profiles
1835    }
1836
1837    /// Export full state as JSON.
1838    pub fn export_json(&self) -> serde_json::Result<String> {
1839        serde_json::to_string_pretty(&self.state)
1840    }
1841
1842    /// Reset all state.
1843    pub fn reset(&mut self) {
1844        self.state = HoneypotState {
1845            config: self.config.clone(),
1846            total_requests: 0,
1847            total_attacks_detected: 0,
1848            unique_attackers: 0,
1849            attack_events: Vec::new(),
1850            attacker_profiles: HashMap::new(),
1851            uptime_seconds: 0.0,
1852            requests_per_minute: 0.0,
1853        };
1854        self.request_times.clear();
1855    }
1856}
1857
1858impl Default for HoneypotEngine {
1859    fn default() -> Self {
1860        Self::new()
1861    }
1862}
1863
1864impl Severity {
1865    fn serde_name(&self) -> Option<&str> {
1866        match self {
1867            Severity::Info => Some("info"),
1868            Severity::Low => Some("low"),
1869            Severity::Medium => Some("medium"),
1870            Severity::High => Some("high"),
1871            Severity::Critical => Some("critical"),
1872        }
1873    }
1874}
1875
1876// ═════════════════════════════════════════════════════════════════════════════
1877// Tests
1878// ═════════════════════════════════════════════════════════════════════════════
1879
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `RawRequest` from 192.168.1.100 with an empty query string.
    ///
    /// Header names are lower-cased before insertion; when a name repeats,
    /// the last value wins.
    fn make_request(method: &str, path: &str, body: &str, headers: Vec<(&str, &str)>) -> RawRequest {
        RawRequest {
            method: method.to_string(),
            path: path.to_string(),
            query_string: String::new(),
            body: body.to_string(),
            headers: headers
                .into_iter()
                .map(|(name, value)| (name.to_lowercase(), value.to_string()))
                .collect(),
            ip: "192.168.1.100".to_string(),
            timestamp: Utc::now(),
        }
    }

    /// Run a single request through a fresh engine and report whether any
    /// resulting detection belongs to `category`.
    fn detects(
        category: &str,
        method: &str,
        path: &str,
        body: &str,
        headers: Vec<(&str, &str)>,
    ) -> bool {
        let mut engine = HoneypotEngine::new();
        let req = make_request(method, path, body, headers);
        let result = engine.process_request(&req);
        let found = result.detections.iter().any(|d| d.category == category);
        found
    }

    #[test]
    fn test_sqli_detection() {
        assert!(
            detects(
                "sqli",
                "GET",
                "/login",
                "username=admin' OR '1'='1&password=test",
                vec![],
            ),
            "Should detect SQL injection"
        );
    }

    #[test]
    fn test_xss_detection() {
        assert!(
            detects(
                "xss",
                "GET",
                "/search",
                "q=<script>alert('XSS')</script>",
                vec![],
            ),
            "Should detect XSS"
        );
    }

    #[test]
    fn test_cmdi_detection() {
        assert!(
            detects("cmdi", "POST", "/api/exec", "cmd=;id", vec![]),
            "Should detect command injection"
        );
    }

    #[test]
    fn test_path_traversal_detection() {
        assert!(
            detects(
                "path_traversal",
                "GET",
                "/download",
                "file=../../../etc/passwd",
                vec![],
            ),
            "Should detect path traversal"
        );
    }

    #[test]
    fn test_ssti_detection() {
        // The message is a format string, so each literal brace is doubled
        // to make the failure output read `{{7*7}}`.
        assert!(
            detects("ssti", "POST", "/contact", "name={{7*7}}", vec![]),
            "Should detect SSTI ({{{{7*7}}}})"
        );
    }

    #[test]
    fn test_lfi_detection() {
        assert!(
            detects("lfi", "GET", "/view", "page=/etc/passwd", vec![]),
            "Should detect LFI (/etc/passwd)"
        );
    }

    #[test]
    fn test_ssrf_metadata_detection() {
        assert!(
            detects(
                "ssrf",
                "POST",
                "/api/fetch",
                "url=http://169.254.169.254/latest/meta-data/",
                vec![],
            ),
            "Should detect SSRF cloud metadata probe"
        );
    }

    #[test]
    fn test_rsc_flight_detection() {
        assert!(
            detects(
                "rsc_attack",
                "POST",
                "/",
                r#"0:[["$","@1",null,{"id":"malicious_component","chunks":[]}]]"#,
                vec![("Content-Type", "text/x-component"), ("Next-Action", "exploit")],
            ),
            "Should detect RSC/Flight protocol attack"
        );
    }

    #[test]
    fn test_attacker_profiling() {
        let mut engine = HoneypotEngine::new();

        // Send multiple attacks from same IP
        for i in 0..5 {
            let body = format!("cmd=;id_{}", i);
            let req = make_request(
                "POST",
                "/api/exec",
                &body,
                vec![("User-Agent", "sqlmap/1.0")],
            );
            engine.process_request(&req);
        }

        let profiles = engine.get_profiles();
        let profile = profiles
            .first()
            .expect("Should have at least one profile");
        assert!(profile.total_requests >= 5);
        assert!(!profile.techniques_used.is_empty());
    }

    #[test]
    fn test_ua_parsing() {
        let engine = HoneypotEngine::new();
        let fp = engine
            .parse_user_agent(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            )
            .expect("a non-empty User-Agent should yield a fingerprint");
        assert_eq!(fp.browser, "Chrome");
        assert_eq!(fp.os, "Windows");
        assert_eq!(fp.engine, "WebKit");
    }

    #[test]
    fn test_no_detection_on_clean_request() {
        let mut engine = HoneypotEngine::new();
        let req = make_request("GET", "/", "", vec![("User-Agent", "Mozilla/5.0")]);
        let result = engine.process_request(&req);

        // Clean GET to / with no payloads should have zero critical/high detections
        let high_or_worse = result
            .detections
            .iter()
            .filter(|d| d.severity >= Severity::High)
            .count();
        assert_eq!(
            high_or_worse, 0,
            "Clean request should not trigger high-severity detections"
        );
    }

    #[test]
    fn test_risk_score() {
        let profile = AttackerProfile {
            profile_id: "test".to_string(),
            ip: "10.0.0.1".to_string(),
            country: None,
            asn: None,
            is_tor: false,
            is_cloud: false,
            is_proxy: false,
            user_agent: String::new(),
            browser_fingerprint: None,
            first_seen: String::new(),
            last_seen: String::new(),
            total_requests: 200,
            attack_categories: [
                ("sqli".to_string(), 5),
                ("xss".to_string(), 3),
                ("cmdi".to_string(), 2),
            ]
            .into_iter()
            .collect(),
            techniques_used: vec![
                "union_select".to_string(),
                "stacked".to_string(),
                "reflected".to_string(),
            ],
            avg_request_interval: 0.05,
            is_automated: true,
            risk_score: 0.0,
            targets: vec!["/login".to_string(), "/api/exec".to_string()],
            event_timeline: vec![],
        };

        let score = HoneypotEngine::calculate_risk_score(&profile);

        assert!(
            score > 60.0,
            "Risk score should be high for diverse attacks: got {}",
            score
        );
        assert!(score <= 100.0, "Risk score should not exceed 100");
    }

    #[test]
    fn test_export_json() {
        let mut engine = HoneypotEngine::new();
        let req = make_request(
            "POST",
            "/api/login",
            "user=admin'--",
            vec![("User-Agent", "Mozilla/5.0")],
        );
        engine.process_request(&req);

        let json = engine.export_json().unwrap();
        assert!(json.contains("sqli"));
        assert!(json.contains("admin"));
    }

    #[test]
    fn test_fake_rsc_response() {
        let engine = HoneypotEngine::new();
        let req = make_request(
            "POST",
            "/dashboard",
            "[]",
            vec![("Content-Type", "text/x-component"), ("Next-Action", "test")],
        );
        let body = engine.generate_fake_rsc_response(&req);
        assert!(!body.is_empty());
        assert!(body.contains("$") || body.contains("pageProps") || body.contains("status"));
    }

    #[test]
    fn test_fake_html_response() {
        let engine = HoneypotEngine::new();
        let req = make_request("GET", "/", "", vec![]);
        let body = engine.generate_fake_html_response(&req);
        assert!(body.contains("<html"));
        assert!(body.contains("Next.js"));
        assert!(body.contains("__next"));
    }
}