Skip to main content

edgeguard/
waf.rs

1//! WAF-lite: heuristic input inspection (Phase 4 / v2).
2//!
3//! A small request screener that runs in the proxy pipeline after auth and the size/method
4//! checks, just before a request is forwarded. It matches the request against built-in
5//! heuristic rulesets (SQL-injection, cross-site-scripting, path-traversal) and any
6//! operator-defined [`crate::config::WafRule`] deny patterns.
7//!
8//! Three things keep this honest and safe rather than a foot-gun:
9//!
10//! * **Off by default, report-first.** `mode = "off"` makes [`WafEngine::evaluate`] a no-op
11//!   with zero per-request work. `report` evaluates rules and logs/counts matches but still
12//!   forwards the request, so an operator can roll out a ruleset and watch
13//!   `edgeguard_waf_hits_total` for false positives before switching to `block` (`403`).
14//! * **Heuristics, acknowledged.** The built-in patterns are signatures, not a full WAF; they
15//!   miss novel payloads and occasionally false-positive. That trade-off is why they default
16//!   off and ship the report-first workflow.
//! * **ReDoS-safe matching.** Patterns compile with the `regex` crate, whose RE2-style
//!   automaton-based engine searches in time linear in the input and does not support
//!   backreferences/lookaround, so an operator-supplied pattern can't pin a CPU with
//!   catastrophic backtracking. A pattern that fails to compile is rejected at
//!   startup/reload like any other config error.
21//!
22//! Like [`crate::config::parse_host_port`], the percent-decoder here is deliberately minimal:
23//! the proxy doesn't need a full URL library to surface `%2e%2e%2f` as `../`.
24
25use anyhow::{Context, Result};
26use axum::http::HeaderMap;
27use regex::RegexSet;
28
29use crate::config::WafCfg;
30
31/// What the engine does with a request that matches a rule.
32#[derive(Debug, Clone, Copy, PartialEq, Eq)]
33pub enum WafMode {
34    /// Disabled — [`WafEngine::evaluate`] is a no-op. Default.
35    Off,
36    /// Evaluate rules and log/count matches, but forward the request anyway.
37    Report,
38    /// Reject a matching request with `403 Forbidden`.
39    Block,
40}
41
42impl WafMode {
43    fn parse(s: &str) -> Result<WafMode> {
44        match s.trim().to_ascii_lowercase().as_str() {
45            "off" | "disabled" | "" => Ok(WafMode::Off),
46            "report" | "report-only" | "detect" => Ok(WafMode::Report),
47            "block" | "enforce" | "deny" => Ok(WafMode::Block),
48            other => anyhow::bail!("invalid waf.mode {other:?} (expected off|report|block)"),
49        }
50    }
51}
52
/// One part of a request that a rule can be matched against.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Location {
    Path,
    Headers,
    Body,
}

impl Location {
    /// The lowercase label for this location, as reported in [`WafHit::location`].
    fn as_str(self) -> &'static str {
        let label = match self {
            Self::Path => "path",
            Self::Headers => "headers",
            Self::Body => "body",
        };
        label
    }
}
70
71/// The set of locations a rule applies to. Built-in rules use [`Target::ALL`]; a custom rule
72/// carries the single location parsed from its `target` field.
73#[derive(Debug, Clone, Copy)]
74struct Target {
75    path: bool,
76    headers: bool,
77    body: bool,
78}
79
80impl Target {
81    const ALL: Target = Target {
82        path: true,
83        headers: true,
84        body: true,
85    };
86
87    fn parse(s: &str) -> Result<Target> {
88        match s.trim().to_ascii_lowercase().as_str() {
89            "path" | "" => Ok(Target {
90                path: true,
91                headers: false,
92                body: false,
93            }),
94            "headers" | "header" => Ok(Target {
95                path: false,
96                headers: true,
97                body: false,
98            }),
99            "body" => Ok(Target {
100                path: false,
101                headers: false,
102                body: true,
103            }),
104            "all" | "any" => Ok(Target::ALL),
105            other => {
106                anyhow::bail!("invalid waf rule target {other:?} (expected path|headers|body|all)")
107            }
108        }
109    }
110
111    fn includes(&self, loc: Location) -> bool {
112        match loc {
113            Location::Path => self.path,
114            Location::Headers => self.headers,
115            Location::Body => self.body,
116        }
117    }
118}
119
120/// A compiled rule: a reporting id + metric class, the locations it applies to, and a set of
121/// patterns (any match is a hit). Built-in rulesets compile their whole category into one set.
122struct CompiledRule {
123    /// Reported in logs (built-in category name, or the operator's rule id).
124    id: String,
125    /// Coarse class for metrics: `sqli` | `xss` | `path_traversal` | `custom`.
126    class: &'static str,
127    target: Target,
128    set: RegexSet,
129}
130
131impl CompiledRule {
132    fn hit(&self, location: Location) -> WafHit {
133        WafHit {
134            rule_id: self.id.clone(),
135            class: self.class,
136            location: location.as_str(),
137        }
138    }
139}
140
/// The outcome of a matching rule: which rule fired, its metric class, and where it matched.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log a hit with `{:?}`, keep
/// a copy, and compare hits in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WafHit {
    /// Id of the rule that fired (built-in category name or the operator's rule id).
    pub rule_id: String,
    /// Coarse metric class: `sqli` | `xss` | `path_traversal` | `custom`.
    pub class: &'static str,
    /// Where the match was found: `path` | `headers` | `body`.
    pub location: &'static str,
}
147
/// Built-in SQL-injection signatures (case-insensitive). Heuristic: tuned to catch the common
/// boolean/union/stacked/time-based shapes while not firing on prose that merely contains a
/// keyword like "union".
///
/// All patterns are compiled together into one set; a match on any of them is a hit.
const SQLI: &[&str] = &[
    // `union select` / `union all select`, each keyword as a whole word.
    r"(?i)\bunion\b\s+(all\s+)?\bselect\b",
    // Numeric tautology after `or`, e.g. `or 1=1`.
    r"(?i)\bor\b\s+\d+\s*=\s*\d+",
    // Quote-delimited `or`, e.g. `' or '`.
    r"(?i)'\s*or\s+'",
    // `drop table`.
    r"(?i)\bdrop\s+table\b",
    // `insert into`.
    r"(?i)\binsert\s+into\b",
    // Stacked statement: `;` followed by a statement keyword.
    r"(?i);\s*(drop|delete|update|insert|select)\b",
    // Call to a delay/benchmark function: `sleep(`, `benchmark(`, `pg_sleep(`.
    r"(?i)\b(sleep|benchmark|pg_sleep)\s*\(",
    // `waitfor delay`.
    r"(?i)\bwaitfor\s+delay\b",
    // Any mention of `information_schema`.
    r"(?i)\binformation_schema\b",
    // Any mention of `xp_cmdshell`.
    r"(?i)\bxp_cmdshell\b",
];
163
/// Built-in cross-site-scripting signatures (case-insensitive).
///
/// All patterns are compiled together into one set; a match on any of them is a hit.
const XSS: &[&str] = &[
    // Opening `<script` tag (whitespace allowed after `<`).
    r"(?i)<\s*script\b",
    // Closing `</script>` tag (whitespace allowed around `/` and before `>`).
    r"(?i)<\s*/\s*script\s*>",
    // The literal text `javascript:` anywhere in the input.
    r"(?i)javascript:",
    // Assignment to one of a fixed list of inline event-handler attributes, e.g. `onerror=`.
    r"(?i)\bon(error|load|click|mouseover|focus|submit|toggle)\s*=",
    // Opening `<iframe` tag.
    r"(?i)<\s*iframe\b",
    // `<img` followed by `onerror` with no `>` in between.
    r"(?i)<\s*img\b[^>]*\bonerror\b",
    // `<svg` followed by `onload` with no `>` in between.
    r"(?i)<\s*svg\b[^>]*\bonload\b",
    // `document.cookie`, with optional whitespace around the dot.
    r"(?i)document\s*\.\s*cookie",
];
175
/// Built-in path-traversal signatures. The raw and percent-decoded path are both inspected, so
/// the encoded variants here mainly backstop double-encoding and matches in headers/body (which
/// are not decoded).
///
/// All patterns are compiled together into one set; a match on any of them is a hit.
const TRAVERSAL: &[&str] = &[
    // Literal `../`.
    r"\.\./",
    // Literal `..\`.
    r"\.\.\\",
    // Both dots encoded, followed by a raw or encoded separator.
    r"(?i)%2e%2e(%2f|%5c|/|\\)",
    // Literal dots followed by an encoded `/`.
    r"(?i)\.\.%2f",
    // Literal dots followed by an encoded `\`.
    r"(?i)\.\.%5c",
    // Mixed dot pair (`.%2e` or `%2e.`) followed by a raw or encoded separator. Without this,
    // a half-encoded `.%2e/` matches none of the signatures above, so it slips through in
    // headers/body (never decoded) and in a double-encoded path (decoded only once).
    r"(?i)(?:\.%2e|%2e\.)(?:/|\\|%2f|%5c)",
    // Well-known sensitive file path `/etc/passwd`.
    r"(?i)/etc/passwd\b",
    // Anything under `/proc/self/`.
    r"(?i)/proc/self/",
    // `c:\windows` or `c:\winnt`.
    r"(?i)c:\\(?:windows|winnt)\b",
];
189
190/// The compiled WAF engine, held in the hot-swappable [`crate::proxy::Runtime`].
191pub struct WafEngine {
192    mode: WafMode,
193    inspect_path: bool,
194    inspect_headers: bool,
195    inspect_body: bool,
196    rules: Vec<CompiledRule>,
197}
198
199impl WafEngine {
200    /// Compile the engine from config. When `mode = "off"` an inert engine is returned without
201    /// compiling anything (so a disabled WAF costs nothing). Otherwise every enabled built-in
202    /// ruleset and every custom pattern is compiled; an empty or invalid custom pattern, or an
203    /// unknown `target`, fails the build so the misconfiguration surfaces at startup/reload.
204    pub fn build(cfg: &WafCfg) -> Result<WafEngine> {
205        let mode = WafMode::parse(&cfg.mode).context("waf.mode")?;
206        if mode == WafMode::Off {
207            return Ok(WafEngine::disabled());
208        }
209
210        let mut rules = Vec::new();
211        if cfg.sqli {
212            rules.push(builtin("sqli", "sqli", SQLI)?);
213        }
214        if cfg.xss {
215            rules.push(builtin("xss", "xss", XSS)?);
216        }
217        if cfg.path_traversal {
218            rules.push(builtin("path_traversal", "path_traversal", TRAVERSAL)?);
219        }
220        for (i, rule) in cfg.rules.iter().enumerate() {
221            anyhow::ensure!(
222                !rule.pattern.trim().is_empty(),
223                "waf.rules[{i}].pattern must not be empty"
224            );
225            let id = if rule.id.trim().is_empty() {
226                format!("custom-{i}")
227            } else {
228                rule.id.clone()
229            };
230            let target =
231                Target::parse(&rule.target).with_context(|| format!("waf.rules[{i}] ({id})"))?;
232            let set = RegexSet::new([rule.pattern.as_str()])
233                .with_context(|| format!("compiling waf.rules[{i}] ({id}) pattern"))?;
234            rules.push(CompiledRule {
235                id,
236                class: "custom",
237                target,
238                set,
239            });
240        }
241
242        Ok(WafEngine {
243            mode,
244            inspect_path: cfg.inspect_path,
245            inspect_headers: cfg.inspect_headers,
246            inspect_body: cfg.inspect_body,
247            rules,
248        })
249    }
250
251    /// An inert engine (`mode = "off"`): no rules, inspects nothing.
252    fn disabled() -> WafEngine {
253        WafEngine {
254            mode: WafMode::Off,
255            inspect_path: false,
256            inspect_headers: false,
257            inspect_body: false,
258            rules: Vec::new(),
259        }
260    }
261
262    pub fn mode(&self) -> WafMode {
263        self.mode
264    }
265
266    /// Evaluate a request against the rules and return the first match, if any. Returns `None`
267    /// immediately when disabled. Each enabled location's inspection text is assembled at most
268    /// once, then every rule that targets that location is checked against it. The path is
269    /// checked both raw and percent-decoded (so `%2e%2e%2f` is caught as `../`); headers and
270    /// body are matched as-is.
271    pub fn evaluate(
272        &self,
273        path_and_query: &str,
274        headers: &HeaderMap,
275        body: &[u8],
276    ) -> Option<WafHit> {
277        if self.mode == WafMode::Off || self.rules.is_empty() {
278            return None;
279        }
280
281        // Only percent-decode when there's something to decode (the common path has no `%`).
282        let decoded_path = if self.inspect_path && path_and_query.contains('%') {
283            Some(percent_decode_lossy(path_and_query))
284        } else {
285            None
286        };
287        let header_text = if self.inspect_headers {
288            Some(join_header_values(headers))
289        } else {
290            None
291        };
292        let body_text = if self.inspect_body && !body.is_empty() {
293            Some(String::from_utf8_lossy(body))
294        } else {
295            None
296        };
297
298        for rule in &self.rules {
299            if self.inspect_path && rule.target.includes(Location::Path) {
300                let decoded_hit = decoded_path
301                    .as_deref()
302                    .is_some_and(|d| rule.set.is_match(d));
303                if rule.set.is_match(path_and_query) || decoded_hit {
304                    return Some(rule.hit(Location::Path));
305                }
306            }
307            if let Some(ht) = &header_text {
308                if rule.target.includes(Location::Headers) && rule.set.is_match(ht) {
309                    return Some(rule.hit(Location::Headers));
310                }
311            }
312            if let Some(bt) = &body_text {
313                if rule.target.includes(Location::Body) && rule.set.is_match(bt) {
314                    return Some(rule.hit(Location::Body));
315                }
316            }
317        }
318        None
319    }
320}
321
322/// Compile a built-in ruleset (a whole category) into one [`CompiledRule`] applying to every
323/// location.
324fn builtin(id: &str, class: &'static str, patterns: &[&str]) -> Result<CompiledRule> {
325    let set =
326        RegexSet::new(patterns).with_context(|| format!("compiling built-in {id} ruleset"))?;
327    Ok(CompiledRule {
328        id: id.to_string(),
329        class,
330        target: Target::ALL,
331        set,
332    })
333}
334
/// Small, lossy percent-decoder for the request path/query.
///
/// Every well-formed `%XX` escape (two hex digits, either case) is replaced by the byte it
/// encodes; a `%` that is not followed by two hex digits — malformed or cut off at the end of
/// the input — is kept as a literal `%`. The resulting bytes are converted to a `String`
/// lossily, so invalid UTF-8 becomes U+FFFD. Decoding is a single pass: a double-encoded
/// escape such as `%252e` comes out as `%2e`, not `.`.
fn percent_decode_lossy(s: &str) -> String {
    /// Value of one ASCII hex digit, or `None` for any other byte.
    fn hex_val(b: u8) -> Option<u8> {
        match b {
            b'0'..=b'9' => Some(b - b'0'),
            b'a'..=b'f' => Some(b - b'a' + 10),
            b'A'..=b'F' => Some(b - b'A' + 10),
            _ => None,
        }
    }

    let mut rest = s.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(rest.len());
    while let Some((&byte, tail)) = rest.split_first() {
        if byte == b'%' {
            if let [hi, lo, after @ ..] = tail {
                if let (Some(h), Some(l)) = (hex_val(*hi), hex_val(*lo)) {
                    decoded.push((h << 4) | l);
                    rest = after;
                    continue;
                }
            }
        }
        // Ordinary byte, or a `%` that does not start a valid escape: copy it through.
        decoded.push(byte);
        rest = tail;
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
358
359/// Join header `name: value` lines into one string for inspection. Names are included so a rule
360/// can target a specific header; values that aren't valid UTF-8 are skipped (they can't carry a
361/// textual signature we'd match).
362fn join_header_values(headers: &HeaderMap) -> String {
363    let mut out = String::new();
364    for (name, value) in headers.iter() {
365        if let Ok(v) = value.to_str() {
366            out.push_str(name.as_str());
367            out.push_str(": ");
368            out.push_str(v);
369            out.push('\n');
370        }
371    }
372    out
373}
374
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::WafRule;
    use axum::http::{HeaderMap, HeaderValue};

    /// A config with the given mode and every other field at its default.
    fn cfg_with_mode(mode: &str) -> WafCfg {
        WafCfg {
            mode: mode.into(),
            ..Default::default()
        }
    }

    /// Build an engine, panicking if the config is rejected.
    fn build_ok(cfg: &WafCfg) -> WafEngine {
        WafEngine::build(cfg).unwrap()
    }

    /// A block-mode engine with otherwise default settings.
    fn blocking_engine() -> WafEngine {
        build_ok(&cfg_with_mode("block"))
    }

    /// Evaluate a path/query alone: no headers, empty body.
    fn check_path(waf: &WafEngine, path: &str) -> Option<WafHit> {
        waf.evaluate(path, &HeaderMap::new(), b"")
    }

    #[test]
    fn mode_parses_known_and_rejects_unknown() {
        let accepted = [
            ("off", WafMode::Off),
            ("", WafMode::Off),
            ("REPORT", WafMode::Report),
            (" block ", WafMode::Block),
        ];
        for (input, expected) in accepted {
            assert_eq!(WafMode::parse(input).unwrap(), expected);
        }
        assert!(WafMode::parse("banana").is_err());
    }

    #[test]
    fn off_by_default_is_inert() {
        // The default config has mode "off".
        let waf = build_ok(&WafCfg::default());
        assert_eq!(waf.mode(), WafMode::Off);
        // Even blatant payloads are ignored when the engine is off.
        for payload in ["/?q=' OR '1'='1", "/../../etc/passwd"] {
            assert!(check_path(&waf, payload).is_none());
        }
    }

    #[test]
    fn detects_sqli_in_path() {
        let waf = blocking_engine();
        let hit = check_path(&waf, "/items?q=1 UNION SELECT password FROM users").unwrap();
        assert_eq!(hit.class, "sqli");
        assert!(check_path(&waf, "/login?u=admin&p=x' OR '1'='1").is_some());
        // Prose that merely contains "union" must not trip the union/select rule.
        assert!(check_path(&waf, "/articles/the-european-union-explained").is_none());
    }

    #[test]
    fn detects_xss_in_path_raw_and_encoded() {
        let waf = blocking_engine();
        let hit = check_path(&waf, "/p?c=<script>alert(1)</script>").unwrap();
        assert_eq!(hit.class, "xss");
        // Percent-encoded `<script>` is decoded before matching.
        assert!(check_path(&waf, "/p?c=%3Cscript%3E").is_some());
        assert!(check_path(&waf, "/go?to=javascript:alert(1)").is_some());
        assert!(check_path(&waf, "/search?q=hello world").is_none());
    }

    #[test]
    fn detects_path_traversal_raw_and_encoded() {
        let waf = blocking_engine();
        let hit = check_path(&waf, "/static/../../etc/passwd").unwrap();
        assert_eq!(hit.class, "path_traversal");
        assert!(check_path(&waf, "/static/%2e%2e%2f%2e%2e%2fsecret").is_some());
        assert!(check_path(&waf, "/static/app.bundle.js").is_none());
    }

    #[test]
    fn categories_can_be_disabled_individually() {
        let waf = build_ok(&WafCfg {
            sqli: false,
            ..cfg_with_mode("block")
        });
        // SQLi disabled -> not detected; XSS still on.
        assert!(check_path(&waf, "/?q=1 UNION SELECT 1").is_none());
        assert!(check_path(&waf, "/?q=<script>x</script>").is_some());
    }

    #[test]
    fn custom_rule_matches_only_its_target_location() {
        let wp_rule = WafRule {
            id: "wp".into(),
            pattern: r"(?i)/wp-admin".into(),
            target: "path".into(),
        };
        let waf = build_ok(&WafCfg {
            sqli: false,
            xss: false,
            path_traversal: false,
            inspect_headers: true,
            rules: vec![wp_rule],
            ..cfg_with_mode("block")
        });

        let hit = check_path(&waf, "/wp-admin/index.php").unwrap();
        assert_eq!(hit.rule_id, "wp");
        assert_eq!(hit.class, "custom");
        assert_eq!(hit.location, "path");

        // The same string in a header is not matched — the rule targets the path only.
        let mut headers = HeaderMap::new();
        headers.insert("x-test", HeaderValue::from_static("/wp-admin"));
        assert!(waf.evaluate("/safe", &headers, b"").is_none());
    }

    #[test]
    fn headers_and_body_only_inspected_when_enabled() {
        let payload: &[u8] = b"<script>x</script>";
        let mut headers = HeaderMap::new();
        headers.insert("user-agent", HeaderValue::from_static("<script>x</script>"));

        // Defaults: inspect_headers/body off -> a header/body payload is ignored.
        let path_only = blocking_engine();
        assert!(path_only.evaluate("/", &headers, payload).is_none());

        // With both enabled, the same payloads are caught and the location is reported.
        let everything = build_ok(&WafCfg {
            inspect_headers: true,
            inspect_body: true,
            ..cfg_with_mode("block")
        });
        let header_hit = everything.evaluate("/", &headers, b"").unwrap();
        assert_eq!(header_hit.location, "headers");
        let body_hit = everything
            .evaluate("/", &HeaderMap::new(), payload)
            .unwrap();
        assert_eq!(body_hit.location, "body");
    }

    #[test]
    fn build_rejects_bad_custom_pattern_empty_pattern_and_target() {
        let rejected = |mode: &str, rule: WafRule| {
            WafEngine::build(&WafCfg {
                rules: vec![rule],
                ..cfg_with_mode(mode)
            })
            .is_err()
        };

        // Uncompilable regex.
        assert!(rejected(
            "block",
            WafRule {
                id: "bad".into(),
                pattern: "(".into(),
                target: "path".into(),
            }
        ));

        // Empty pattern.
        assert!(rejected(
            "report",
            WafRule {
                pattern: "   ".into(),
                ..Default::default()
            }
        ));

        // Unknown target.
        assert!(rejected(
            "block",
            WafRule {
                pattern: "a".into(),
                target: "cookie".into(),
                ..Default::default()
            }
        ));
    }

    #[test]
    fn build_rejects_invalid_mode() {
        assert!(WafEngine::build(&cfg_with_mode("audit")).is_err());
    }

    #[test]
    fn percent_decode_handles_escapes_and_malformed() {
        let cases = [
            ("%2e%2e%2f", "../"),
            ("a%2Fb", "a/b"),
            // Malformed/truncated escapes are left literal.
            ("100%", "100%"),
            ("%zz", "%zz"),
            ("ab%2", "ab%2"),
        ];
        for (input, expected) in cases {
            assert_eq!(percent_decode_lossy(input), expected);
        }
    }

    #[test]
    fn report_mode_still_returns_hits() {
        let waf = build_ok(&cfg_with_mode("report"));
        assert_eq!(waf.mode(), WafMode::Report);
        assert!(check_path(&waf, "/?c=<script>").is_some());
    }
}
585}