1use regex::RegexSet;
11use serde::{Deserialize, Serialize};
12
13use crate::{WafDecision, WafRequest};
14
15#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
17#[serde(rename_all = "snake_case")]
18pub enum BotMode {
19 #[default]
21 Block,
22 Challenge,
24 LogOnly,
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct BotConfig {
31 #[serde(default)]
32 pub enabled: bool,
33 #[serde(default)]
34 pub mode: BotMode,
35 #[serde(default = "default_good_bots")]
37 pub good_bots: Vec<String>,
38 #[serde(default = "default_signal_threshold")]
40 pub signal_threshold: u32,
41}
42
/// Built-in allow-list of crawler User-Agent fragments, used when the configuration
/// does not provide `good_bots`.
fn default_good_bots() -> Vec<String> {
    const NAMES: [&str; 10] = [
        "Googlebot",
        "Bingbot",
        "Slurp",
        "DuckDuckBot",
        "facebookexternalhit",
        "Twitterbot",
        "LinkedInBot",
        "Slackbot",
        "Applebot",
        "YandexBot",
    ];

    NAMES.iter().map(|name| String::from(*name)).collect()
}
57
/// Signal count required to flag a request when the configuration does not set
/// `signal_threshold`.
fn default_signal_threshold() -> u32 {
    const DEFAULT_THRESHOLD: u32 = 2;
    DEFAULT_THRESHOLD
}
61
62impl Default for BotConfig {
63 fn default() -> Self {
64 Self {
65 enabled: false,
66 mode: BotMode::Block,
67 good_bots: default_good_bots(),
68 signal_threshold: default_signal_threshold(),
69 }
70 }
71}
72
73pub struct BotDetector {
75 config: BotConfig,
76 good_bot_patterns: RegexSet,
77}
78
79impl BotDetector {
80 pub fn new(config: BotConfig) -> Self {
81 let patterns: Vec<String> = config
83 .good_bots
84 .iter()
85 .map(|b| format!("(?i){}", regex::escape(b)))
86 .collect();
87 let good_bot_patterns = if patterns.is_empty() {
88 RegexSet::new::<&[&str], &&str>(&[]).expect("empty regex set")
89 } else {
90 RegexSet::new(&patterns).expect("good bot patterns must compile")
91 };
92
93 Self {
94 config,
95 good_bot_patterns,
96 }
97 }
98
99 fn is_good_bot(&self, ua: &str) -> bool {
100 self.good_bot_patterns.is_match(ua)
101 }
102
103 fn compute_signals(&self, req: &WafRequest) -> (u32, Vec<&'static str>) {
106 let mut signals = Vec::new();
107 let mut count = 0u32;
108
109 match &req.user_agent {
111 None => {
112 signals.push("missing User-Agent");
113 count += 1;
114 }
115 Some(ua) if ua.trim().is_empty() => {
116 signals.push("empty User-Agent");
117 count += 1;
118 }
119 _ => {}
120 }
121
122 let has_accept = req.headers.keys().any(|k| k.eq_ignore_ascii_case("accept"));
124 if !has_accept {
125 signals.push("missing Accept header");
126 count += 1;
127 }
128
129 let has_lang = req
131 .headers
132 .keys()
133 .any(|k| k.eq_ignore_ascii_case("accept-language"));
134 if !has_lang {
135 signals.push("missing Accept-Language header");
136 count += 1;
137 }
138
139 if let Some(ref ua) = req.user_agent {
141 if crate::rules::scanner::check_scanner(ua).is_some() {
142 signals.push("known scanner user-agent");
143 count += 1;
144 }
145 }
146
147 let has_encoding = req
149 .headers
150 .keys()
151 .any(|k| k.eq_ignore_ascii_case("accept-encoding"));
152 if !has_encoding {
153 signals.push("missing Accept-Encoding header");
154 count += 1;
155 }
156
157 (count, signals)
158 }
159
160 pub fn check(&self, req: &WafRequest) -> Option<WafDecision> {
162 if !self.config.enabled {
163 return None;
164 }
165
166 if let Some(ref ua) = req.user_agent {
168 if self.is_good_bot(ua) {
169 return None;
170 }
171 }
172
173 let (signal_count, signals) = self.compute_signals(req);
174
175 if signal_count < self.config.signal_threshold {
176 return None;
177 }
178
179 let reason = format!(
180 "bot detected ({signal_count} signals: {})",
181 signals.join(", ")
182 );
183
184 match self.config.mode {
185 BotMode::Block => Some(WafDecision::Block {
186 status: 403,
187 reason,
188 rule: "bot_detection".into(),
189 }),
190 BotMode::Challenge => Some(WafDecision::Challenge {
191 html: generate_challenge_html(),
192 }),
193 BotMode::LogOnly => {
194 tracing::info!(signals = signal_count, details = ?signals, "bot detected (log-only)");
195 None
196 }
197 }
198 }
199}
200
/// Returns the static HTML page sent with challenge decisions.
///
/// The inline script base64-encodes `<timestamp>:<user-agent length>`, stores it in a
/// `__waf_challenge` cookie (path `/`, one hour, `SameSite=Strict`) and reloads the page
/// after 500 ms. Clients without JavaScript only see the `<noscript>` notice.
///
/// NOTE(review): nothing in this file reads the cookie back; presumably it is checked
/// elsewhere before the request reaches the detector again — confirm.
fn generate_challenge_html() -> String {
    const CHALLENGE_PAGE: &str = r#"<!DOCTYPE html>
<html>
<head><title>Checking your browser</title></head>
<body>
<noscript>Please enable JavaScript to continue.</noscript>
<p id="msg">Verifying your browser...</p>
<script>
(function(){
 var ts = Date.now();
 var token = btoa(String(ts) + ':' + navigator.userAgent.length);
 document.cookie = '__waf_challenge=' + token + '; path=/; max-age=3600; SameSite=Strict';
 document.getElementById('msg').textContent = 'Redirecting...';
 setTimeout(function(){ location.reload(); }, 500);
})();
</script>
</body>
</html>"#;

    CHALLENGE_PAGE.to_owned()
}
222
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `GET /` request from 10.0.0.1 with the given User-Agent and headers.
    fn build_request(ua: Option<&str>, headers: Vec<(&str, &str)>) -> WafRequest {
        WafRequest {
            client_ip: "10.0.0.1".parse().unwrap(),
            method: "GET".into(),
            path: "/".into(),
            query: None,
            headers: headers
                .into_iter()
                .map(|(name, value)| (name.into(), value.into()))
                .collect(),
            body: None,
            user_agent: ua.map(String::from),
        }
    }

    /// A request with neither a User-Agent nor any headers.
    fn bare_request() -> WafRequest {
        build_request(None, vec![])
    }

    /// The three headers whose absence counts as a bot signal.
    fn browser_headers() -> Vec<(&'static str, &'static str)> {
        vec![
            ("Accept", "text/html,application/xhtml+xml"),
            ("Accept-Language", "en-US,en;q=0.9"),
            ("Accept-Encoding", "gzip, deflate, br"),
        ]
    }

    /// An enabled detector with the given mode and threshold; the good-bot list keeps
    /// its default.
    fn enabled_detector(mode: BotMode, signal_threshold: u32) -> BotDetector {
        BotDetector::new(BotConfig {
            enabled: true,
            mode,
            signal_threshold,
            ..Default::default()
        })
    }

    #[test]
    fn disabled_allows_all() {
        // The default configuration has detection switched off.
        let detector = BotDetector::new(BotConfig::default());
        assert!(detector.check(&bare_request()).is_none());
    }

    #[test]
    fn good_bot_allowed() {
        let detector = enabled_detector(BotMode::Block, 1);
        let googlebot = build_request(
            Some("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"),
            vec![],
        );
        assert!(detector.check(&googlebot).is_none());
    }

    #[test]
    fn real_browser_allowed() {
        let detector = enabled_detector(BotMode::Block, 2);
        let browser = build_request(
            Some("Mozilla/5.0 (Windows NT 10.0) Chrome/120.0"),
            browser_headers(),
        );
        assert!(detector.check(&browser).is_none());
    }

    #[test]
    fn no_ua_and_no_headers_detected() {
        let detector = enabled_detector(BotMode::Block, 2);
        let decision = detector.check(&bare_request());
        assert!(matches!(decision, Some(WafDecision::Block { .. })));
    }

    #[test]
    fn scanner_ua_detected() {
        // NOTE(review): with no headers, the three missing-header signals already reach
        // the threshold of 2, so this passes whether or not the scanner check fires.
        let detector = enabled_detector(BotMode::Block, 2);
        let scanner = build_request(Some("sqlmap/1.5"), vec![]);
        assert!(detector.check(&scanner).is_some());
    }

    #[test]
    fn challenge_mode_returns_html() {
        let detector = enabled_detector(BotMode::Challenge, 2);
        if let Some(WafDecision::Challenge { html }) = detector.check(&bare_request()) {
            assert!(html.contains("__waf_challenge"));
            assert!(html.contains("<script>"));
        } else {
            panic!("expected Challenge decision");
        }
    }

    #[test]
    fn log_only_mode_allows() {
        let detector = enabled_detector(BotMode::LogOnly, 1);
        assert!(detector.check(&bare_request()).is_none());
    }

    #[test]
    fn below_threshold_allowed() {
        // Only the missing User-Agent counts here: one signal against a threshold of 5.
        let detector = enabled_detector(BotMode::Block, 5);
        let req = build_request(
            None,
            vec![
                ("Accept", "text/html"),
                ("Accept-Language", "en"),
                ("Accept-Encoding", "gzip"),
            ],
        );
        assert!(detector.check(&req).is_none());
    }

    #[test]
    fn challenge_html_is_valid() {
        let page = generate_challenge_html();
        for needle in ["<!DOCTYPE html>", "__waf_challenge", "location.reload()"] {
            assert!(page.contains(needle));
        }
    }
}