/// Lowercase substrings whose presence in a lowercased `User-Agent` header
/// marks the request as automated traffic.
///
/// Entries are matched with a plain substring search (see `is_bot_lower`), so
/// every entry must itself be lowercase. The grouping comments below are for
/// readers only; matching does not depend on entry order.
const BOT_PATTERNS: &[&str] = &[
    // Search-engine and archive crawlers.
    "googlebot",
    "bingbot",
    "yandexbot",
    "baiduspider",
    "duckduckbot",
    "slurp",
    "sogou",
    "exabot",
    "ia_archiver",
    // Social / messaging link-preview fetchers.
    "facebookexternalhit",
    "twitterbot",
    "linkedinbot",
    "whatsapp",
    "telegrambot",
    "discordbot",
    "slackbot",
    // Uptime and availability monitors.
    "uptimerobot",
    "pingdom",
    "site24x7",
    "statuscake",
    // SEO and site-audit crawlers.
    "semrushbot",
    "ahrefsbot",
    "mj12bot",
    "dotbot",
    "rogerbot",
    "screaming frog",
    // Headless browsers and browser-automation tooling.
    "headlesschrome",
    "phantomjs",
    "puppeteer",
    "playwright",
    // Generic markers used by many self-identifying bots.
    "bot/",
    "bot;",
    "crawler",
    "spider",
    "scraper",
    // HTTP client libraries and command-line tools.
    "http-client",
    "python-requests",
    "python-urllib",
    "go-http-client",
    "java/",
    "wget",
    "curl/",
    "libwww",
    "apache-httpclient",
    "okhttp",
    "node-fetch",
    "axios",
    "postman",
];
/// Reports whether a `User-Agent` header value looks like automated traffic.
///
/// The value is ASCII-lowercased and then checked with [`is_bot_lower`], so
/// matching is case-insensitive for ASCII letters.
///
/// Returns `false` when the header is absent (`None`) or empty.
pub fn is_bot(user_agent: Option<&str>) -> bool {
    match user_agent {
        // No header, or an empty one, is treated as "not a bot".
        None | Some("") => false,
        Some(raw) => is_bot_lower(&raw.to_ascii_lowercase()),
    }
}
/// Reports whether an already-lowercased `User-Agent` contains any entry of
/// `BOT_PATTERNS` as a substring.
///
/// The comparison is case-sensitive: the caller is responsible for passing a
/// lowercased string (as [`is_bot`] does), otherwise uppercase agents will not
/// match.
pub fn is_bot_lower(ua_lower: &str) -> bool {
    BOT_PATTERNS
        .iter()
        .copied()
        .any(|needle| ua_lower.contains(needle))
}
#[cfg(test)]
mod tests {
    //! Unit tests for User-Agent bot detection.
    //!
    //! The functions under test are synchronous, so these are plain `#[test]`
    //! functions; no async runtime is required.

    use super::*;

    #[test]
    fn detects_googlebot() {
        assert!(is_bot(Some(
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
        )));
    }

    #[test]
    fn detects_headless_chrome() {
        assert!(is_bot(Some("Mozilla/5.0 HeadlessChrome/90.0.4430.212")));
    }

    #[test]
    fn detects_curl() {
        assert!(is_bot(Some("curl/7.68.0")));
    }

    #[test]
    fn allows_real_browser() {
        assert!(!is_bot(Some(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )));
    }

    #[test]
    fn allows_mobile_browser() {
        assert!(!is_bot(Some(
            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1"
        )));
    }

    #[test]
    fn missing_ua_is_not_bot() {
        assert!(!is_bot(None));
        assert!(!is_bot(Some("")));
    }

    #[test]
    fn matching_is_ascii_case_insensitive() {
        // `is_bot` lowercases before matching, so casing must not matter.
        assert!(is_bot(Some("CURL/7.68.0")));
        assert!(is_bot(Some("Python-Requests/2.31.0")));
    }

    #[test]
    fn is_bot_lower_requires_lowercased_input() {
        // `is_bot_lower` does no normalisation of its own.
        assert!(is_bot_lower("curl/7.68.0"));
        assert!(!is_bot_lower("CURL/7.68.0"));
        assert!(!is_bot_lower(""));
    }
}