// systemprompt_analytics/services/bot_keywords.rs

/// Substrings that mark a user agent as automated traffic.
///
/// `matches_bot_pattern` lowercases the incoming user agent and tests it with
/// `str::contains` against every entry, so each entry must itself be lowercase
/// and is matched anywhere inside the string, not as a whole token. Broad
/// entries such as `"bot"` therefore already cover many of the more specific
/// ones; the specific names are kept as an explicit record of known agents.
pub const BOT_KEYWORDS: &[&str] = &[
    // Generic crawler vocabulary.
    "bot",
    "crawler",
    "spider",
    "scraper",
    "crawling",
    // Search-engine crawlers.
    "googlebot",
    "google-inspectiontool",
    "adsbot-google",
    "googleother",
    "bingbot",
    "bingpreview",
    "msnbot",
    "baiduspider",
    "yandexbot",
    "yandex.com/bots",
    "duckduckbot",
    "slurp",
    "yahoo",
    // Social / messaging link-preview fetchers.
    "facebookexternalhit",
    "facebookcatalog",
    "facebot",
    "meta-externalagent",
    "twitterbot",
    "linkedinbot",
    "slackbot",
    "discordbot",
    "whatsapp",
    "telegrambot",
    "pinterestbot",
    // Other named crawlers.
    "petalbot",
    "sogou",
    "applebot",
    // SEO / indexing crawlers.
    "dotbot",
    "semrushbot",
    "ahrefsbot",
    "majesticbot",
    "mj12bot",
    "rogerbot",
    "exabot",
    "sistrix",
    "seolyt",
    "barkrowler",
    "blexbot",
    "bubing",
    "cliqzbot",
    // Uptime and performance tooling.
    "uptimerobot",
    "pingdom",
    "statuscake",
    "site24x7",
    "lighthouse",
    "pagespeed",
    "speedcurve",
    // Headless browsers and browser automation.
    "headless",
    "phantom",
    "selenium",
    "webdriver",
    "puppeteer",
    // Archivers, embed/preview and testing services.
    "archive.org_bot",
    "ia_archiver",
    "embedly",
    "flipboard",
    "google-structured-data-testing-tool",
    // Scraping frameworks, HTTP client libraries and language runtimes.
    "scrapy",
    "python-requests",
    "python-urllib",
    "python-httpx",
    "httpx/",
    "aiohttp",
    "curl",
    "wget",
    "libwww",
    "http.rb",
    "guzzlehttp",
    "okhttp",
    "apache-httpclient",
    "go-http-client",
    "node-fetch",
    "axios",
    "java/",
    "perl/",
    "ruby/",
    // Miscellaneous automated-traffic markers.
    "playstore-google",
    "google-read-aloud",
    "prefetch proxy",
    "surf/",
    "amazon-quick",
    "gzip(gfe)",
    "rv:1.9",
];

/// Textual IPv4 prefixes treated as bot traffic by `matches_bot_ip_range`.
///
/// Each entry covers the first two octets and ends with a `.` so that the
/// prefix can only match on an octet boundary (`"31.13."` matches
/// `31.13.0.1` but not `31.130.0.1`).
///
/// NOTE(review): these presumably correspond to Google, Microsoft and
/// Facebook crawler ranges; the file does not say so — confirm before
/// relying on that.
pub const BOT_IP_PREFIXES: &[&str] = &[
    "66.249.",
    "40.77.",
    "157.55.",
    "207.46.",
    "69.171.",
    "173.252.",
    "31.13.",
];

/// Returns `true` when `user_agent` is too degenerate to be a genuine client
/// identification string.
///
/// Surrounding whitespace is ignored. The remaining text is considered
/// malformed when either:
///
/// * it is shorter than ten bytes — this covers the empty string,
///   whitespace-only input, and placeholder values such as `-`, `null` and
///   `unknown` in any letter case; or
/// * it begins with `{` or ends with `}`, i.e. it looks like a serialized
///   object rather than a user agent.
///
/// The length floor is applied to the trimmed text so that padding cannot
/// push an otherwise empty or placeholder value over the threshold.
pub fn is_malformed_user_agent(user_agent: &str) -> bool {
    // Shortest trimmed length, in bytes, accepted as a plausible user agent.
    const MIN_USER_AGENT_LEN: usize = 10;

    let trimmed = user_agent.trim();

    if trimmed.len() < MIN_USER_AGENT_LEN {
        return true;
    }

    trimmed.starts_with('{') || trimmed.ends_with('}')
}

110pub fn matches_bot_pattern(user_agent: &str) -> bool {
111    if is_malformed_user_agent(user_agent) {
112        return true;
113    }
114
115    let ua_lower = user_agent.to_lowercase();
116
117    if BOT_KEYWORDS
118        .iter()
119        .any(|keyword| ua_lower.contains(keyword))
120    {
121        return true;
122    }
123
124    if ua_lower.contains("compatible")
125        && !ua_lower.contains("chrome")
126        && !ua_lower.contains("firefox")
127        && !ua_lower.contains("safari")
128        && !ua_lower.contains("edge")
129    {
130        return true;
131    }
132
133    false
134}
135
136pub fn matches_bot_ip_range(ip: &str) -> bool {
137    BOT_IP_PREFIXES.iter().any(|prefix| ip.starts_with(prefix))
138}