Skip to main content

systemprompt_analytics/services/
bot_keywords.rs

/// Lowercase substrings that mark a user agent as automated traffic.
///
/// [`matches_bot_pattern`] lowercases the incoming user agent and reports a
/// bot as soon as any entry listed here occurs anywhere inside it, so every
/// entry must itself be lowercase. Broad tokens such as `"bot"` also match
/// many of the more specific names further down the list.
///
/// The grouping comments are a reading aid only; the order of the entries
/// does not affect the result of the match.
pub const BOT_KEYWORDS: &[&str] = &[
    // Generic crawler vocabulary.
    "bot",
    "crawler",
    "spider",
    "scraper",
    "crawling",
    // Search-engine crawlers.
    "googlebot",
    "google-inspectiontool",
    "adsbot-google",
    "googleother",
    "bingbot",
    "bingpreview",
    "msnbot",
    "baiduspider",
    "yandexbot",
    "yandex.com/bots",
    "duckduckbot",
    "slurp",
    "yahoo",
    // Social-network and messaging link fetchers.
    "facebookexternalhit",
    "facebookcatalog",
    "facebot",
    "meta-externalagent",
    "twitterbot",
    "linkedinbot",
    "slackbot",
    "discordbot",
    "whatsapp",
    "telegrambot",
    "pinterestbot",
    // AI assistants and model crawlers.
    "chatgpt-user",
    "gptbot",
    "claude-web",
    "anthropic-ai",
    "perplexitybot",
    "cohere-ai",
    // Other large-platform crawlers.
    "petalbot",
    "bytespider",
    "sogou",
    "amazonbot",
    "applebot",
    // SEO tooling and miscellaneous crawlers.
    "dotbot",
    "semrushbot",
    "ahrefsbot",
    "majesticbot",
    "mj12bot",
    "rogerbot",
    "exabot",
    "sistrix",
    "seolyt",
    "barkrowler",
    "blexbot",
    "bubing",
    "cliqzbot",
    // Uptime and performance monitoring.
    "uptimerobot",
    "pingdom",
    "statuscake",
    "site24x7",
    "lighthouse",
    "pagespeed",
    "speedcurve",
    // Headless browsers and browser automation.
    "headless",
    "phantom",
    "selenium",
    "webdriver",
    "puppeteer",
    // Archivers, embed/preview services and testing tools.
    "archive.org_bot",
    "ia_archiver",
    "embedly",
    "flipboard",
    "google-structured-data-testing-tool",
    // HTTP client libraries and command-line tools.
    "scrapy",
    "python-requests",
    "python-urllib",
    "python-httpx",
    "httpx/",
    "aiohttp",
    "curl",
    "wget",
    "libwww",
    "http.rb",
    "guzzlehttp",
    "okhttp",
    "apache-httpclient",
    "go-http-client",
    "node-fetch",
    "axios",
    "java/",
    "perl/",
    "ruby/",
    // Miscellaneous automated agents.
    "playstore-google",
    "google-read-aloud",
    "prefetch proxy",
    "surf/",
    "amazon-quick",
];
97
/// Dotted-decimal address prefixes that are treated as bot traffic.
///
/// [`matches_bot_ip_range`] compares these textually with `starts_with`.
/// Every prefix ends in `.`, which keeps the comparison aligned to whole
/// octets: `"31.13."` matches `"31.13.24.2"` but not `"31.130.0.1"`.
pub const BOT_IP_PREFIXES: &[&str] = &[
    // NOTE(review): presumably address ranges used by large crawler
    // operators — verify against the operators' published range lists.
    "66.249.",
    "40.77.",
    "157.55.",
    "207.46.",
    "69.171.",
    "173.252.",
    "31.13.",
];
101
102pub fn matches_bot_pattern(user_agent: &str) -> bool {
103    let ua_lower = user_agent.to_lowercase();
104
105    if BOT_KEYWORDS
106        .iter()
107        .any(|keyword| ua_lower.contains(keyword))
108    {
109        return true;
110    }
111
112    if user_agent.len() < 10 {
113        return true;
114    }
115
116    if ua_lower.contains("compatible")
117        && !ua_lower.contains("chrome")
118        && !ua_lower.contains("firefox")
119        && !ua_lower.contains("safari")
120        && !ua_lower.contains("edge")
121    {
122        return true;
123    }
124
125    false
126}
127
128pub fn matches_bot_ip_range(ip: &str) -> bool {
129    BOT_IP_PREFIXES.iter().any(|prefix| ip.starts_with(prefix))
130}