// Source: systemprompt_analytics/services/bot_keywords.rs

/// Substrings that mark a user agent as automated traffic.
///
/// Every entry is written in lowercase; `matches_bot_pattern` lowercases the
/// incoming user agent and reports a bot as soon as any entry occurs anywhere
/// inside it. Because this is substring matching, a short generic entry such
/// as `"bot"` already covers longer entries that contain it.
///
/// The section comments below are a reviewer's reading of the names and are
/// only there to make the list easier to scan; the code does not rely on them.
pub const BOT_KEYWORDS: &[&str] = &[
    // Generic crawler vocabulary.
    "bot",
    "crawler",
    "spider",
    "scraper",
    "crawling",
    // Search-engine agents.
    "googlebot",
    "google-inspectiontool",
    "adsbot-google",
    "googleother",
    "bingbot",
    "bingpreview",
    "msnbot",
    "baiduspider",
    "yandexbot",
    "yandex.com/bots",
    "duckduckbot",
    "slurp",
    "yahoo",
    // Social-network and messaging link fetchers.
    "facebookexternalhit",
    "facebookcatalog",
    "facebot",
    "meta-externalagent",
    "twitterbot",
    "linkedinbot",
    "slackbot",
    "discordbot",
    "whatsapp",
    "telegrambot",
    "pinterestbot",
    // AI assistant and model-provider agents.
    "chatgpt-user",
    "gptbot",
    "claude-web",
    "anthropic-ai",
    "perplexitybot",
    "cohere-ai",
    // Other large-platform crawlers.
    "petalbot",
    "bytespider",
    "sogou",
    "amazonbot",
    "applebot",
    // SEO and link-analysis crawlers.
    "dotbot",
    "semrushbot",
    "ahrefsbot",
    "majesticbot",
    "mj12bot",
    "rogerbot",
    "exabot",
    "sistrix",
    "seolyt",
    "barkrowler",
    "blexbot",
    "bubing",
    "cliqzbot",
    // Uptime and performance monitors.
    "uptimerobot",
    "pingdom",
    "statuscake",
    "site24x7",
    "lighthouse",
    "pagespeed",
    "speedcurve",
    // Headless browsers and browser automation.
    "headless",
    "phantom",
    "selenium",
    "webdriver",
    "puppeteer",
    // Archivers, embed/preview services and testing tools.
    "archive.org_bot",
    "ia_archiver",
    "embedly",
    "flipboard",
    "google-structured-data-testing-tool",
    // HTTP client libraries and command-line tools.
    "scrapy",
    "python-requests",
    "python-urllib",
    "python-httpx",
    "httpx/",
    "aiohttp",
    "curl",
    "wget",
    "libwww",
    "http.rb",
    "guzzlehttp",
    "okhttp",
    "apache-httpclient",
    "go-http-client",
    "node-fetch",
    "axios",
    "java/",
    "perl/",
    "ruby/",
    // Miscellaneous markers.
    "playstore-google",
    "google-read-aloud",
    "prefetch proxy",
    "surf/",
    "amazon-quick",
    "gzip(gfe)",
    "rv:1.9",
];

/// Textual IPv4 prefixes treated as bot traffic.
///
/// Each entry is the leading part of a dotted-quad address and ends with a
/// dot, so `"31.13."` matches `31.13.x.y` but not `31.130.x.y`.
/// `matches_bot_ip_range` compares these against the start of the address
/// string as plain text.
pub const BOT_IP_PREFIXES: &[&str] = &[
    // NOTE(review): presumably Google crawler address space; confirm.
    "66.249.",
    // NOTE(review): presumably Microsoft/Bing crawler address space; confirm.
    "40.77.",
    "157.55.",
    "207.46.",
    // NOTE(review): presumably Meta/Facebook crawler address space; confirm.
    "69.171.",
    "173.252.",
    "31.13.",
];

104pub fn matches_bot_pattern(user_agent: &str) -> bool {
105 let ua_lower = user_agent.to_lowercase();
106
107 if BOT_KEYWORDS
108 .iter()
109 .any(|keyword| ua_lower.contains(keyword))
110 {
111 return true;
112 }
113
114 if user_agent.len() < 10 {
115 return true;
116 }
117
118 if ua_lower.contains("compatible")
119 && !ua_lower.contains("chrome")
120 && !ua_lower.contains("firefox")
121 && !ua_lower.contains("safari")
122 && !ua_lower.contains("edge")
123 {
124 return true;
125 }
126
127 false
128}
129
130pub fn matches_bot_ip_range(ip: &str) -> bool {
131 BOT_IP_PREFIXES.iter().any(|prefix| ip.starts_with(prefix))
132}