// systemprompt_analytics/services/bot_keywords.rs
/// Lowercase substrings that mark a user agent as automated traffic.
///
/// `matches_bot_pattern` lowercases the incoming user agent and reports a bot
/// as soon as it contains any one of these entries, so every entry must stay
/// lowercase. Because matching is by substring, the generic `"bot"` entry
/// already covers every more specific entry that contains `bot`; the specific
/// names are kept for readability.
///
/// The section labels below are informal reviewer groupings only; the code
/// treats the list as flat.
pub const BOT_KEYWORDS: &[&str] = &[
    // Generic crawler vocabulary
    "bot",
    "crawler",
    "spider",
    "scraper",
    "crawling",
    // Search engines
    "googlebot",
    "google-inspectiontool",
    "adsbot-google",
    "googleother",
    "bingbot",
    "bingpreview",
    "msnbot",
    "baiduspider",
    "yandexbot",
    "yandex.com/bots",
    "duckduckbot",
    "slurp",
    "yahoo",
    // Social networks and messaging link previews
    "facebookexternalhit",
    "facebookcatalog",
    "facebot",
    "meta-externalagent",
    "twitterbot",
    "linkedinbot",
    "slackbot",
    "discordbot",
    "whatsapp",
    "telegrambot",
    "pinterestbot",
    // Further search crawlers
    "petalbot",
    "sogou",
    "applebot",
    // SEO and research crawlers
    "dotbot",
    "semrushbot",
    "ahrefsbot",
    "majesticbot",
    "mj12bot",
    "rogerbot",
    "exabot",
    "sistrix",
    "seolyt",
    "barkrowler",
    "blexbot",
    "bubing",
    "cliqzbot",
    // Uptime and performance monitoring
    "uptimerobot",
    "pingdom",
    "statuscake",
    "site24x7",
    "lighthouse",
    "pagespeed",
    "speedcurve",
    // Browser automation
    "headless",
    "phantom",
    "selenium",
    "webdriver",
    "puppeteer",
    // Archivers, embedders and testing tools
    "archive.org_bot",
    "ia_archiver",
    "embedly",
    "flipboard",
    "google-structured-data-testing-tool",
    // HTTP client libraries and command-line tools
    "scrapy",
    "python-requests",
    "python-urllib",
    "python-httpx",
    "httpx/",
    "aiohttp",
    "curl",
    "wget",
    "libwww",
    "http.rb",
    "guzzlehttp",
    "okhttp",
    "apache-httpclient",
    "go-http-client",
    "node-fetch",
    "axios",
    "java/",
    "perl/",
    "ruby/",
    // Miscellaneous markers
    "playstore-google",
    "google-read-aloud",
    "prefetch proxy",
    "surf/",
    "amazon-quick",
    "gzip(gfe)",
    "rv:1.9",
];
91
/// Textual IPv4 prefixes treated as bot traffic by `matches_bot_ip_range`.
///
/// Each entry ends with a `.` so that a prefix only matches on a whole-octet
/// boundary (for example `"66.249."` does not match `66.2491.0.1`).
pub const BOT_IP_PREFIXES: &[&str] = &[
    "66.249.",
    "40.77.",
    "157.55.",
    "207.46.",
    "69.171.",
    "173.252.",
    "31.13.",
];
95
/// Reports whether `user_agent` is too degenerate to be a genuine client
/// identifier.
///
/// Surrounding whitespace is ignored. The remaining value is malformed when
/// any of the following holds:
///
/// - it is one of the placeholders `-`, `null` or `unknown` (ASCII
///   case-insensitive);
/// - it is shorter than 10 bytes, which includes the empty and
///   whitespace-only cases;
/// - it starts with `{` or ends with `}`.
pub fn is_malformed_user_agent(user_agent: &str) -> bool {
    /// Minimum byte length of a trimmed user agent that is taken seriously.
    const MIN_USER_AGENT_LEN: usize = 10;
    /// Values clients send when they have no real user agent.
    const PLACEHOLDERS: [&str; 3] = ["-", "null", "unknown"];

    // Trim before measuring: padding must not let a short or blank value
    // slip past the length threshold.
    let ua = user_agent.trim();

    // Every placeholder is currently shorter than the threshold; they are
    // listed explicitly so they stay rejected if the threshold is lowered.
    let is_placeholder = PLACEHOLDERS.iter().any(|p| ua.eq_ignore_ascii_case(p));

    is_placeholder
        || ua.len() < MIN_USER_AGENT_LEN
        || ua.starts_with('{')
        || ua.ends_with('}')
}
109
110pub fn matches_bot_pattern(user_agent: &str) -> bool {
111 if is_malformed_user_agent(user_agent) {
112 return true;
113 }
114
115 let ua_lower = user_agent.to_lowercase();
116
117 if BOT_KEYWORDS
118 .iter()
119 .any(|keyword| ua_lower.contains(keyword))
120 {
121 return true;
122 }
123
124 if ua_lower.contains("compatible")
125 && !ua_lower.contains("chrome")
126 && !ua_lower.contains("firefox")
127 && !ua_lower.contains("safari")
128 && !ua_lower.contains("edge")
129 {
130 return true;
131 }
132
133 false
134}
135
136pub fn matches_bot_ip_range(ip: &str) -> bool {
137 BOT_IP_PREFIXES.iter().any(|prefix| ip.starts_with(prefix))
138}