systemprompt_analytics/repository/
traffic.rs1use crate::Result;
10use chrono::{DateTime, Utc};
11use sqlx::PgPool;
12use std::sync::Arc;
13use systemprompt_database::DbPool;
14
15use crate::models::cli::{BotTotalsRow, BotTypeRow, DeviceRow, GeoRow, TrafficSourceRow};
16
17#[derive(Debug)]
18pub struct TrafficAnalyticsRepository {
19 pool: Arc<PgPool>,
20}
21
22impl TrafficAnalyticsRepository {
23 pub fn new(db: &DbPool) -> Result<Self> {
24 let pool = db.pool_arc()?;
25 Ok(Self { pool })
26 }
27
28 pub async fn get_sources(
29 &self,
30 start: DateTime<Utc>,
31 end: DateTime<Utc>,
32 limit: i64,
33 engaged_only: bool,
34 ) -> Result<Vec<TrafficSourceRow>> {
35 if engaged_only {
36 sqlx::query_as!(
37 TrafficSourceRow,
38 r#"
39 SELECT
40 COALESCE(referrer_source, 'direct') as "source",
41 COUNT(*)::bigint as "count!"
42 FROM user_sessions
43 WHERE started_at >= $1 AND started_at < $2
44 AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
45 AND landing_page IS NOT NULL AND request_count > 0
46 GROUP BY referrer_source
47 ORDER BY COUNT(*) DESC
48 LIMIT $3
49 "#,
50 start,
51 end,
52 limit
53 )
54 .fetch_all(&*self.pool)
55 .await
56 .map_err(Into::into)
57 } else {
58 sqlx::query_as!(
59 TrafficSourceRow,
60 r#"
61 SELECT
62 COALESCE(referrer_source, 'direct') as "source",
63 COUNT(*)::bigint as "count!"
64 FROM user_sessions
65 WHERE started_at >= $1 AND started_at < $2
66 AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
67 GROUP BY referrer_source
68 ORDER BY COUNT(*) DESC
69 LIMIT $3
70 "#,
71 start,
72 end,
73 limit
74 )
75 .fetch_all(&*self.pool)
76 .await
77 .map_err(Into::into)
78 }
79 }
80
81 pub async fn get_geo_breakdown(
82 &self,
83 start: DateTime<Utc>,
84 end: DateTime<Utc>,
85 limit: i64,
86 engaged_only: bool,
87 ) -> Result<Vec<GeoRow>> {
88 if engaged_only {
89 sqlx::query_as!(
90 GeoRow,
91 r#"
92 SELECT
93 COALESCE(country, 'Unknown') as "country",
94 COUNT(*)::bigint as "count!"
95 FROM user_sessions
96 WHERE started_at >= $1 AND started_at < $2
97 AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
98 AND landing_page IS NOT NULL AND request_count > 0
99 GROUP BY country
100 ORDER BY COUNT(*) DESC
101 LIMIT $3
102 "#,
103 start,
104 end,
105 limit
106 )
107 .fetch_all(&*self.pool)
108 .await
109 .map_err(Into::into)
110 } else {
111 sqlx::query_as!(
112 GeoRow,
113 r#"
114 SELECT
115 COALESCE(country, 'Unknown') as "country",
116 COUNT(*)::bigint as "count!"
117 FROM user_sessions
118 WHERE started_at >= $1 AND started_at < $2
119 AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
120 GROUP BY country
121 ORDER BY COUNT(*) DESC
122 LIMIT $3
123 "#,
124 start,
125 end,
126 limit
127 )
128 .fetch_all(&*self.pool)
129 .await
130 .map_err(Into::into)
131 }
132 }
133
134 pub async fn get_device_breakdown(
135 &self,
136 start: DateTime<Utc>,
137 end: DateTime<Utc>,
138 limit: i64,
139 engaged_only: bool,
140 ) -> Result<Vec<DeviceRow>> {
141 if engaged_only {
142 sqlx::query_as!(
143 DeviceRow,
144 r#"
145 SELECT
146 COALESCE(device_type, 'unknown') as "device",
147 COALESCE(browser, 'unknown') as "browser",
148 COUNT(*)::bigint as "count!"
149 FROM user_sessions
150 WHERE started_at >= $1 AND started_at < $2
151 AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
152 AND landing_page IS NOT NULL AND request_count > 0
153 GROUP BY device_type, browser
154 ORDER BY COUNT(*) DESC
155 LIMIT $3
156 "#,
157 start,
158 end,
159 limit
160 )
161 .fetch_all(&*self.pool)
162 .await
163 .map_err(Into::into)
164 } else {
165 sqlx::query_as!(
166 DeviceRow,
167 r#"
168 SELECT
169 COALESCE(device_type, 'unknown') as "device",
170 COALESCE(browser, 'unknown') as "browser",
171 COUNT(*)::bigint as "count!"
172 FROM user_sessions
173 WHERE started_at >= $1 AND started_at < $2
174 AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
175 GROUP BY device_type, browser
176 ORDER BY COUNT(*) DESC
177 LIMIT $3
178 "#,
179 start,
180 end,
181 limit
182 )
183 .fetch_all(&*self.pool)
184 .await
185 .map_err(Into::into)
186 }
187 }
188
189 pub async fn get_bot_totals(
190 &self,
191 start: DateTime<Utc>,
192 end: DateTime<Utc>,
193 engaged_only: bool,
194 ) -> Result<BotTotalsRow> {
195 if engaged_only {
196 sqlx::query_as!(
197 BotTotalsRow,
198 r#"
199 SELECT
200 COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false AND landing_page IS NOT NULL AND request_count > 0)::bigint as "human!",
201 COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true OR landing_page IS NULL OR request_count = 0)::bigint as "bot!"
202 FROM user_sessions
203 WHERE started_at >= $1 AND started_at < $2
204 "#,
205 start,
206 end
207 )
208 .fetch_one(&*self.pool)
209 .await
210 .map_err(Into::into)
211 } else {
212 sqlx::query_as!(
213 BotTotalsRow,
214 r#"
215 SELECT
216 COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false)::bigint as "human!",
217 COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true)::bigint as "bot!"
218 FROM user_sessions
219 WHERE started_at >= $1 AND started_at < $2
220 "#,
221 start,
222 end
223 )
224 .fetch_one(&*self.pool)
225 .await
226 .map_err(Into::into)
227 }
228 }
229
230 pub async fn get_bot_breakdown(
231 &self,
232 start: DateTime<Utc>,
233 end: DateTime<Utc>,
234 ) -> Result<Vec<BotTypeRow>> {
235 sqlx::query_as!(
236 BotTypeRow,
237 r#"
238 SELECT
239 CASE
240 WHEN user_agent ILIKE '%googlebot%' OR user_agent ILIKE '%google-inspectiontool%' OR user_agent ILIKE '%adsbot-google%' THEN 'Google'
241 WHEN user_agent ILIKE '%bingbot%' OR user_agent ILIKE '%bingpreview%' OR user_agent ILIKE '%msnbot%' THEN 'Bing'
242 WHEN user_agent ILIKE '%chatgpt%' OR user_agent ILIKE '%gptbot%' THEN 'OpenAI'
243 WHEN user_agent ILIKE '%claude%' OR user_agent ILIKE '%anthropic%' THEN 'Anthropic'
244 WHEN user_agent ILIKE '%perplexity%' THEN 'Perplexity'
245 WHEN user_agent ILIKE '%baiduspider%' THEN 'Baidu'
246 WHEN user_agent ILIKE '%yandexbot%' THEN 'Yandex'
247 WHEN user_agent ILIKE '%facebookexternalhit%' OR user_agent ILIKE '%facebot%' OR user_agent ILIKE '%meta-externalagent%' THEN 'Meta'
248 WHEN user_agent ILIKE '%twitterbot%' THEN 'Twitter/X'
249 WHEN user_agent ILIKE '%linkedinbot%' THEN 'LinkedIn'
250 WHEN user_agent ILIKE '%semrushbot%' OR user_agent ILIKE '%ahrefsbot%' OR user_agent ILIKE '%mj12bot%' OR user_agent ILIKE '%dotbot%' THEN 'SEO Crawlers'
251 WHEN user_agent ILIKE '%bytespider%' THEN 'ByteDance'
252 WHEN user_agent ILIKE '%amazonbot%' OR user_agent ILIKE '%applebot%' THEN 'Tech Giants'
253 WHEN user_agent ILIKE '%python%' OR user_agent ILIKE '%scrapy%' OR user_agent ILIKE '%httpx%' THEN 'Python Scrapers'
254 WHEN user_agent ILIKE '%curl%' OR user_agent ILIKE '%wget%' OR user_agent ILIKE '%node-fetch%' OR user_agent ILIKE '%axios%' THEN 'CLI/HTTP Tools'
255 WHEN user_agent ILIKE '%headless%' OR user_agent ILIKE '%phantom%' OR user_agent ILIKE '%selenium%' OR user_agent ILIKE '%puppeteer%' THEN 'Headless Browsers'
256 WHEN user_agent ILIKE '%uptimerobot%' OR user_agent ILIKE '%pingdom%' OR user_agent ILIKE '%statuscake%' OR user_agent ILIKE '%lighthouse%' THEN 'Monitoring'
257 WHEN is_behavioral_bot = true THEN 'Behavioral Bot'
258 WHEN is_scanner = true THEN 'Scanner'
259 ELSE 'Other'
260 END as "bot_type",
261 COUNT(*)::bigint as "count!"
262 FROM user_sessions
263 WHERE started_at >= $1 AND started_at < $2
264 AND (is_bot = true OR is_behavioral_bot = true OR is_scanner = true)
265 GROUP BY 1
266 ORDER BY COUNT(*) DESC
267 "#,
268 start,
269 end
270 )
271 .fetch_all(&*self.pool)
272 .await
273 .map_err(Into::into)
274 }
275}