Skip to main content

systemprompt_analytics/repository/
traffic.rs

1//! Traffic-source, geography, device, and bot analytics.
2//!
3//! [`TrafficAnalyticsRepository`] reads `user_sessions` to break sessions
4//! down by referrer source, country, and device, and to classify human
5//! versus bot traffic (including a user-agent-driven bot taxonomy). An
6//! `engaged_only` flag restricts the human-facing breakdowns to sessions with
7//! a landing page and at least one request.
8
9use crate::Result;
10use chrono::{DateTime, Utc};
11use sqlx::PgPool;
12use std::sync::Arc;
13use systemprompt_database::DbPool;
14
15use crate::models::cli::{BotTotalsRow, BotTypeRow, DeviceRow, GeoRow, TrafficSourceRow};
16
17#[derive(Debug)]
18pub struct TrafficAnalyticsRepository {
19    pool: Arc<PgPool>,
20}
21
22impl TrafficAnalyticsRepository {
23    pub fn new(db: &DbPool) -> Result<Self> {
24        let pool = db.pool_arc()?;
25        Ok(Self { pool })
26    }
27
28    pub async fn get_sources(
29        &self,
30        start: DateTime<Utc>,
31        end: DateTime<Utc>,
32        limit: i64,
33        engaged_only: bool,
34    ) -> Result<Vec<TrafficSourceRow>> {
35        if engaged_only {
36            sqlx::query_as!(
37                TrafficSourceRow,
38                r#"
39                SELECT
40                    COALESCE(referrer_source, 'direct') as "source",
41                    COUNT(*)::bigint as "count!"
42                FROM user_sessions
43                WHERE started_at >= $1 AND started_at < $2
44                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
45                  AND landing_page IS NOT NULL AND request_count > 0
46                GROUP BY referrer_source
47                ORDER BY COUNT(*) DESC
48                LIMIT $3
49                "#,
50                start,
51                end,
52                limit
53            )
54            .fetch_all(&*self.pool)
55            .await
56            .map_err(Into::into)
57        } else {
58            sqlx::query_as!(
59                TrafficSourceRow,
60                r#"
61                SELECT
62                    COALESCE(referrer_source, 'direct') as "source",
63                    COUNT(*)::bigint as "count!"
64                FROM user_sessions
65                WHERE started_at >= $1 AND started_at < $2
66                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
67                GROUP BY referrer_source
68                ORDER BY COUNT(*) DESC
69                LIMIT $3
70                "#,
71                start,
72                end,
73                limit
74            )
75            .fetch_all(&*self.pool)
76            .await
77            .map_err(Into::into)
78        }
79    }
80
81    pub async fn get_geo_breakdown(
82        &self,
83        start: DateTime<Utc>,
84        end: DateTime<Utc>,
85        limit: i64,
86        engaged_only: bool,
87    ) -> Result<Vec<GeoRow>> {
88        if engaged_only {
89            sqlx::query_as!(
90                GeoRow,
91                r#"
92                SELECT
93                    COALESCE(country, 'Unknown') as "country",
94                    COUNT(*)::bigint as "count!"
95                FROM user_sessions
96                WHERE started_at >= $1 AND started_at < $2
97                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
98                  AND landing_page IS NOT NULL AND request_count > 0
99                GROUP BY country
100                ORDER BY COUNT(*) DESC
101                LIMIT $3
102                "#,
103                start,
104                end,
105                limit
106            )
107            .fetch_all(&*self.pool)
108            .await
109            .map_err(Into::into)
110        } else {
111            sqlx::query_as!(
112                GeoRow,
113                r#"
114                SELECT
115                    COALESCE(country, 'Unknown') as "country",
116                    COUNT(*)::bigint as "count!"
117                FROM user_sessions
118                WHERE started_at >= $1 AND started_at < $2
119                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
120                GROUP BY country
121                ORDER BY COUNT(*) DESC
122                LIMIT $3
123                "#,
124                start,
125                end,
126                limit
127            )
128            .fetch_all(&*self.pool)
129            .await
130            .map_err(Into::into)
131        }
132    }
133
134    pub async fn get_device_breakdown(
135        &self,
136        start: DateTime<Utc>,
137        end: DateTime<Utc>,
138        limit: i64,
139        engaged_only: bool,
140    ) -> Result<Vec<DeviceRow>> {
141        if engaged_only {
142            sqlx::query_as!(
143                DeviceRow,
144                r#"
145                SELECT
146                    COALESCE(device_type, 'unknown') as "device",
147                    COALESCE(browser, 'unknown') as "browser",
148                    COUNT(*)::bigint as "count!"
149                FROM user_sessions
150                WHERE started_at >= $1 AND started_at < $2
151                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
152                  AND landing_page IS NOT NULL AND request_count > 0
153                GROUP BY device_type, browser
154                ORDER BY COUNT(*) DESC
155                LIMIT $3
156                "#,
157                start,
158                end,
159                limit
160            )
161            .fetch_all(&*self.pool)
162            .await
163            .map_err(Into::into)
164        } else {
165            sqlx::query_as!(
166                DeviceRow,
167                r#"
168                SELECT
169                    COALESCE(device_type, 'unknown') as "device",
170                    COALESCE(browser, 'unknown') as "browser",
171                    COUNT(*)::bigint as "count!"
172                FROM user_sessions
173                WHERE started_at >= $1 AND started_at < $2
174                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
175                GROUP BY device_type, browser
176                ORDER BY COUNT(*) DESC
177                LIMIT $3
178                "#,
179                start,
180                end,
181                limit
182            )
183            .fetch_all(&*self.pool)
184            .await
185            .map_err(Into::into)
186        }
187    }
188
189    pub async fn get_bot_totals(
190        &self,
191        start: DateTime<Utc>,
192        end: DateTime<Utc>,
193        engaged_only: bool,
194    ) -> Result<BotTotalsRow> {
195        if engaged_only {
196            sqlx::query_as!(
197                BotTotalsRow,
198                r#"
199                SELECT
200                    COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false AND landing_page IS NOT NULL AND request_count > 0)::bigint as "human!",
201                    COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true OR landing_page IS NULL OR request_count = 0)::bigint as "bot!"
202                FROM user_sessions
203                WHERE started_at >= $1 AND started_at < $2
204                "#,
205                start,
206                end
207            )
208            .fetch_one(&*self.pool)
209            .await
210            .map_err(Into::into)
211        } else {
212            sqlx::query_as!(
213                BotTotalsRow,
214                r#"
215                SELECT
216                    COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false)::bigint as "human!",
217                    COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true)::bigint as "bot!"
218                FROM user_sessions
219                WHERE started_at >= $1 AND started_at < $2
220                "#,
221                start,
222                end
223            )
224            .fetch_one(&*self.pool)
225            .await
226            .map_err(Into::into)
227        }
228    }
229
230    pub async fn get_bot_breakdown(
231        &self,
232        start: DateTime<Utc>,
233        end: DateTime<Utc>,
234    ) -> Result<Vec<BotTypeRow>> {
235        sqlx::query_as!(
236            BotTypeRow,
237            r#"
238            SELECT
239                CASE
240                    WHEN user_agent ILIKE '%googlebot%' OR user_agent ILIKE '%google-inspectiontool%' OR user_agent ILIKE '%adsbot-google%' THEN 'Google'
241                    WHEN user_agent ILIKE '%bingbot%' OR user_agent ILIKE '%bingpreview%' OR user_agent ILIKE '%msnbot%' THEN 'Bing'
242                    WHEN user_agent ILIKE '%chatgpt%' OR user_agent ILIKE '%gptbot%' THEN 'OpenAI'
243                    WHEN user_agent ILIKE '%claude%' OR user_agent ILIKE '%anthropic%' THEN 'Anthropic'
244                    WHEN user_agent ILIKE '%perplexity%' THEN 'Perplexity'
245                    WHEN user_agent ILIKE '%baiduspider%' THEN 'Baidu'
246                    WHEN user_agent ILIKE '%yandexbot%' THEN 'Yandex'
247                    WHEN user_agent ILIKE '%facebookexternalhit%' OR user_agent ILIKE '%facebot%' OR user_agent ILIKE '%meta-externalagent%' THEN 'Meta'
248                    WHEN user_agent ILIKE '%twitterbot%' THEN 'Twitter/X'
249                    WHEN user_agent ILIKE '%linkedinbot%' THEN 'LinkedIn'
250                    WHEN user_agent ILIKE '%semrushbot%' OR user_agent ILIKE '%ahrefsbot%' OR user_agent ILIKE '%mj12bot%' OR user_agent ILIKE '%dotbot%' THEN 'SEO Crawlers'
251                    WHEN user_agent ILIKE '%bytespider%' THEN 'ByteDance'
252                    WHEN user_agent ILIKE '%amazonbot%' OR user_agent ILIKE '%applebot%' THEN 'Tech Giants'
253                    WHEN user_agent ILIKE '%python%' OR user_agent ILIKE '%scrapy%' OR user_agent ILIKE '%httpx%' THEN 'Python Scrapers'
254                    WHEN user_agent ILIKE '%curl%' OR user_agent ILIKE '%wget%' OR user_agent ILIKE '%node-fetch%' OR user_agent ILIKE '%axios%' THEN 'CLI/HTTP Tools'
255                    WHEN user_agent ILIKE '%headless%' OR user_agent ILIKE '%phantom%' OR user_agent ILIKE '%selenium%' OR user_agent ILIKE '%puppeteer%' THEN 'Headless Browsers'
256                    WHEN user_agent ILIKE '%uptimerobot%' OR user_agent ILIKE '%pingdom%' OR user_agent ILIKE '%statuscake%' OR user_agent ILIKE '%lighthouse%' THEN 'Monitoring'
257                    WHEN is_behavioral_bot = true THEN 'Behavioral Bot'
258                    WHEN is_scanner = true THEN 'Scanner'
259                    ELSE 'Other'
260                END as "bot_type",
261                COUNT(*)::bigint as "count!"
262            FROM user_sessions
263            WHERE started_at >= $1 AND started_at < $2
264              AND (is_bot = true OR is_behavioral_bot = true OR is_scanner = true)
265            GROUP BY 1
266            ORDER BY COUNT(*) DESC
267            "#,
268            start,
269            end
270        )
271        .fetch_all(&*self.pool)
272        .await
273        .map_err(Into::into)
274    }
275}