Skip to main content

systemprompt_analytics/repository/
traffic.rs

1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use sqlx::PgPool;
4use std::sync::Arc;
5use systemprompt_database::DbPool;
6
7use crate::models::cli::{BotTotalsRow, BotTypeRow, DeviceRow, GeoRow, TrafficSourceRow};
8
9#[derive(Debug)]
10pub struct TrafficAnalyticsRepository {
11    pool: Arc<PgPool>,
12}
13
14impl TrafficAnalyticsRepository {
15    pub fn new(db: &DbPool) -> Result<Self> {
16        let pool = db.pool_arc()?;
17        Ok(Self { pool })
18    }
19
20    pub async fn get_sources(
21        &self,
22        start: DateTime<Utc>,
23        end: DateTime<Utc>,
24        limit: i64,
25        engaged_only: bool,
26    ) -> Result<Vec<TrafficSourceRow>> {
27        if engaged_only {
28            sqlx::query_as!(
29                TrafficSourceRow,
30                r#"
31                SELECT
32                    COALESCE(referrer_source, 'direct') as "source",
33                    COUNT(*)::bigint as "count!"
34                FROM user_sessions
35                WHERE started_at >= $1 AND started_at < $2
36                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
37                  AND landing_page IS NOT NULL AND request_count > 0
38                GROUP BY referrer_source
39                ORDER BY COUNT(*) DESC
40                LIMIT $3
41                "#,
42                start,
43                end,
44                limit
45            )
46            .fetch_all(&*self.pool)
47            .await
48            .map_err(Into::into)
49        } else {
50            sqlx::query_as!(
51                TrafficSourceRow,
52                r#"
53                SELECT
54                    COALESCE(referrer_source, 'direct') as "source",
55                    COUNT(*)::bigint as "count!"
56                FROM user_sessions
57                WHERE started_at >= $1 AND started_at < $2
58                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
59                GROUP BY referrer_source
60                ORDER BY COUNT(*) DESC
61                LIMIT $3
62                "#,
63                start,
64                end,
65                limit
66            )
67            .fetch_all(&*self.pool)
68            .await
69            .map_err(Into::into)
70        }
71    }
72
73    pub async fn get_geo_breakdown(
74        &self,
75        start: DateTime<Utc>,
76        end: DateTime<Utc>,
77        limit: i64,
78        engaged_only: bool,
79    ) -> Result<Vec<GeoRow>> {
80        if engaged_only {
81            sqlx::query_as!(
82                GeoRow,
83                r#"
84                SELECT
85                    COALESCE(country, 'Unknown') as "country",
86                    COUNT(*)::bigint as "count!"
87                FROM user_sessions
88                WHERE started_at >= $1 AND started_at < $2
89                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
90                  AND landing_page IS NOT NULL AND request_count > 0
91                GROUP BY country
92                ORDER BY COUNT(*) DESC
93                LIMIT $3
94                "#,
95                start,
96                end,
97                limit
98            )
99            .fetch_all(&*self.pool)
100            .await
101            .map_err(Into::into)
102        } else {
103            sqlx::query_as!(
104                GeoRow,
105                r#"
106                SELECT
107                    COALESCE(country, 'Unknown') as "country",
108                    COUNT(*)::bigint as "count!"
109                FROM user_sessions
110                WHERE started_at >= $1 AND started_at < $2
111                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
112                GROUP BY country
113                ORDER BY COUNT(*) DESC
114                LIMIT $3
115                "#,
116                start,
117                end,
118                limit
119            )
120            .fetch_all(&*self.pool)
121            .await
122            .map_err(Into::into)
123        }
124    }
125
126    pub async fn get_device_breakdown(
127        &self,
128        start: DateTime<Utc>,
129        end: DateTime<Utc>,
130        limit: i64,
131        engaged_only: bool,
132    ) -> Result<Vec<DeviceRow>> {
133        if engaged_only {
134            sqlx::query_as!(
135                DeviceRow,
136                r#"
137                SELECT
138                    COALESCE(device_type, 'unknown') as "device",
139                    COALESCE(browser, 'unknown') as "browser",
140                    COUNT(*)::bigint as "count!"
141                FROM user_sessions
142                WHERE started_at >= $1 AND started_at < $2
143                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
144                  AND landing_page IS NOT NULL AND request_count > 0
145                GROUP BY device_type, browser
146                ORDER BY COUNT(*) DESC
147                LIMIT $3
148                "#,
149                start,
150                end,
151                limit
152            )
153            .fetch_all(&*self.pool)
154            .await
155            .map_err(Into::into)
156        } else {
157            sqlx::query_as!(
158                DeviceRow,
159                r#"
160                SELECT
161                    COALESCE(device_type, 'unknown') as "device",
162                    COALESCE(browser, 'unknown') as "browser",
163                    COUNT(*)::bigint as "count!"
164                FROM user_sessions
165                WHERE started_at >= $1 AND started_at < $2
166                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
167                GROUP BY device_type, browser
168                ORDER BY COUNT(*) DESC
169                LIMIT $3
170                "#,
171                start,
172                end,
173                limit
174            )
175            .fetch_all(&*self.pool)
176            .await
177            .map_err(Into::into)
178        }
179    }
180
181    pub async fn get_bot_totals(
182        &self,
183        start: DateTime<Utc>,
184        end: DateTime<Utc>,
185        engaged_only: bool,
186    ) -> Result<BotTotalsRow> {
187        if engaged_only {
188            sqlx::query_as!(
189                BotTotalsRow,
190                r#"
191                SELECT
192                    COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false AND landing_page IS NOT NULL AND request_count > 0)::bigint as "human!",
193                    COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true OR landing_page IS NULL OR request_count = 0)::bigint as "bot!"
194                FROM user_sessions
195                WHERE started_at >= $1 AND started_at < $2
196                "#,
197                start,
198                end
199            )
200            .fetch_one(&*self.pool)
201            .await
202            .map_err(Into::into)
203        } else {
204            sqlx::query_as!(
205                BotTotalsRow,
206                r#"
207                SELECT
208                    COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false)::bigint as "human!",
209                    COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true)::bigint as "bot!"
210                FROM user_sessions
211                WHERE started_at >= $1 AND started_at < $2
212                "#,
213                start,
214                end
215            )
216            .fetch_one(&*self.pool)
217            .await
218            .map_err(Into::into)
219        }
220    }
221
222    pub async fn get_bot_breakdown(
223        &self,
224        start: DateTime<Utc>,
225        end: DateTime<Utc>,
226    ) -> Result<Vec<BotTypeRow>> {
227        sqlx::query_as!(
228            BotTypeRow,
229            r#"
230            SELECT
231                CASE
232                    WHEN user_agent ILIKE '%googlebot%' OR user_agent ILIKE '%google-inspectiontool%' OR user_agent ILIKE '%adsbot-google%' THEN 'Google'
233                    WHEN user_agent ILIKE '%bingbot%' OR user_agent ILIKE '%bingpreview%' OR user_agent ILIKE '%msnbot%' THEN 'Bing'
234                    WHEN user_agent ILIKE '%chatgpt%' OR user_agent ILIKE '%gptbot%' THEN 'OpenAI'
235                    WHEN user_agent ILIKE '%claude%' OR user_agent ILIKE '%anthropic%' THEN 'Anthropic'
236                    WHEN user_agent ILIKE '%perplexity%' THEN 'Perplexity'
237                    WHEN user_agent ILIKE '%baiduspider%' THEN 'Baidu'
238                    WHEN user_agent ILIKE '%yandexbot%' THEN 'Yandex'
239                    WHEN user_agent ILIKE '%facebookexternalhit%' OR user_agent ILIKE '%facebot%' OR user_agent ILIKE '%meta-externalagent%' THEN 'Meta'
240                    WHEN user_agent ILIKE '%twitterbot%' THEN 'Twitter/X'
241                    WHEN user_agent ILIKE '%linkedinbot%' THEN 'LinkedIn'
242                    WHEN user_agent ILIKE '%semrushbot%' OR user_agent ILIKE '%ahrefsbot%' OR user_agent ILIKE '%mj12bot%' OR user_agent ILIKE '%dotbot%' THEN 'SEO Crawlers'
243                    WHEN user_agent ILIKE '%bytespider%' THEN 'ByteDance'
244                    WHEN user_agent ILIKE '%amazonbot%' OR user_agent ILIKE '%applebot%' THEN 'Tech Giants'
245                    WHEN user_agent ILIKE '%python%' OR user_agent ILIKE '%scrapy%' OR user_agent ILIKE '%httpx%' THEN 'Python Scrapers'
246                    WHEN user_agent ILIKE '%curl%' OR user_agent ILIKE '%wget%' OR user_agent ILIKE '%node-fetch%' OR user_agent ILIKE '%axios%' THEN 'CLI/HTTP Tools'
247                    WHEN user_agent ILIKE '%headless%' OR user_agent ILIKE '%phantom%' OR user_agent ILIKE '%selenium%' OR user_agent ILIKE '%puppeteer%' THEN 'Headless Browsers'
248                    WHEN user_agent ILIKE '%uptimerobot%' OR user_agent ILIKE '%pingdom%' OR user_agent ILIKE '%statuscake%' OR user_agent ILIKE '%lighthouse%' THEN 'Monitoring'
249                    WHEN is_behavioral_bot = true THEN 'Behavioral Bot'
250                    WHEN is_scanner = true THEN 'Scanner'
251                    ELSE 'Other'
252                END as "bot_type",
253                COUNT(*)::bigint as "count!"
254            FROM user_sessions
255            WHERE started_at >= $1 AND started_at < $2
256              AND (is_bot = true OR is_behavioral_bot = true OR is_scanner = true)
257            GROUP BY 1
258            ORDER BY COUNT(*) DESC
259            "#,
260            start,
261            end
262        )
263        .fetch_all(&*self.pool)
264        .await
265        .map_err(Into::into)
266    }
267}