Skip to main content

systemprompt_analytics/repository/
traffic.rs

1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use sqlx::PgPool;
4use std::sync::Arc;
5use systemprompt_database::DbPool;
6
7use crate::models::cli::{BotTotalsRow, BotTypeRow, DeviceRow, GeoRow, TrafficSourceRow};
8
9#[derive(Debug)]
10pub struct TrafficAnalyticsRepository {
11    pool: Arc<PgPool>,
12}
13
14impl TrafficAnalyticsRepository {
15    pub fn new(db: &DbPool) -> Result<Self> {
16        let pool = db.pool_arc()?;
17        Ok(Self { pool })
18    }
19
20    pub async fn get_sources(
21        &self,
22        start: DateTime<Utc>,
23        end: DateTime<Utc>,
24        limit: i64,
25        engaged_only: bool,
26    ) -> Result<Vec<TrafficSourceRow>> {
27        if engaged_only {
28            sqlx::query_as!(
29                TrafficSourceRow,
30                r#"
31                SELECT
32                    COALESCE(referrer_source, 'direct') as "source",
33                    COUNT(*)::bigint as "count!"
34                FROM user_sessions
35                WHERE started_at >= $1 AND started_at < $2
36                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
37                  AND landing_page IS NOT NULL AND request_count > 0
38                GROUP BY referrer_source
39                ORDER BY COUNT(*) DESC
40                LIMIT $3
41                "#,
42                start,
43                end,
44                limit
45            )
46            .fetch_all(&*self.pool)
47            .await
48            .map_err(Into::into)
49        } else {
50            sqlx::query_as!(
51                TrafficSourceRow,
52                r#"
53                SELECT
54                    COALESCE(referrer_source, 'direct') as "source",
55                    COUNT(*)::bigint as "count!"
56                FROM user_sessions
57                WHERE started_at >= $1 AND started_at < $2
58                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
59                GROUP BY referrer_source
60                ORDER BY COUNT(*) DESC
61                LIMIT $3
62                "#,
63                start,
64                end,
65                limit
66            )
67            .fetch_all(&*self.pool)
68            .await
69            .map_err(Into::into)
70        }
71    }
72
73    pub async fn get_geo_breakdown(
74        &self,
75        start: DateTime<Utc>,
76        end: DateTime<Utc>,
77        limit: i64,
78        engaged_only: bool,
79    ) -> Result<Vec<GeoRow>> {
80        if engaged_only {
81            sqlx::query_as!(
82                GeoRow,
83                r#"
84                SELECT
85                    COALESCE(country, 'Unknown') as "country",
86                    COUNT(*)::bigint as "count!"
87                FROM user_sessions
88                WHERE started_at >= $1 AND started_at < $2
89                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
90                  AND landing_page IS NOT NULL AND request_count > 0
91                GROUP BY country
92                ORDER BY COUNT(*) DESC
93                LIMIT $3
94                "#,
95                start,
96                end,
97                limit
98            )
99            .fetch_all(&*self.pool)
100            .await
101            .map_err(Into::into)
102        } else {
103            sqlx::query_as!(
104                GeoRow,
105                r#"
106                SELECT
107                    COALESCE(country, 'Unknown') as "country",
108                    COUNT(*)::bigint as "count!"
109                FROM user_sessions
110                WHERE started_at >= $1 AND started_at < $2
111                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
112                GROUP BY country
113                ORDER BY COUNT(*) DESC
114                LIMIT $3
115                "#,
116                start,
117                end,
118                limit
119            )
120            .fetch_all(&*self.pool)
121            .await
122            .map_err(Into::into)
123        }
124    }
125
126    pub async fn get_device_breakdown(
127        &self,
128        start: DateTime<Utc>,
129        end: DateTime<Utc>,
130        limit: i64,
131        engaged_only: bool,
132    ) -> Result<Vec<DeviceRow>> {
133        if engaged_only {
134            sqlx::query_as!(
135                DeviceRow,
136                r#"
137                SELECT
138                    COALESCE(device_type, 'unknown') as "device",
139                    COALESCE(browser, 'unknown') as "browser",
140                    COUNT(*)::bigint as "count!"
141                FROM user_sessions
142                WHERE started_at >= $1 AND started_at < $2
143                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
144                  AND landing_page IS NOT NULL AND request_count > 0
145                GROUP BY device_type, browser
146                ORDER BY COUNT(*) DESC
147                LIMIT $3
148                "#,
149                start,
150                end,
151                limit
152            )
153            .fetch_all(&*self.pool)
154            .await
155            .map_err(Into::into)
156        } else {
157            sqlx::query_as!(
158                DeviceRow,
159                r#"
160                SELECT
161                    COALESCE(device_type, 'unknown') as "device",
162                    COALESCE(browser, 'unknown') as "browser",
163                    COUNT(*)::bigint as "count!"
164                FROM user_sessions
165                WHERE started_at >= $1 AND started_at < $2
166                  AND is_bot = false AND is_behavioral_bot = false AND is_scanner = false
167                GROUP BY device_type, browser
168                ORDER BY COUNT(*) DESC
169                LIMIT $3
170                "#,
171                start,
172                end,
173                limit
174            )
175            .fetch_all(&*self.pool)
176            .await
177            .map_err(Into::into)
178        }
179    }
180
181    pub async fn get_bot_totals(
182        &self,
183        start: DateTime<Utc>,
184        end: DateTime<Utc>,
185        engaged_only: bool,
186    ) -> Result<BotTotalsRow> {
187        if engaged_only {
188            sqlx::query_as!(
189                BotTotalsRow,
190                r#"
191                SELECT
192                    COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false AND landing_page IS NOT NULL AND request_count > 0)::bigint as "human!",
193                    COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true OR landing_page IS NULL OR request_count = 0)::bigint as "bot!"
194                FROM user_sessions
195                WHERE started_at >= $1 AND started_at < $2
196                "#,
197                start,
198                end
199            )
200            .fetch_one(&*self.pool)
201            .await
202            .map_err(Into::into)
203        } else {
204            sqlx::query_as!(
205                BotTotalsRow,
206                r#"
207                SELECT
208                    COUNT(*) FILTER (WHERE is_bot = false AND is_behavioral_bot = false AND is_scanner = false)::bigint as "human!",
209                    COUNT(*) FILTER (WHERE is_bot = true OR is_behavioral_bot = true OR is_scanner = true)::bigint as "bot!"
210                FROM user_sessions
211                WHERE started_at >= $1 AND started_at < $2
212                "#,
213                start,
214                end
215            )
216            .fetch_one(&*self.pool)
217            .await
218            .map_err(Into::into)
219        }
220    }
221
222    pub async fn get_bot_breakdown(
223        &self,
224        start: DateTime<Utc>,
225        end: DateTime<Utc>,
226    ) -> Result<Vec<BotTypeRow>> {
227        sqlx::query_as!(
228            BotTypeRow,
229            r#"
230            SELECT
231                COALESCE(
232                    CASE
233                        WHEN user_agent ILIKE '%googlebot%' THEN 'Googlebot'
234                        WHEN user_agent ILIKE '%bingbot%' THEN 'Bingbot'
235                        WHEN user_agent ILIKE '%chatgpt%' THEN 'ChatGPT'
236                        WHEN user_agent ILIKE '%claude%' THEN 'Claude'
237                        WHEN user_agent ILIKE '%perplexity%' THEN 'Perplexity'
238                        ELSE 'Other'
239                    END,
240                    'Unknown'
241                ) as "bot_type",
242                COUNT(*)::bigint as "count!"
243            FROM user_sessions
244            WHERE started_at >= $1 AND started_at < $2
245              AND (is_bot = true OR is_behavioral_bot = true OR is_scanner = true)
246            GROUP BY 1
247            ORDER BY COUNT(*) DESC
248            "#,
249            start,
250            end
251        )
252        .fetch_all(&*self.pool)
253        .await
254        .map_err(Into::into)
255    }
256}