Skip to main content

systemprompt_traits/
analytics.rs

//! Session analytics and fingerprinting provider traits.
//!
//! These traits are dispatched as trait objects (`dyn _`), so they use
//! `#[async_trait]`; native `async fn` in traits is not yet `dyn`-compatible.
5
6use async_trait::async_trait;
7use chrono::{DateTime, Utc};
8use http::{HeaderMap, Uri};
9use std::sync::Arc;
10use systemprompt_identifiers::{SessionId, SessionSource, UserId};
11
12pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
13
14#[derive(Debug, thiserror::Error)]
15#[non_exhaustive]
16pub enum AnalyticsProviderError {
17    #[error("Session not found")]
18    SessionNotFound,
19
20    #[error("Fingerprint not found")]
21    FingerprintNotFound,
22
23    #[error("Internal error: {0}")]
24    Internal(String),
25}
26
/// Optional per-session attributes.
///
/// Every field is an `Option`, and the derived `Default` yields a value in
/// which all of them are `None`.
// NOTE(review): the grouping comments below are inferred from field names
// only; confirm against the code that populates this struct.
#[derive(Debug, Clone, Default)]
pub struct SessionAnalytics {
    // --- client / device ---
    pub ip_address: Option<String>,
    /// Read by `is_ai_crawler`, `is_bot` and `compute_fingerprint`.
    pub user_agent: Option<String>,
    pub device_type: Option<String>,
    pub browser: Option<String>,
    pub os: Option<String>,
    /// When set, `compute_fingerprint` returns this value verbatim.
    pub fingerprint_hash: Option<String>,

    // --- referrer ---
    // NOTE(review): `referer` and `referrer_url` are separate fields; how they
    // differ is not visible in this file.
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,

    // --- locale / display ---
    /// Preferred language input for `compute_fingerprint`.
    pub accept_language: Option<String>,
    /// Fallback language input for `compute_fingerprint` when
    /// `accept_language` is `None`.
    pub preferred_locale: Option<String>,
    pub screen_width: Option<i32>,
    pub screen_height: Option<i32>,
    pub timezone: Option<String>,

    // --- pages ---
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,

    // --- location ---
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,

    // --- UTM parameters ---
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
55
/// Lowercase substrings used by `SessionAnalytics::is_ai_crawler`.
///
/// Each entry is matched with `contains` against the lowercased user agent,
/// so entries must stay lowercase to be able to match.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
77
78impl SessionAnalytics {
79    pub fn is_ai_crawler(&self) -> bool {
80        self.user_agent.as_ref().is_some_and(|ua| {
81            let ua_lower = ua.to_lowercase();
82            AI_CRAWLER_TOKENS
83                .iter()
84                .any(|token| ua_lower.contains(token))
85        })
86    }
87
88    pub fn is_bot(&self) -> bool {
89        if self.is_ai_crawler() {
90            return false;
91        }
92        self.user_agent.as_ref().is_some_and(|ua| {
93            let ua_lower = ua.to_lowercase();
94            ua_lower.contains("bot")
95                || ua_lower.contains("crawler")
96                || ua_lower.contains("spider")
97                || ua_lower.contains("headless")
98        })
99    }
100
101    pub fn compute_fingerprint(&self) -> String {
102        use xxhash_rust::xxh64::xxh64;
103
104        if let Some(hash) = &self.fingerprint_hash {
105            return hash.clone();
106        }
107
108        let data = format!(
109            "{}|{}",
110            self.user_agent.as_deref().unwrap_or(""),
111            self.accept_language
112                .as_deref()
113                .or(self.preferred_locale.as_deref())
114                .unwrap_or("")
115        );
116
117        format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
118    }
119}
120
121#[derive(Debug, Clone)]
122pub struct AnalyticsSession {
123    pub session_id: SessionId,
124    pub user_id: Option<UserId>,
125    pub fingerprint: Option<String>,
126    pub created_at: DateTime<Utc>,
127}
128
129#[derive(Debug, Clone)]
130pub struct ActiveSession {
131    pub user_id: Option<UserId>,
132}
133
134#[derive(Debug)]
135pub struct CreateSessionInput<'a> {
136    pub session_id: &'a SessionId,
137    pub user_id: Option<&'a UserId>,
138    pub analytics: &'a SessionAnalytics,
139    pub session_source: SessionSource,
140    pub is_bot: bool,
141    pub is_ai_crawler: bool,
142    pub expires_at: DateTime<Utc>,
143}
144
145#[async_trait]
146pub trait AnalyticsProvider: Send + Sync {
147    fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;
148
149    async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;
150
151    async fn find_recent_session_by_fingerprint(
152        &self,
153        fingerprint: &str,
154        max_age_seconds: i64,
155    ) -> AnalyticsResult<Option<AnalyticsSession>>;
156
157    async fn find_session_by_id(
158        &self,
159        session_id: &SessionId,
160    ) -> AnalyticsResult<Option<AnalyticsSession>>;
161
162    async fn find_active_session_by_id(
163        &self,
164        session_id: &SessionId,
165    ) -> AnalyticsResult<Option<ActiveSession>>;
166
167    async fn revoke_session(&self, session_id: &SessionId) -> AnalyticsResult<()>;
168
169    async fn revoke_all_sessions_for_user(&self, user_id: &UserId) -> AnalyticsResult<u64>;
170
171    async fn migrate_user_sessions(
172        &self,
173        from_user_id: &UserId,
174        to_user_id: &UserId,
175    ) -> AnalyticsResult<u64>;
176
177    async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
178}
179
180#[async_trait]
181pub trait FingerprintProvider: Send + Sync {
182    async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;
183
184    async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;
185
186    async fn upsert_fingerprint(
187        &self,
188        fingerprint: &str,
189        ip_address: Option<&str>,
190        user_agent: Option<&str>,
191        screen_info: Option<&str>,
192    ) -> AnalyticsResult<()>;
193}
194
195pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;
196
197pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;