1use async_trait::async_trait;
7use chrono::{DateTime, Utc};
8use http::{HeaderMap, Uri};
9use std::sync::Arc;
10use systemprompt_identifiers::{SessionId, SessionSource, UserId};
11
/// Convenience alias for a `Result` whose error type is [`AnalyticsProviderError`].
pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
13
/// Error type carried by [`AnalyticsResult`], and therefore by every fallible
/// method on the provider traits in this file.
///
/// The enum is `#[non_exhaustive]`, so matches written outside the defining
/// crate need a wildcard arm.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum AnalyticsProviderError {
    /// Rendered as `Session not found`.
    #[error("Session not found")]
    SessionNotFound,

    /// Rendered as `Fingerprint not found`.
    #[error("Fingerprint not found")]
    FingerprintNotFound,

    /// Free-form failure; rendered as `Internal error: <message>`.
    #[error("Internal error: {0}")]
    Internal(String),
}
26
/// Bag of optional analytics attributes describing one client session.
///
/// This is the type returned by `AnalyticsProvider::extract_analytics`.
/// Every field is an `Option`, so the derived `Default` yields a value with
/// all fields `None`.
#[derive(Debug, Clone, Default)]
pub struct SessionAnalytics {
    pub ip_address: Option<String>,
    /// Read by `is_ai_crawler`, `is_bot` and `compute_fingerprint`.
    pub user_agent: Option<String>,
    pub device_type: Option<String>,
    pub browser: Option<String>,
    pub os: Option<String>,
    /// When set, `compute_fingerprint` returns this value verbatim.
    pub fingerprint_hash: Option<String>,
    // NOTE(review): `referer`, `referrer_url` and `referrer_source` look
    // overlapping; how they differ is not visible in this file.
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,
    /// First-choice language input for `compute_fingerprint`.
    pub accept_language: Option<String>,
    /// Used by `compute_fingerprint` only when `accept_language` is `None`.
    pub preferred_locale: Option<String>,
    // Units are not stated here; presumably pixels — TODO confirm.
    pub screen_width: Option<i32>,
    pub screen_height: Option<i32>,
    pub timezone: Option<String>,
    // NOTE(review): the distinction between `page_url`, `landing_page` and
    // `entry_url` is not visible in this file.
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,
    // Presumably the standard UTM campaign parameters — TODO confirm source.
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
55
/// Substrings that classify a user agent as an AI crawler.
///
/// `SessionAnalytics::is_ai_crawler` lower-cases the user agent before
/// searching for these tokens, so every entry must itself be lowercase or it
/// can never match.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
77
78impl SessionAnalytics {
79 pub fn is_ai_crawler(&self) -> bool {
80 self.user_agent.as_ref().is_some_and(|ua| {
81 let ua_lower = ua.to_lowercase();
82 AI_CRAWLER_TOKENS
83 .iter()
84 .any(|token| ua_lower.contains(token))
85 })
86 }
87
88 pub fn is_bot(&self) -> bool {
89 if self.is_ai_crawler() {
90 return false;
91 }
92 self.user_agent.as_ref().is_some_and(|ua| {
93 let ua_lower = ua.to_lowercase();
94 ua_lower.contains("bot")
95 || ua_lower.contains("crawler")
96 || ua_lower.contains("spider")
97 || ua_lower.contains("headless")
98 })
99 }
100
101 pub fn compute_fingerprint(&self) -> String {
102 use xxhash_rust::xxh64::xxh64;
103
104 if let Some(hash) = &self.fingerprint_hash {
105 return hash.clone();
106 }
107
108 let data = format!(
109 "{}|{}",
110 self.user_agent.as_deref().unwrap_or(""),
111 self.accept_language
112 .as_deref()
113 .or(self.preferred_locale.as_deref())
114 .unwrap_or("")
115 );
116
117 format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
118 }
119}
120
/// Session record returned by the lookup methods of `AnalyticsProvider`
/// (`find_recent_session_by_fingerprint` and `find_session_by_id`).
#[derive(Debug, Clone)]
pub struct AnalyticsSession {
    pub session_id: SessionId,
    /// `None` when no user is attached to the session.
    pub user_id: Option<UserId>,
    /// `None` when no fingerprint is recorded for the session.
    pub fingerprint: Option<String>,
    // Presumably the session's creation time — TODO confirm with providers.
    pub created_at: DateTime<Utc>,
}
128
/// Minimal view of a session returned by
/// `AnalyticsProvider::find_active_session_by_id`; it carries only the
/// optional user id.
#[derive(Debug, Clone)]
pub struct ActiveSession {
    /// `None` when no user is attached to the session.
    pub user_id: Option<UserId>,
}
133
/// Argument bundle for `AnalyticsProvider::create_session`.
///
/// Identifiers and analytics are borrowed for `'a`; the remaining fields are
/// owned values.
#[derive(Debug)]
pub struct CreateSessionInput<'a> {
    pub session_id: &'a SessionId,
    /// `None` when the session is created without a user.
    pub user_id: Option<&'a UserId>,
    pub analytics: &'a SessionAnalytics,
    pub session_source: SessionSource,
    // NOTE(review): presumably the results of `SessionAnalytics::is_bot` and
    // `SessionAnalytics::is_ai_crawler` — confirm against callers.
    pub is_bot: bool,
    pub is_ai_crawler: bool,
    pub expires_at: DateTime<Utc>,
}
144
/// Interface for extracting request analytics and managing sessions.
///
/// Implementations must be `Send + Sync`. All fallible methods report
/// failures through [`AnalyticsResult`]. Only signatures are defined here;
/// exact semantics are left to each implementation.
#[async_trait]
pub trait AnalyticsProvider: Send + Sync {
    /// Builds a [`SessionAnalytics`] from request headers and an optional URI.
    /// This is the only synchronous, infallible method of the trait.
    fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;

    /// Creates a session from the supplied [`CreateSessionInput`].
    async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;

    /// Looks up a session by fingerprint, bounded by `max_age_seconds`.
    /// Returns `Ok(None)` when nothing matches.
    // NOTE(review): whether the age bound is inclusive, and which timestamp
    // it is measured against, is implementation-defined — confirm.
    async fn find_recent_session_by_fingerprint(
        &self,
        fingerprint: &str,
        max_age_seconds: i64,
    ) -> AnalyticsResult<Option<AnalyticsSession>>;

    /// Looks up a session by id. Returns `Ok(None)` when nothing matches.
    async fn find_session_by_id(
        &self,
        session_id: &SessionId,
    ) -> AnalyticsResult<Option<AnalyticsSession>>;

    /// Looks up an active session by id, returning the reduced
    /// [`ActiveSession`] view, or `Ok(None)` when nothing matches.
    // NOTE(review): what counts as "active" (not revoked, not expired, …) is
    // not defined in this file — confirm with implementations.
    async fn find_active_session_by_id(
        &self,
        session_id: &SessionId,
    ) -> AnalyticsResult<Option<ActiveSession>>;

    /// Revokes the session with the given id.
    async fn revoke_session(&self, session_id: &SessionId) -> AnalyticsResult<()>;

    /// Revokes every session belonging to `user_id`.
    // Presumably the returned `u64` is the number of sessions revoked — TODO confirm.
    async fn revoke_all_sessions_for_user(&self, user_id: &UserId) -> AnalyticsResult<u64>;

    /// Moves sessions from `from_user_id` to `to_user_id`.
    // Presumably the returned `u64` is the number of sessions migrated — TODO confirm.
    async fn migrate_user_sessions(
        &self,
        from_user_id: &UserId,
        to_user_id: &UserId,
    ) -> AnalyticsResult<u64>;

    /// Marks the session with the given id as converted.
    async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
}
179
/// Interface for fingerprint bookkeeping.
///
/// Implementations must be `Send + Sync`. All methods are async and report
/// failures through [`AnalyticsResult`].
#[async_trait]
pub trait FingerprintProvider: Send + Sync {
    /// Counts the active sessions associated with `fingerprint`.
    async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;

    /// Finds a session that can be reused for `fingerprint`, or `Ok(None)`
    /// when there is none.
    // NOTE(review): the `String` is presumably a session id; other APIs in
    // this file use `SessionId` instead — confirm the expected format.
    async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;

    /// Inserts or updates the record for `fingerprint`, with optional IP
    /// address, user agent and screen information.
    // NOTE(review): the format of `screen_info` is not defined in this file.
    async fn upsert_fingerprint(
        &self,
        fingerprint: &str,
        ip_address: Option<&str>,
        user_agent: Option<&str>,
        screen_info: Option<&str>,
    ) -> AnalyticsResult<()>;
}
194
/// Reference-counted trait object handle to an [`AnalyticsProvider`].
pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;

/// Reference-counted trait object handle to a [`FingerprintProvider`].
pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;