1use async_trait::async_trait;
4use chrono::{DateTime, Utc};
5use http::{HeaderMap, Uri};
6use std::sync::Arc;
7use systemprompt_identifiers::{SessionId, SessionSource, UserId};
8
/// Shorthand for a `Result` whose error type is [`AnalyticsProviderError`].
pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
10
/// Errors returned through [`AnalyticsResult`].
///
/// The enum is `#[non_exhaustive]`, so code outside the defining crate must
/// keep a wildcard arm when matching on it.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum AnalyticsProviderError {
    /// Rendered as "Session not found".
    #[error("Session not found")]
    SessionNotFound,

    /// Rendered as "Fingerprint not found".
    #[error("Fingerprint not found")]
    FingerprintNotFound,

    /// Free-form failure message, rendered as "Internal error: <message>".
    /// This is the variant produced when converting from `anyhow::Error`.
    #[error("Internal error: {0}")]
    Internal(String),
}
23
24impl From<anyhow::Error> for AnalyticsProviderError {
25 fn from(err: anyhow::Error) -> Self {
26 Self::Internal(err.to_string())
27 }
28}
29
/// Optional attributes describing one session, as returned by
/// `AnalyticsProvider::extract_analytics`.
///
/// Every field is an `Option`, and `Default` yields a value with all of them
/// set to `None`.
#[derive(Clone, Debug, Default)]
pub struct SessionAnalytics {
    // --- Client identification ---
    pub ip_address: Option<String>,
    /// Inspected by `is_ai_crawler` / `is_bot` and hashed by
    /// `compute_fingerprint`.
    pub user_agent: Option<String>,
    pub device_type: Option<String>,
    pub browser: Option<String>,
    pub os: Option<String>,
    /// When set, `compute_fingerprint` returns this value unchanged.
    pub fingerprint_hash: Option<String>,

    // --- Referrer ---
    // NOTE(review): both `referer` and `referrer_url` exist; how they differ
    // is not visible from this file.
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,

    // --- Language ---
    /// First choice for the language component of `compute_fingerprint`.
    pub accept_language: Option<String>,
    /// Used by `compute_fingerprint` only when `accept_language` is `None`.
    pub preferred_locale: Option<String>,

    // --- Screen and timezone ---
    pub screen_width: Option<i32>,
    pub screen_height: Option<i32>,
    pub timezone: Option<String>,

    // --- Pages ---
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,

    // --- Location ---
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,

    // --- UTM parameters ---
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
58
/// Substrings that mark a user agent as an AI crawler.
///
/// `SessionAnalytics::is_ai_crawler` lowercases the user agent before
/// searching for these, so every entry must itself be lowercase.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
80
81impl SessionAnalytics {
82 pub fn is_ai_crawler(&self) -> bool {
83 self.user_agent.as_ref().is_some_and(|ua| {
84 let ua_lower = ua.to_lowercase();
85 AI_CRAWLER_TOKENS
86 .iter()
87 .any(|token| ua_lower.contains(token))
88 })
89 }
90
91 pub fn is_bot(&self) -> bool {
92 if self.is_ai_crawler() {
93 return false;
94 }
95 self.user_agent.as_ref().is_some_and(|ua| {
96 let ua_lower = ua.to_lowercase();
97 ua_lower.contains("bot")
98 || ua_lower.contains("crawler")
99 || ua_lower.contains("spider")
100 || ua_lower.contains("headless")
101 })
102 }
103
104 pub fn compute_fingerprint(&self) -> String {
105 use xxhash_rust::xxh64::xxh64;
106
107 if let Some(hash) = &self.fingerprint_hash {
108 return hash.clone();
109 }
110
111 let data = format!(
112 "{}|{}",
113 self.user_agent.as_deref().unwrap_or(""),
114 self.accept_language
115 .as_deref()
116 .or(self.preferred_locale.as_deref())
117 .unwrap_or("")
118 );
119
120 format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
121 }
122}
123
/// Session record returned by the lookup methods of `AnalyticsProvider`
/// (`find_recent_session_by_fingerprint`, `find_session_by_id`).
#[derive(Debug, Clone)]
pub struct AnalyticsSession {
    /// Identifier of the session.
    pub session_id: SessionId,
    /// User attached to the session, if any.
    pub user_id: Option<UserId>,
    /// Fingerprint stored with the session, if any.
    // presumably the value produced by `SessionAnalytics::compute_fingerprint` — confirm
    pub fingerprint: Option<String>,
    /// Creation timestamp, in UTC.
    pub created_at: DateTime<Utc>,
}
131
/// Argument bundle for `AnalyticsProvider::create_session`.
///
/// The identifiers and the analytics data are borrowed for `'a`; the
/// remaining fields are owned values.
#[derive(Debug)]
pub struct CreateSessionInput<'a> {
    /// Identifier of the session to create.
    pub session_id: &'a SessionId,
    /// User to attach to the session, if any.
    pub user_id: Option<&'a UserId>,
    /// Analytics attributes to record with the session.
    pub analytics: &'a SessionAnalytics,
    /// Source of the session, as a `SessionSource` value.
    pub session_source: SessionSource,
    // presumably the result of `SessionAnalytics::is_bot` — confirm with callers
    pub is_bot: bool,
    // presumably the result of `SessionAnalytics::is_ai_crawler` — confirm with callers
    pub is_ai_crawler: bool,
    /// Expiry timestamp for the session, in UTC.
    pub expires_at: DateTime<Utc>,
}
142
/// Backend abstraction for session analytics.
///
/// Implementations must be `Send + Sync`; the trait is used as a trait object
/// through `DynAnalyticsProvider`. The exact storage semantics of each method
/// are defined by the implementation and are not visible from this file.
#[async_trait]
pub trait AnalyticsProvider: Send + Sync {
    /// Builds a [`SessionAnalytics`] from request headers and an optional URI.
    ///
    /// Synchronous and infallible by signature.
    fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;

    /// Creates a session from `input`.
    // presumably persists the session — confirm against implementations
    async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;

    /// Looks up a session by `fingerprint`.
    ///
    /// The return type allows `Ok(None)` for "no match".
    // presumably `max_age_seconds` bounds how old a matching session may be — confirm
    async fn find_recent_session_by_fingerprint(
        &self,
        fingerprint: &str,
        max_age_seconds: i64,
    ) -> AnalyticsResult<Option<AnalyticsSession>>;

    /// Looks up a session by its identifier.
    ///
    /// The return type allows `Ok(None)` for "no match".
    async fn find_session_by_id(
        &self,
        session_id: &SessionId,
    ) -> AnalyticsResult<Option<AnalyticsSession>>;

    /// Moves sessions from `from_user_id` to `to_user_id`.
    // presumably the returned `u64` is the number of sessions affected — confirm
    async fn migrate_user_sessions(
        &self,
        from_user_id: &UserId,
        to_user_id: &UserId,
    ) -> AnalyticsResult<u64>;

    /// Marks the given session as converted.
    // NOTE(review): what "converted" means is defined by implementations, not shown here
    async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
}
168
/// Backend abstraction for fingerprint bookkeeping.
///
/// Implementations must be `Send + Sync`; the trait is used as a trait object
/// through `DynFingerprintProvider`.
#[async_trait]
pub trait FingerprintProvider: Send + Sync {
    /// Returns a count of active sessions for `fingerprint`.
    // NOTE(review): what counts as "active" is decided by implementations
    async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;

    /// Looks for a session that can be reused for `fingerprint`.
    ///
    /// The return type allows `Ok(None)` for "no match".
    // presumably the returned string is a session id — confirm against implementations
    async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;

    /// Inserts or updates the record for `fingerprint` with the optional
    /// client details supplied.
    // NOTE(review): the expected format of `screen_info` is not visible here
    async fn upsert_fingerprint(
        &self,
        fingerprint: &str,
        ip_address: Option<&str>,
        user_agent: Option<&str>,
        screen_info: Option<&str>,
    ) -> AnalyticsResult<()>;
}
183
/// Reference-counted, dynamically dispatched [`AnalyticsProvider`].
pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;
185
/// Reference-counted, dynamically dispatched [`FingerprintProvider`].
pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;