1use async_trait::async_trait;
2use chrono::{DateTime, Utc};
3use http::{HeaderMap, Uri};
4use std::sync::Arc;
5use systemprompt_identifiers::{SessionId, SessionSource, UserId};
6
7pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
8
9#[derive(Debug, thiserror::Error)]
10#[non_exhaustive]
11pub enum AnalyticsProviderError {
12 #[error("Session not found")]
13 SessionNotFound,
14
15 #[error("Fingerprint not found")]
16 FingerprintNotFound,
17
18 #[error("Internal error: {0}")]
19 Internal(String),
20}
21
22impl From<anyhow::Error> for AnalyticsProviderError {
23 fn from(err: anyhow::Error) -> Self {
24 Self::Internal(err.to_string())
25 }
26}
27
/// Client and request attributes collected for a session.
///
/// Every field is optional; [`Default`] yields a value with all fields `None`.
#[derive(Clone, Debug, Default)]
pub struct SessionAnalytics {
    // --- Client identity -------------------------------------------------
    /// Client IP address, if known.
    pub ip_address: Option<String>,
    /// Raw user-agent string. Inspected by `is_ai_crawler` / `is_bot` and
    /// hashed by `compute_fingerprint`.
    pub user_agent: Option<String>,
    /// Device type label. NOTE(review): the value vocabulary is not defined here.
    pub device_type: Option<String>,
    /// Browser label.
    pub browser: Option<String>,
    /// Operating system label.
    pub os: Option<String>,
    /// Precomputed fingerprint. When present, `compute_fingerprint` returns it
    /// verbatim instead of hashing.
    pub fingerprint_hash: Option<String>,

    // --- Referral --------------------------------------------------------
    // NOTE(review): how `referer`, `referrer_url` and `referrer_source` differ
    // is decided by whoever fills this struct — confirm against the extractor.
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,

    // --- Locale ----------------------------------------------------------
    /// Language preference; first choice for the fingerprint's language part.
    pub accept_language: Option<String>,
    /// Fallback for the fingerprint's language part when `accept_language`
    /// is `None`.
    pub preferred_locale: Option<String>,

    // --- Display / environment --------------------------------------------
    /// Screen width (presumably pixels — TODO confirm).
    pub screen_width: Option<i32>,
    /// Screen height (presumably pixels — TODO confirm).
    pub screen_height: Option<i32>,
    /// Client timezone. NOTE(review): format (IANA name vs. offset) not shown here.
    pub timezone: Option<String>,

    // --- Navigation --------------------------------------------------------
    // NOTE(review): the distinction between `page_url`, `landing_page` and
    // `entry_url` is not visible in this file.
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,

    // --- Geolocation -------------------------------------------------------
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,

    // --- Campaign (UTM) parameters -----------------------------------------
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
56
/// Substrings that mark a user agent as an AI crawler / AI assistant fetcher.
///
/// All entries are lowercase because `SessionAnalytics::is_ai_crawler`
/// compares them against a lower-cased user-agent string using substring
/// matching.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
78
79impl SessionAnalytics {
80 pub fn is_ai_crawler(&self) -> bool {
81 self.user_agent.as_ref().is_some_and(|ua| {
82 let ua_lower = ua.to_lowercase();
83 AI_CRAWLER_TOKENS
84 .iter()
85 .any(|token| ua_lower.contains(token))
86 })
87 }
88
89 pub fn is_bot(&self) -> bool {
90 if self.is_ai_crawler() {
91 return false;
92 }
93 self.user_agent.as_ref().is_some_and(|ua| {
94 let ua_lower = ua.to_lowercase();
95 ua_lower.contains("bot")
96 || ua_lower.contains("crawler")
97 || ua_lower.contains("spider")
98 || ua_lower.contains("headless")
99 })
100 }
101
102 pub fn compute_fingerprint(&self) -> String {
103 use xxhash_rust::xxh64::xxh64;
104
105 if let Some(hash) = &self.fingerprint_hash {
106 return hash.clone();
107 }
108
109 let data = format!(
110 "{}|{}",
111 self.user_agent.as_deref().unwrap_or(""),
112 self.accept_language
113 .as_deref()
114 .or(self.preferred_locale.as_deref())
115 .unwrap_or("")
116 );
117
118 format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
119 }
120}
121
122#[derive(Debug, Clone)]
123pub struct AnalyticsSession {
124 pub session_id: SessionId,
125 pub user_id: Option<UserId>,
126 pub fingerprint: Option<String>,
127 pub created_at: DateTime<Utc>,
128}
129
130#[derive(Debug)]
131pub struct CreateSessionInput<'a> {
132 pub session_id: &'a SessionId,
133 pub user_id: Option<&'a UserId>,
134 pub analytics: &'a SessionAnalytics,
135 pub session_source: SessionSource,
136 pub is_bot: bool,
137 pub is_ai_crawler: bool,
138 pub expires_at: DateTime<Utc>,
139}
140
141#[async_trait]
142pub trait AnalyticsProvider: Send + Sync {
143 fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;
144
145 async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;
146
147 async fn find_recent_session_by_fingerprint(
148 &self,
149 fingerprint: &str,
150 max_age_seconds: i64,
151 ) -> AnalyticsResult<Option<AnalyticsSession>>;
152
153 async fn find_session_by_id(
154 &self,
155 session_id: &SessionId,
156 ) -> AnalyticsResult<Option<AnalyticsSession>>;
157
158 async fn migrate_user_sessions(
159 &self,
160 from_user_id: &UserId,
161 to_user_id: &UserId,
162 ) -> AnalyticsResult<u64>;
163
164 async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
165}
166
167#[async_trait]
168pub trait FingerprintProvider: Send + Sync {
169 async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;
170
171 async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;
172
173 async fn upsert_fingerprint(
174 &self,
175 fingerprint: &str,
176 ip_address: Option<&str>,
177 user_agent: Option<&str>,
178 screen_info: Option<&str>,
179 ) -> AnalyticsResult<()>;
180}
181
182pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;
183pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;