1use async_trait::async_trait;
4use chrono::{DateTime, Utc};
5use http::{HeaderMap, Uri};
6use std::sync::Arc;
7use systemprompt_identifiers::{SessionId, SessionSource, UserId};
8
9pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
10
11#[derive(Debug, thiserror::Error)]
12#[non_exhaustive]
13pub enum AnalyticsProviderError {
14 #[error("Session not found")]
15 SessionNotFound,
16
17 #[error("Fingerprint not found")]
18 FingerprintNotFound,
19
20 #[error("Internal error: {0}")]
21 Internal(String),
22}
23
/// Optional client/request metadata attached to a session.
///
/// Every field is optional and `Default` yields a value with all fields
/// `None`. Only `user_agent`, `fingerprint_hash`, `accept_language` and
/// `preferred_locale` are read by the methods defined in this file; the
/// remaining fields are plain data carried for whoever consumes the struct.
///
/// NOTE(review): several fields look closely related (`referer` vs
/// `referrer_url`, `page_url` vs `landing_page` vs `entry_url`). Their exact
/// distinction is not visible here -- confirm against the code that fills them.
#[derive(Debug, Clone, Default)]
pub struct SessionAnalytics {
    pub ip_address: Option<String>,
    /// User-agent string; inspected by the bot/AI-crawler checks and hashed
    /// into the computed fingerprint.
    pub user_agent: Option<String>,
    pub device_type: Option<String>,
    pub browser: Option<String>,
    pub os: Option<String>,
    /// Pre-computed fingerprint. When set, `compute_fingerprint` returns it
    /// unchanged instead of hashing anything.
    pub fingerprint_hash: Option<String>,
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,
    /// First choice for the language component of the computed fingerprint.
    pub accept_language: Option<String>,
    /// Fallback language component, used only when `accept_language` is `None`.
    pub preferred_locale: Option<String>,
    pub screen_width: Option<i32>,
    pub screen_height: Option<i32>,
    pub timezone: Option<String>,
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
52
/// Substrings that identify a user agent as an AI crawler / AI assistant
/// fetcher.
///
/// Entries are compared against a *lowercased* user-agent string, so every
/// token here must itself be lowercase or it can never match.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
74
75impl SessionAnalytics {
76 pub fn is_ai_crawler(&self) -> bool {
77 self.user_agent.as_ref().is_some_and(|ua| {
78 let ua_lower = ua.to_lowercase();
79 AI_CRAWLER_TOKENS
80 .iter()
81 .any(|token| ua_lower.contains(token))
82 })
83 }
84
85 pub fn is_bot(&self) -> bool {
86 if self.is_ai_crawler() {
87 return false;
88 }
89 self.user_agent.as_ref().is_some_and(|ua| {
90 let ua_lower = ua.to_lowercase();
91 ua_lower.contains("bot")
92 || ua_lower.contains("crawler")
93 || ua_lower.contains("spider")
94 || ua_lower.contains("headless")
95 })
96 }
97
98 pub fn compute_fingerprint(&self) -> String {
99 use xxhash_rust::xxh64::xxh64;
100
101 if let Some(hash) = &self.fingerprint_hash {
102 return hash.clone();
103 }
104
105 let data = format!(
106 "{}|{}",
107 self.user_agent.as_deref().unwrap_or(""),
108 self.accept_language
109 .as_deref()
110 .or(self.preferred_locale.as_deref())
111 .unwrap_or("")
112 );
113
114 format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
115 }
116}
117
118#[derive(Debug, Clone)]
119pub struct AnalyticsSession {
120 pub session_id: SessionId,
121 pub user_id: Option<UserId>,
122 pub fingerprint: Option<String>,
123 pub created_at: DateTime<Utc>,
124}
125
126#[derive(Debug)]
127pub struct CreateSessionInput<'a> {
128 pub session_id: &'a SessionId,
129 pub user_id: Option<&'a UserId>,
130 pub analytics: &'a SessionAnalytics,
131 pub session_source: SessionSource,
132 pub is_bot: bool,
133 pub is_ai_crawler: bool,
134 pub expires_at: DateTime<Utc>,
135}
136
137#[async_trait]
138pub trait AnalyticsProvider: Send + Sync {
139 fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;
140
141 async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;
142
143 async fn find_recent_session_by_fingerprint(
144 &self,
145 fingerprint: &str,
146 max_age_seconds: i64,
147 ) -> AnalyticsResult<Option<AnalyticsSession>>;
148
149 async fn find_session_by_id(
150 &self,
151 session_id: &SessionId,
152 ) -> AnalyticsResult<Option<AnalyticsSession>>;
153
154 async fn migrate_user_sessions(
155 &self,
156 from_user_id: &UserId,
157 to_user_id: &UserId,
158 ) -> AnalyticsResult<u64>;
159
160 async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
161}
162
163#[async_trait]
164pub trait FingerprintProvider: Send + Sync {
165 async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;
166
167 async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;
168
169 async fn upsert_fingerprint(
170 &self,
171 fingerprint: &str,
172 ip_address: Option<&str>,
173 user_agent: Option<&str>,
174 screen_info: Option<&str>,
175 ) -> AnalyticsResult<()>;
176}
177
178pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;
179
180pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;