Skip to main content

systemprompt_traits/
analytics.rs

1//! Session analytics and fingerprinting provider traits.
2
3use async_trait::async_trait;
4use chrono::{DateTime, Utc};
5use http::{HeaderMap, Uri};
6use std::sync::Arc;
7use systemprompt_identifiers::{SessionId, SessionSource, UserId};
8
9pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
10
11#[derive(Debug, thiserror::Error)]
12#[non_exhaustive]
13pub enum AnalyticsProviderError {
14    #[error("Session not found")]
15    SessionNotFound,
16
17    #[error("Fingerprint not found")]
18    FingerprintNotFound,
19
20    #[error("Internal error: {0}")]
21    Internal(String),
22}
23
24impl From<anyhow::Error> for AnalyticsProviderError {
25    fn from(err: anyhow::Error) -> Self {
26        Self::Internal(err.to_string())
27    }
28}
29
/// Optional, per-session analytics attributes.
///
/// Every field is an `Option`, and `Default` yields a value with all fields
/// set to `None`. Within this module only a handful of fields are read:
/// `user_agent` by the bot/crawler checks, and `fingerprint_hash`,
/// `user_agent`, `accept_language` and `preferred_locale` by
/// `compute_fingerprint`.
///
/// NOTE(review): the meaning of the remaining fields is inferred from their
/// names only; confirm against the provider that populates them.
#[derive(Debug, Clone, Default)]
pub struct SessionAnalytics {
    // --- client / request attributes (names suggest request-derived data) ---
    pub ip_address: Option<String>,
    /// Matched case-insensitively by `is_ai_crawler` and `is_bot`; also the
    /// first input of the derived fingerprint.
    pub user_agent: Option<String>,
    pub device_type: Option<String>,
    pub browser: Option<String>,
    pub os: Option<String>,
    /// When set, `compute_fingerprint` returns this value unchanged instead of
    /// deriving one.
    pub fingerprint_hash: Option<String>,

    // --- referrer information ---
    // NOTE(review): `referer` and `referrer_url` both exist; how they differ
    // is not visible here.
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,

    // --- locale and display ---
    /// Preferred language input for the derived fingerprint.
    pub accept_language: Option<String>,
    /// Fallback language input for the derived fingerprint, used only when
    /// `accept_language` is `None`.
    pub preferred_locale: Option<String>,
    pub screen_width: Option<i32>,
    pub screen_height: Option<i32>,
    pub timezone: Option<String>,

    // --- pages ---
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,

    // --- location ---
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,

    // --- UTM parameters ---
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
58
/// Substrings that mark a user agent as an AI crawler.
///
/// `SessionAnalytics::is_ai_crawler` lowercases the user agent before
/// searching for these tokens, so every entry must itself be lowercase or it
/// can never match.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
80
81impl SessionAnalytics {
82    pub fn is_ai_crawler(&self) -> bool {
83        self.user_agent.as_ref().is_some_and(|ua| {
84            let ua_lower = ua.to_lowercase();
85            AI_CRAWLER_TOKENS
86                .iter()
87                .any(|token| ua_lower.contains(token))
88        })
89    }
90
91    pub fn is_bot(&self) -> bool {
92        if self.is_ai_crawler() {
93            return false;
94        }
95        self.user_agent.as_ref().is_some_and(|ua| {
96            let ua_lower = ua.to_lowercase();
97            ua_lower.contains("bot")
98                || ua_lower.contains("crawler")
99                || ua_lower.contains("spider")
100                || ua_lower.contains("headless")
101        })
102    }
103
104    pub fn compute_fingerprint(&self) -> String {
105        use xxhash_rust::xxh64::xxh64;
106
107        if let Some(hash) = &self.fingerprint_hash {
108            return hash.clone();
109        }
110
111        let data = format!(
112            "{}|{}",
113            self.user_agent.as_deref().unwrap_or(""),
114            self.accept_language
115                .as_deref()
116                .or(self.preferred_locale.as_deref())
117                .unwrap_or("")
118        );
119
120        format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
121    }
122}
123
124#[derive(Debug, Clone)]
125pub struct AnalyticsSession {
126    pub session_id: SessionId,
127    pub user_id: Option<UserId>,
128    pub fingerprint: Option<String>,
129    pub created_at: DateTime<Utc>,
130}
131
132#[derive(Debug)]
133pub struct CreateSessionInput<'a> {
134    pub session_id: &'a SessionId,
135    pub user_id: Option<&'a UserId>,
136    pub analytics: &'a SessionAnalytics,
137    pub session_source: SessionSource,
138    pub is_bot: bool,
139    pub is_ai_crawler: bool,
140    pub expires_at: DateTime<Utc>,
141}
142
143#[async_trait]
144pub trait AnalyticsProvider: Send + Sync {
145    fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;
146
147    async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;
148
149    async fn find_recent_session_by_fingerprint(
150        &self,
151        fingerprint: &str,
152        max_age_seconds: i64,
153    ) -> AnalyticsResult<Option<AnalyticsSession>>;
154
155    async fn find_session_by_id(
156        &self,
157        session_id: &SessionId,
158    ) -> AnalyticsResult<Option<AnalyticsSession>>;
159
160    async fn migrate_user_sessions(
161        &self,
162        from_user_id: &UserId,
163        to_user_id: &UserId,
164    ) -> AnalyticsResult<u64>;
165
166    async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
167}
168
169#[async_trait]
170pub trait FingerprintProvider: Send + Sync {
171    async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;
172
173    async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;
174
175    async fn upsert_fingerprint(
176        &self,
177        fingerprint: &str,
178        ip_address: Option<&str>,
179        user_agent: Option<&str>,
180        screen_info: Option<&str>,
181    ) -> AnalyticsResult<()>;
182}
183
184pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;
185
186pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;