Skip to main content

systemprompt_traits/
analytics.rs

//! Session analytics and fingerprinting provider traits.
2
3use async_trait::async_trait;
4use chrono::{DateTime, Utc};
5use http::{HeaderMap, Uri};
6use std::sync::Arc;
7use systemprompt_identifiers::{SessionId, SessionSource, UserId};
8
9pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
10
11#[derive(Debug, thiserror::Error)]
12#[non_exhaustive]
13pub enum AnalyticsProviderError {
14    #[error("Session not found")]
15    SessionNotFound,
16
17    #[error("Fingerprint not found")]
18    FingerprintNotFound,
19
20    #[error("Internal error: {0}")]
21    Internal(String),
22}
23
/// Optional request and client attributes associated with a session.
///
/// Every field is an `Option`, so `SessionAnalytics::default()` produces a
/// value with nothing populated. Of these fields, the inherent methods on this
/// type only read `user_agent`, `fingerprint_hash`, `accept_language` and
/// `preferred_locale`; the rest are carried as plain data.
#[derive(Clone, Debug, Default)]
pub struct SessionAnalytics {
    pub ip_address: Option<String>,
    /// Raw user-agent string; inspected by `is_ai_crawler`, `is_bot` and
    /// `compute_fingerprint`.
    pub user_agent: Option<String>,
    pub device_type: Option<String>,
    pub browser: Option<String>,
    pub os: Option<String>,
    /// When set, `compute_fingerprint` returns this value verbatim instead of
    /// deriving one.
    pub fingerprint_hash: Option<String>,
    // NOTE(review): `referer` and `referrer_url` both exist; how they differ
    // is not visible in this file.
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,
    /// First choice for the language component of a derived fingerprint.
    pub accept_language: Option<String>,
    /// Fallback for the language component when `accept_language` is `None`.
    pub preferred_locale: Option<String>,
    pub screen_width: Option<i32>,
    pub screen_height: Option<i32>,
    pub timezone: Option<String>,
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
52
/// Substrings that mark a user agent as an AI crawler.
///
/// `SessionAnalytics::is_ai_crawler` lowercases the user agent before
/// searching for these, so every entry must itself be lowercase.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
74
75impl SessionAnalytics {
76    pub fn is_ai_crawler(&self) -> bool {
77        self.user_agent.as_ref().is_some_and(|ua| {
78            let ua_lower = ua.to_lowercase();
79            AI_CRAWLER_TOKENS
80                .iter()
81                .any(|token| ua_lower.contains(token))
82        })
83    }
84
85    pub fn is_bot(&self) -> bool {
86        if self.is_ai_crawler() {
87            return false;
88        }
89        self.user_agent.as_ref().is_some_and(|ua| {
90            let ua_lower = ua.to_lowercase();
91            ua_lower.contains("bot")
92                || ua_lower.contains("crawler")
93                || ua_lower.contains("spider")
94                || ua_lower.contains("headless")
95        })
96    }
97
98    pub fn compute_fingerprint(&self) -> String {
99        use xxhash_rust::xxh64::xxh64;
100
101        if let Some(hash) = &self.fingerprint_hash {
102            return hash.clone();
103        }
104
105        let data = format!(
106            "{}|{}",
107            self.user_agent.as_deref().unwrap_or(""),
108            self.accept_language
109                .as_deref()
110                .or(self.preferred_locale.as_deref())
111                .unwrap_or("")
112        );
113
114        format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
115    }
116}
117
118#[derive(Debug, Clone)]
119pub struct AnalyticsSession {
120    pub session_id: SessionId,
121    pub user_id: Option<UserId>,
122    pub fingerprint: Option<String>,
123    pub created_at: DateTime<Utc>,
124}
125
126#[derive(Debug)]
127pub struct CreateSessionInput<'a> {
128    pub session_id: &'a SessionId,
129    pub user_id: Option<&'a UserId>,
130    pub analytics: &'a SessionAnalytics,
131    pub session_source: SessionSource,
132    pub is_bot: bool,
133    pub is_ai_crawler: bool,
134    pub expires_at: DateTime<Utc>,
135}
136
137#[async_trait]
138pub trait AnalyticsProvider: Send + Sync {
139    fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;
140
141    async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;
142
143    async fn find_recent_session_by_fingerprint(
144        &self,
145        fingerprint: &str,
146        max_age_seconds: i64,
147    ) -> AnalyticsResult<Option<AnalyticsSession>>;
148
149    async fn find_session_by_id(
150        &self,
151        session_id: &SessionId,
152    ) -> AnalyticsResult<Option<AnalyticsSession>>;
153
154    async fn migrate_user_sessions(
155        &self,
156        from_user_id: &UserId,
157        to_user_id: &UserId,
158    ) -> AnalyticsResult<u64>;
159
160    async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
161}
162
163#[async_trait]
164pub trait FingerprintProvider: Send + Sync {
165    async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;
166
167    async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;
168
169    async fn upsert_fingerprint(
170        &self,
171        fingerprint: &str,
172        ip_address: Option<&str>,
173        user_agent: Option<&str>,
174        screen_info: Option<&str>,
175    ) -> AnalyticsResult<()>;
176}
177
178pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;
179
180pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;