Skip to main content

systemprompt_traits/
analytics.rs

1use async_trait::async_trait;
2use chrono::{DateTime, Utc};
3use http::{HeaderMap, Uri};
4use std::sync::Arc;
5use systemprompt_identifiers::{SessionId, SessionSource, UserId};
6
/// Result alias used throughout this module: the error side is always
/// [`AnalyticsProviderError`].
pub type AnalyticsResult<T> = Result<T, AnalyticsProviderError>;
8
/// Error type carried by [`AnalyticsResult`].
///
/// The enum is `#[non_exhaustive]`, so code in other crates that matches on it
/// must include a wildcard arm; new variants can be added without a breaking
/// change.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum AnalyticsProviderError {
    /// Displays as "Session not found".
    // NOTE(review): which provider operations return this variant is decided by
    // the trait implementations and is not visible in this file.
    #[error("Session not found")]
    SessionNotFound,

    /// Displays as "Fingerprint not found".
    // NOTE(review): producers of this variant are likewise not visible here.
    #[error("Fingerprint not found")]
    FingerprintNotFound,

    /// Catch-all variant holding a free-form message; displays as
    /// "Internal error: <message>". `From<anyhow::Error>` maps into this variant.
    #[error("Internal error: {0}")]
    Internal(String),
}
21
22impl From<anyhow::Error> for AnalyticsProviderError {
23    fn from(err: anyhow::Error) -> Self {
24        Self::Internal(err.to_string())
25    }
26}
27
/// Per-session request metadata.
///
/// Every field is optional; the derived `Default` yields a value with all
/// fields set to `None`. Values of this type are returned by
/// `AnalyticsProvider::extract_analytics` and borrowed by `CreateSessionInput`.
///
/// NOTE(review): the exact source and format of most fields (headers, query
/// parameters, geo lookup, ...) is determined by the provider implementation
/// and is not visible in this file; only fields used by the methods of this
/// type are documented as fact below.
#[derive(Debug, Clone, Default)]
pub struct SessionAnalytics {
    pub ip_address: Option<String>,
    /// User-agent string; inspected (lowercased) by `is_ai_crawler` and `is_bot`,
    /// and hashed by `compute_fingerprint`.
    pub user_agent: Option<String>,
    pub device_type: Option<String>,
    pub browser: Option<String>,
    pub os: Option<String>,
    /// Pre-computed fingerprint; when present, `compute_fingerprint` returns it
    /// unchanged instead of hashing.
    pub fingerprint_hash: Option<String>,
    // NOTE(review): `referer`, `referrer_url` and `referrer_source` overlap in
    // name; how they differ is not shown here — confirm against the extractor.
    pub referer: Option<String>,
    pub referrer_url: Option<String>,
    pub referrer_source: Option<String>,
    /// Preferred second input of `compute_fingerprint`.
    pub accept_language: Option<String>,
    /// Fallback second input of `compute_fingerprint` when `accept_language`
    /// is `None`.
    pub preferred_locale: Option<String>,
    // NOTE(review): presumably pixels — confirm.
    pub screen_width: Option<i32>,
    pub screen_height: Option<i32>,
    pub timezone: Option<String>,
    // NOTE(review): `page_url`, `landing_page` and `entry_url` also look
    // closely related; their distinction is not visible in this file.
    pub page_url: Option<String>,
    pub landing_page: Option<String>,
    pub entry_url: Option<String>,
    pub country: Option<String>,
    pub region: Option<String>,
    pub city: Option<String>,
    // NOTE(review): presumably the standard `utm_*` campaign query parameters —
    // confirm against the extractor.
    pub utm_source: Option<String>,
    pub utm_medium: Option<String>,
    pub utm_campaign: Option<String>,
    pub utm_content: Option<String>,
    pub utm_term: Option<String>,
}
56
/// Substrings that mark a user agent as an AI crawler.
///
/// `SessionAnalytics::is_ai_crawler` lowercases the user agent and tests it for
/// each of these with `contains`. Only the user agent is lowercased, so every
/// entry added here must itself be lowercase or it will never match.
const AI_CRAWLER_TOKENS: &[&str] = &[
    "notebooklm",
    "gemini-deep-research",
    "grammarly",
    "chatgpt-user",
    "oai-searchbot",
    "gptbot",
    "perplexitybot",
    "perplexity-user",
    "claudebot",
    "claude-user",
    "claude-web",
    "anthropic-ai",
    "applebot-extended",
    "ccbot",
    "bytespider",
    "amazonbot",
    "youbot",
    "diffbot",
    "cohere-ai",
];
78
79impl SessionAnalytics {
80    pub fn is_ai_crawler(&self) -> bool {
81        self.user_agent.as_ref().is_some_and(|ua| {
82            let ua_lower = ua.to_lowercase();
83            AI_CRAWLER_TOKENS
84                .iter()
85                .any(|token| ua_lower.contains(token))
86        })
87    }
88
89    pub fn is_bot(&self) -> bool {
90        if self.is_ai_crawler() {
91            return false;
92        }
93        self.user_agent.as_ref().is_some_and(|ua| {
94            let ua_lower = ua.to_lowercase();
95            ua_lower.contains("bot")
96                || ua_lower.contains("crawler")
97                || ua_lower.contains("spider")
98                || ua_lower.contains("headless")
99        })
100    }
101
102    pub fn compute_fingerprint(&self) -> String {
103        use xxhash_rust::xxh64::xxh64;
104
105        if let Some(hash) = &self.fingerprint_hash {
106            return hash.clone();
107        }
108
109        let data = format!(
110            "{}|{}",
111            self.user_agent.as_deref().unwrap_or(""),
112            self.accept_language
113                .as_deref()
114                .or(self.preferred_locale.as_deref())
115                .unwrap_or("")
116        );
117
118        format!("fp_{:016x}", xxh64(data.as_bytes(), 0))
119    }
120}
121
/// Session record returned by the lookup methods of `AnalyticsProvider`
/// (`find_recent_session_by_fingerprint`, `find_session_by_id`).
#[derive(Debug, Clone)]
pub struct AnalyticsSession {
    /// Identifier of the session.
    pub session_id: SessionId,
    /// User attached to the session, if any.
    pub user_id: Option<UserId>,
    /// Fingerprint stored for the session, if any.
    // NOTE(review): presumably the value produced by
    // `SessionAnalytics::compute_fingerprint` — confirm against the provider.
    pub fingerprint: Option<String>,
    /// Creation timestamp in UTC.
    pub created_at: DateTime<Utc>,
}
129
/// Argument bundle for `AnalyticsProvider::create_session`.
///
/// Identifiers and analytics are borrowed for `'a`, so building this value
/// does not clone them.
#[derive(Debug)]
pub struct CreateSessionInput<'a> {
    /// Identifier of the session to create.
    pub session_id: &'a SessionId,
    /// User to attach to the session, if any.
    pub user_id: Option<&'a UserId>,
    /// Request metadata recorded with the session.
    pub analytics: &'a SessionAnalytics,
    pub session_source: SessionSource,
    // NOTE(review): presumably the results of `SessionAnalytics::is_bot` and
    // `SessionAnalytics::is_ai_crawler`, computed by the caller — confirm.
    pub is_bot: bool,
    pub is_ai_crawler: bool,
    // NOTE(review): presumably the instant after which the session is no
    // longer valid — confirm against the provider implementation.
    pub expires_at: DateTime<Utc>,
}
140
/// Abstraction over the analytics backend used for session tracking.
///
/// Implementors must be `Send + Sync`; the trait is used as a trait object via
/// `DynAnalyticsProvider`. The behaviour notes below are derived from the
/// signatures only — the actual semantics are defined by the implementations.
#[async_trait]
pub trait AnalyticsProvider: Send + Sync {
    /// Builds a [`SessionAnalytics`] from the request headers and, when
    /// available, the request URI. Synchronous and infallible by signature.
    fn extract_analytics(&self, headers: &HeaderMap, uri: Option<&Uri>) -> SessionAnalytics;

    /// Records a new session described by `input`.
    async fn create_session(&self, input: CreateSessionInput<'_>) -> AnalyticsResult<()>;

    /// Looks up a session by fingerprint; `Ok(None)` means no match.
    // NOTE(review): `max_age_seconds` presumably bounds how old a matching
    // session may be — confirm against the implementation.
    async fn find_recent_session_by_fingerprint(
        &self,
        fingerprint: &str,
        max_age_seconds: i64,
    ) -> AnalyticsResult<Option<AnalyticsSession>>;

    /// Looks up a session by its identifier; `Ok(None)` means no match.
    async fn find_session_by_id(
        &self,
        session_id: &SessionId,
    ) -> AnalyticsResult<Option<AnalyticsSession>>;

    /// Moves sessions from `from_user_id` to `to_user_id`.
    // NOTE(review): the returned `u64` is presumably the number of sessions
    // affected — confirm against the implementation.
    async fn migrate_user_sessions(
        &self,
        from_user_id: &UserId,
        to_user_id: &UserId,
    ) -> AnalyticsResult<u64>;

    /// Flags the given session as converted.
    // NOTE(review): what "converted" means (e.g. anonymous -> registered) is
    // not visible in this file.
    async fn mark_session_converted(&self, session_id: &SessionId) -> AnalyticsResult<()>;
}
166
/// Abstraction over fingerprint storage and fingerprint-based session lookup.
///
/// Implementors must be `Send + Sync`; the trait is used as a trait object via
/// `DynFingerprintProvider`. The behaviour notes below are derived from the
/// signatures only — the actual semantics are defined by the implementations.
#[async_trait]
pub trait FingerprintProvider: Send + Sync {
    /// Returns a count of active sessions for `fingerprint`.
    // NOTE(review): the definition of "active" is implementation-specific.
    async fn count_active_sessions(&self, fingerprint: &str) -> AnalyticsResult<i64>;

    /// Returns a reusable session for `fingerprint`, or `Ok(None)` if there is
    /// none.
    // NOTE(review): the `String` is presumably a session id in string form
    // (not a typed `SessionId`) — TODO confirm.
    async fn find_reusable_session(&self, fingerprint: &str) -> AnalyticsResult<Option<String>>;

    /// Inserts or updates the record for `fingerprint` with the optional
    /// request details.
    // NOTE(review): the expected format of `screen_info` is not visible here.
    async fn upsert_fingerprint(
        &self,
        fingerprint: &str,
        ip_address: Option<&str>,
        user_agent: Option<&str>,
        screen_info: Option<&str>,
    ) -> AnalyticsResult<()>;
}
181
/// Shared, reference-counted handle to a type-erased [`AnalyticsProvider`].
pub type DynAnalyticsProvider = Arc<dyn AnalyticsProvider>;
/// Shared, reference-counted handle to a type-erased [`FingerprintProvider`].
pub type DynFingerprintProvider = Arc<dyn FingerprintProvider>;