Skip to main content

rush_sync_server/server/
analytics.rs

// src/server/analytics.rs
//
// Lightweight in-memory analytics tracker with periodic file persistence.
// Filters out noise (health checks, bots, internal assets) and tracks
// meaningful page views, downloads, unique visitors, and subdomain stats.
6
7use chrono::{Local, NaiveDate, TimeDelta};
8use serde::{Deserialize, Serialize};
9use serde_json::json;
10use std::collections::{HashMap, HashSet, VecDeque};
11use std::sync::{Arc, OnceLock, RwLock};
12
/// Process-wide analytics state. It starts empty and is filled in lazily by
/// `get_analytics` through `OnceLock::get_or_init`.
static ANALYTICS: OnceLock<Arc<RwLock<AnalyticsTracker>>> = OnceLock::new();
14
/// Complete analytics state held in memory and serialized as a whole to the
/// analytics JSON file.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct AnalyticsTracker {
    /// Per-day aggregates, keyed by the local date formatted as `%Y-%m-%d`.
    days: HashMap<String, DayData>,
    /// Per-hour buckets in insertion order (oldest at the front);
    /// `track_request` trims the queue to at most 48 entries.
    hourly: VecDeque<HourBucket>,
}
20
/// Aggregated counters for a single calendar day.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
struct DayData {
    /// Number of tracked requests recorded for the day.
    total_views: u64,
    /// Number of tracked requests whose path `is_download` accepted.
    total_downloads: u64,
    /// Hashed client IPs (output of `hash_ip`) seen during the day.
    unique_ips: HashSet<String>,
    /// View count per request path, with the query string removed.
    page_counts: HashMap<String, u64>,
    /// Download count per request path, with the query string removed.
    download_counts: HashMap<String, u64>,
    /// View count per subdomain; requests with an empty subdomain are stored
    /// under the key `"direct"`.
    subdomain_views: HashMap<String, u64>,
    /// Hashed client IPs per subdomain, using the same keys as `subdomain_views`.
    subdomain_ips: HashMap<String, HashSet<String>>,
}
31
/// Traffic recorded during one clock hour.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
struct HourBucket {
    /// Bucket key: local time formatted as `%Y-%m-%dT%H:00`.
    hour: String,
    /// Number of tracked requests recorded in this hour.
    views: u64,
    /// Hashed client IPs (output of `hash_ip`) seen in this hour.
    unique_ips: HashSet<String>,
}
38
39/// Get or initialize the global analytics tracker.
40/// On first call, loads persisted data from disk and starts periodic save.
41pub fn get_analytics() -> &'static Arc<RwLock<AnalyticsTracker>> {
42    ANALYTICS.get_or_init(|| {
43        let tracker = load_from_file().unwrap_or_default();
44        let arc = Arc::new(RwLock::new(tracker));
45
46        let arc_clone = arc.clone();
47        std::thread::spawn(move || {
48            let rt = tokio::runtime::Builder::new_current_thread()
49                .enable_all()
50                .build()
51                .expect("analytics save runtime");
52            rt.block_on(async move {
53                loop {
54                    tokio::time::sleep(tokio::time::Duration::from_secs(300)).await;
55                    if let Ok(tracker) = arc_clone.read() {
56                        if let Err(e) = save_to_file(&tracker) {
57                            log::error!("Failed to save analytics: {}", e);
58                        }
59                    }
60                }
61            });
62        });
63
64        log::info!("Analytics tracker initialized");
65        arc
66    })
67}
68
69/// Track a single request. Called from proxy handler and server middleware.
70/// Filters out non-trackable requests (health checks, bots, internal assets).
71pub fn track_request(subdomain: &str, path: &str, ip: &str, user_agent: &str) {
72    if !is_trackable_request(path, user_agent) {
73        return;
74    }
75
76    let analytics = get_analytics();
77    let mut tracker = match analytics.write() {
78        Ok(t) => t,
79        Err(_) => return,
80    };
81
82    let now = Local::now();
83    let date = now.format("%Y-%m-%d").to_string();
84    let hour_key = now.format("%Y-%m-%dT%H:00").to_string();
85    let ip_hash = hash_ip(ip);
86    let subdomain_key = if subdomain.is_empty() {
87        "direct"
88    } else {
89        subdomain
90    };
91
92    let clean_path = path.split('?').next().unwrap_or(path);
93
94    // Update day data
95    let day = tracker.days.entry(date).or_default();
96    day.total_views += 1;
97    day.unique_ips.insert(ip_hash.clone());
98    *day.page_counts.entry(clean_path.to_string()).or_default() += 1;
99    *day.subdomain_views
100        .entry(subdomain_key.to_string())
101        .or_default() += 1;
102    day.subdomain_ips
103        .entry(subdomain_key.to_string())
104        .or_default()
105        .insert(ip_hash.clone());
106
107    if is_download(clean_path) {
108        day.total_downloads += 1;
109        *day.download_counts
110            .entry(clean_path.to_string())
111            .or_default() += 1;
112    }
113
114    // Update hourly bucket
115    if let Some(bucket) = tracker.hourly.back_mut() {
116        if bucket.hour == hour_key {
117            bucket.views += 1;
118            bucket.unique_ips.insert(ip_hash);
119            return;
120        }
121    }
122    let mut ips = HashSet::new();
123    ips.insert(ip_hash);
124    tracker.hourly.push_back(HourBucket {
125        hour: hour_key,
126        views: 1,
127        unique_ips: ips,
128    });
129    while tracker.hourly.len() > 48 {
130        tracker.hourly.pop_front();
131    }
132}
133
/// Decides whether a request should be counted.
///
/// Returns `false` for monitoring/internal API endpoints, internal assets and
/// user agents that look like bots or scripted clients; `true` otherwise.
/// The path is compared case-insensitively and without its query string; the
/// user agent is matched case-insensitively by substring.
fn is_trackable_request(path: &str, user_agent: &str) -> bool {
    // Monitoring/internal endpoints (exact match).
    const INTERNAL_ENDPOINTS: [&str; 8] = [
        "/api/health",
        "/api/status",
        "/api/metrics",
        "/api/analytics",
        "/api/analytics/dashboard",
        "/api/logs",
        "/api/logs/raw",
        "/api/ping",
    ];
    // Internal assets: exact files and path prefixes.
    const INTERNAL_FILES: [&str; 2] = ["/rss.js", "/favicon.ico"];
    const INTERNAL_PREFIXES: [&str; 3] = ["/.rss/", "/ws/", "/.well-known/"];
    // Substrings that mark a user agent as a bot, crawler or scripted client.
    const AUTOMATION_MARKERS: [&str; 9] = [
        "bot",
        "crawler",
        "spider",
        "curl",
        "wget",
        "python-requests",
        "go-http-client",
        "headlesschrome",
        "phantomjs",
    ];

    let lowered_path = path.to_lowercase();
    let route = match lowered_path.split_once('?') {
        Some((before_query, _)) => before_query,
        None => lowered_path.as_str(),
    };

    let is_internal = INTERNAL_ENDPOINTS.contains(&route)
        || INTERNAL_FILES.contains(&route)
        || INTERNAL_PREFIXES
            .iter()
            .any(|prefix| route.starts_with(*prefix));
    if is_internal {
        return false;
    }

    let agent = user_agent.to_lowercase();
    !AUTOMATION_MARKERS
        .iter()
        .any(|marker| agent.contains(*marker))
}
180
/// Returns `true` when `path` ends with one of the known archive/installer
/// extensions. The comparison ignores case.
fn is_download(path: &str) -> bool {
    const DOWNLOAD_SUFFIXES: [&str; 9] = [
        ".zip",
        ".tar.gz",
        ".exe",
        ".dmg",
        ".deb",
        ".rpm",
        ".msi",
        ".pkg",
        ".appimage",
    ];

    let normalized = path.to_lowercase();
    DOWNLOAD_SUFFIXES
        .iter()
        .any(|suffix| normalized.ends_with(*suffix))
}
193
/// Maps an IP address to a short lowercase hex token used as visitor identity.
///
/// The tokens are written to the analytics file and compared across restarts,
/// so they must not change between builds. `DefaultHasher` gives no such
/// guarantee (its algorithm is unspecified and may change between Rust
/// releases), therefore a fixed algorithm — 64-bit FNV-1a over the UTF-8
/// bytes — is used instead.
///
/// Tokens written by earlier builds used a different algorithm and will not
/// match, so unique-visitor counts that span the upgrade may be temporarily
/// inflated.
///
/// The hash is unsalted: it pseudonymizes addresses but is not a
/// cryptographic anonymization.
fn hash_ip(ip: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    let digest = ip.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{:x}", digest)
}
201
202/// Get analytics summary as JSON for the API endpoint.
203pub fn get_summary() -> serde_json::Value {
204    let analytics = get_analytics();
205    let tracker = match analytics.read() {
206        Ok(t) => t,
207        Err(_) => return json!({"error": "lock poisoned"}),
208    };
209
210    let now = Local::now();
211    let today = now.format("%Y-%m-%d").to_string();
212
213    let today_data = build_period_summary(&tracker, &today, 1);
214    let week_data = build_period_summary(&tracker, &today, 7);
215    let month_data = build_period_summary(&tracker, &today, 30);
216
217    let cutoff = (now - TimeDelta::hours(24))
218        .format("%Y-%m-%dT%H:00")
219        .to_string();
220    let hourly: Vec<serde_json::Value> = tracker
221        .hourly
222        .iter()
223        .filter(|b| b.hour >= cutoff)
224        .map(|b| {
225            json!({
226                "hour": b.hour,
227                "views": b.views,
228                "unique": b.unique_ips.len()
229            })
230        })
231        .collect();
232
233    let by_subdomain = build_subdomain_summary(&tracker, &today, 7);
234
235    json!({
236        "today": today_data,
237        "last_7_days": week_data,
238        "last_30_days": month_data,
239        "hourly_traffic": hourly,
240        "by_subdomain": by_subdomain,
241    })
242}
243
244fn build_period_summary(
245    tracker: &AnalyticsTracker,
246    today: &str,
247    days: i64,
248) -> serde_json::Value {
249    let today_date = NaiveDate::parse_from_str(today, "%Y-%m-%d")
250        .unwrap_or_else(|_| Local::now().date_naive());
251
252    let mut total_views = 0u64;
253    let mut total_downloads = 0u64;
254    let mut all_ips: HashSet<String> = HashSet::new();
255    let mut page_totals: HashMap<String, u64> = HashMap::new();
256    let mut download_totals: HashMap<String, u64> = HashMap::new();
257
258    for i in 0..days {
259        let date = (today_date - TimeDelta::days(i))
260            .format("%Y-%m-%d")
261            .to_string();
262        if let Some(day) = tracker.days.get(&date) {
263            total_views += day.total_views;
264            total_downloads += day.total_downloads;
265            all_ips.extend(day.unique_ips.iter().cloned());
266            for (path, count) in &day.page_counts {
267                *page_totals.entry(path.clone()).or_default() += count;
268            }
269            for (file, count) in &day.download_counts {
270                *download_totals.entry(file.clone()).or_default() += count;
271            }
272        }
273    }
274
275    let mut pages: Vec<_> = page_totals.into_iter().collect();
276    pages.sort_by(|a, b| b.1.cmp(&a.1));
277    let top_pages: Vec<serde_json::Value> = pages
278        .into_iter()
279        .take(10)
280        .map(|(path, views)| json!({"path": path, "views": views}))
281        .collect();
282
283    let mut downloads: Vec<_> = download_totals.into_iter().collect();
284    downloads.sort_by(|a, b| b.1.cmp(&a.1));
285    let top_downloads: Vec<serde_json::Value> = downloads
286        .into_iter()
287        .take(10)
288        .map(|(file, count)| json!({"file": file, "count": count}))
289        .collect();
290
291    json!({
292        "page_views": total_views,
293        "unique_visitors": all_ips.len(),
294        "downloads": total_downloads,
295        "top_pages": top_pages,
296        "top_downloads": top_downloads,
297    })
298}
299
300fn build_subdomain_summary(
301    tracker: &AnalyticsTracker,
302    today: &str,
303    days: i64,
304) -> serde_json::Value {
305    let today_date = NaiveDate::parse_from_str(today, "%Y-%m-%d")
306        .unwrap_or_else(|_| Local::now().date_naive());
307
308    let mut views: HashMap<String, u64> = HashMap::new();
309    let mut ips: HashMap<String, HashSet<String>> = HashMap::new();
310
311    for i in 0..days {
312        let date = (today_date - TimeDelta::days(i))
313            .format("%Y-%m-%d")
314            .to_string();
315        if let Some(day) = tracker.days.get(&date) {
316            for (sub, v) in &day.subdomain_views {
317                *views.entry(sub.clone()).or_default() += v;
318            }
319            for (sub, ip_set) in &day.subdomain_ips {
320                ips.entry(sub.clone())
321                    .or_default()
322                    .extend(ip_set.iter().cloned());
323            }
324        }
325    }
326
327    let mut map = serde_json::Map::new();
328    for (sub, v) in &views {
329        let unique = ips.get(sub).map(|s| s.len()).unwrap_or(0);
330        map.insert(sub.clone(), json!({"views": v, "unique": unique}));
331    }
332    serde_json::Value::Object(map)
333}
334
335fn get_analytics_path() -> std::path::PathBuf {
336    crate::core::helpers::get_base_dir()
337        .map(|b| b.join(".rss").join("analytics.json"))
338        .unwrap_or_else(|_| std::path::PathBuf::from(".rss/analytics.json"))
339}
340
341fn save_to_file(tracker: &AnalyticsTracker) -> Result<(), Box<dyn std::error::Error>> {
342    let path = get_analytics_path();
343    if let Some(parent) = path.parent() {
344        std::fs::create_dir_all(parent)?;
345    }
346    let json = serde_json::to_string(tracker)?;
347    std::fs::write(&path, json)?;
348    log::debug!("Analytics saved to {:?}", path);
349    Ok(())
350}
351
352fn load_from_file() -> Option<AnalyticsTracker> {
353    let path = get_analytics_path();
354    let content = std::fs::read_to_string(&path).ok()?;
355    serde_json::from_str(&content).ok()
356}
357
358/// Save analytics to disk. Called during shutdown.
359pub fn save_analytics_on_shutdown() {
360    if let Some(analytics) = ANALYTICS.get() {
361        if let Ok(mut tracker) = analytics.write() {
362            prune_old_data(&mut tracker);
363            if let Err(e) = save_to_file(&tracker) {
364                log::error!("Failed to save analytics on shutdown: {}", e);
365            } else {
366                log::info!("Analytics saved on shutdown");
367            }
368        }
369    }
370}
371
372fn prune_old_data(tracker: &mut AnalyticsTracker) {
373    let cutoff = (Local::now() - TimeDelta::days(60))
374        .format("%Y-%m-%d")
375        .to_string();
376    tracker
377        .days
378        .retain(|date, _| date.as_str() >= cutoff.as_str());
379}
380
/// Dashboard HTML template. The placeholder `__ANALYTICS_DATA__` is replaced
/// with the current analytics JSON at render time.
///
/// The placeholder sits inside an inline `<script>` element and is assigned
/// to the variable `D`, from which the page renders the summary cards, the
/// hourly chart, the top pages/downloads tables and the subdomain cards.
/// Paths, file names and subdomain names pass through the script's `esc()`
/// helper before being inserted as HTML. The page reloads itself after
/// 30 seconds.
///
/// NOTE(review): request paths end up in the inlined JSON. The code that
/// substitutes the placeholder is not in this file; it presumably needs to
/// escape `<` (for example as `\u003c`) so that a path containing `</script>`
/// cannot terminate the script element — confirm at the render site.
pub const DASHBOARD_HTML: &str = r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Analytics - Rush Sync Server</title>
<link rel="icon" href="/.rss/favicon.svg" type="image/svg+xml">
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;background:#0a0a0f;color:#e4e4ef;min-height:100vh}
.container{max-width:1200px;margin:0 auto;padding:24px}
.header{display:flex;justify-content:space-between;align-items:center;margin-bottom:24px}
.header h1{font-size:24px;font-weight:700;letter-spacing:-0.5px}
.header h1 span{color:#6c63ff}
.back{color:#6c63ff;text-decoration:none;font-size:14px}
.cards{display:grid;grid-template-columns:repeat(auto-fit,minmax(200px,1fr));gap:16px;margin-bottom:24px}
.card{background:#14141f;border:1px solid #2a2a3a;border-radius:12px;padding:20px}
.card .lbl{font-size:12px;color:#8888a0;text-transform:uppercase;letter-spacing:0.5px;margin-bottom:8px}
.card .val{font-size:36px;font-weight:700}
.card .val.purple{color:#6c63ff}
.card .val.green{color:#00d4aa}
.card .val.blue{color:#00a8ff}
.tabs{display:flex;gap:8px;margin-bottom:16px}
.tab{padding:8px 16px;border-radius:8px;background:#14141f;border:1px solid #2a2a3a;color:#8888a0;cursor:pointer;font-size:13px;transition:all 0.2s}
.tab:hover{border-color:#6c63ff}
.tab.active{background:#6c63ff;color:#fff;border-color:#6c63ff}
.section{background:#14141f;border:1px solid #2a2a3a;border-radius:12px;padding:20px;margin-bottom:16px}
.section h2{font-size:15px;margin-bottom:16px;font-weight:600;color:#c0c0d0}
.chart{display:flex;align-items:flex-end;gap:2px;height:140px;padding-bottom:24px;position:relative}
.bar-w{flex:1;display:flex;flex-direction:column;align-items:center;position:relative}
.bar{width:100%;background:linear-gradient(180deg,#6c63ff,#4a43cc);border-radius:3px 3px 0 0;min-height:2px;transition:height 0.3s;cursor:pointer}
.bar:hover{background:linear-gradient(180deg,#8b83ff,#6c63ff)}
.bar-lbl{font-size:8px;color:#8888a0;position:absolute;bottom:-20px;white-space:nowrap}
.tooltip{display:none;position:absolute;top:-30px;background:#2a2a3a;color:#e4e4ef;padding:4px 8px;border-radius:4px;font-size:11px;white-space:nowrap;z-index:10}
.bar:hover+.tooltip{display:block}
.grid{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-bottom:16px}
@media(max-width:768px){.grid{grid-template-columns:1fr}.cards{grid-template-columns:1fr 1fr}}
table{width:100%;border-collapse:collapse}
th{text-align:left;font-size:11px;color:#8888a0;text-transform:uppercase;letter-spacing:0.5px;padding:8px 0;border-bottom:1px solid #2a2a3a}
td{padding:8px 0;font-size:13px;border-bottom:1px solid #1a1a2a}
td:last-child{text-align:right;font-weight:600;color:#6c63ff}
.sub-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:12px}
.sub-card{background:#1a1a2a;border-radius:8px;padding:16px}
.sub-card .name{font-weight:600;margin-bottom:4px;color:#e4e4ef}
.sub-card .stats{font-size:12px;color:#8888a0}
.empty{color:#555;font-style:italic;font-size:13px;padding:20px;text-align:center}
.footer{text-align:center;font-size:11px;color:#555;padding:16px}
</style>
</head>
<body>
<div class="container">
<div class="header"><h1>Analytics <span>Dashboard</span></h1><a href="/" class="back">&larr; Back</a></div>
<div class="cards" id="cards"></div>
<div class="tabs" id="tabs">
<div class="tab active" data-p="today">Today</div>
<div class="tab" data-p="last_7_days">7 Days</div>
<div class="tab" data-p="last_30_days">30 Days</div>
</div>
<div class="section"><h2>Hourly Traffic (Last 24h)</h2><div class="chart" id="chart"></div></div>
<div class="grid">
<div class="section"><h2>Top Pages</h2><div id="pages"></div></div>
<div class="section"><h2>Top Downloads</h2><div id="downloads"></div></div>
</div>
<div class="section"><h2>By Subdomain</h2><div class="sub-grid" id="subs"></div></div>
<div class="footer" id="foot">Loading...</div>
</div>
<script>
var D=__ANALYTICS_DATA__;
var P='today';
document.querySelectorAll('.tab').forEach(function(t){t.addEventListener('click',function(){document.querySelectorAll('.tab').forEach(function(x){x.classList.remove('active')});t.classList.add('active');P=t.dataset.p;render()})});
function render(){var p=D[P]||D.today||{};
document.getElementById('cards').innerHTML='<div class="card"><div class="lbl">Page Views</div><div class="val purple">'+fmt(p.page_views)+'</div></div>'+'<div class="card"><div class="lbl">Unique Visitors</div><div class="val green">'+fmt(p.unique_visitors)+'</div></div>'+'<div class="card"><div class="lbl">Downloads</div><div class="val blue">'+fmt(p.downloads)+'</div></div>';
var h=D.hourly_traffic||[];
if(h.length===0){document.getElementById('chart').innerHTML='<div class="empty">No hourly data yet</div>'}
else{var mx=Math.max.apply(null,h.map(function(x){return x.views}))||1;document.getElementById('chart').innerHTML=h.map(function(x){var pct=Math.max((x.views/mx)*100,2);var hr=(x.hour.split('T')[1]||'').replace(':00','h');return '<div class="bar-w"><div class="bar" style="height:'+pct+'%"></div><div class="tooltip">'+x.views+' views, '+x.unique+' unique</div><div class="bar-lbl">'+hr+'</div></div>'}).join('')}
var pg=p.top_pages||[];
if(pg.length===0){document.getElementById('pages').innerHTML='<div class="empty">No page views yet</div>'}
else{document.getElementById('pages').innerHTML='<table><tr><th>Page</th><th>Views</th></tr>'+pg.map(function(x){return '<tr><td>'+esc(x.path)+'</td><td>'+fmt(x.views)+'</td></tr>'}).join('')+'</table>'}
var dl=p.top_downloads||[];
if(dl.length===0){document.getElementById('downloads').innerHTML='<div class="empty">No downloads yet</div>'}
else{document.getElementById('downloads').innerHTML='<table><tr><th>File</th><th>Count</th></tr>'+dl.map(function(x){return '<tr><td>'+esc(x.file)+'</td><td>'+fmt(x.count)+'</td></tr>'}).join('')+'</table>'}
var sb=D.by_subdomain||{};var sk=Object.keys(sb);
if(sk.length===0){document.getElementById('subs').innerHTML='<div class="empty">No subdomain data yet</div>'}
else{document.getElementById('subs').innerHTML=sk.map(function(s){return '<div class="sub-card"><div class="name">'+esc(s)+'</div><div class="stats">'+fmt(sb[s].views)+' views &middot; '+fmt(sb[s].unique)+' unique</div></div>'}).join('')}
document.getElementById('foot').textContent='Last updated: '+new Date().toLocaleTimeString()+' \u00b7 Auto-refresh in 30s'}
function fmt(n){return (n||0).toLocaleString()}
function esc(s){var d=document.createElement('div');d.textContent=s;return d.innerHTML}
render();setTimeout(function(){location.reload()},30000);
</script>
</body></html>"#;
472
#[cfg(test)]
mod tests {
    use super::*;

    /// User agent of an ordinary browser, used where only the path matters.
    const BROWSER_UA: &str = "Mozilla/5.0";

    /// Monitoring endpoints are never tracked.
    #[test]
    fn test_is_trackable_filters_health() {
        for endpoint in ["/api/health", "/api/status", "/api/metrics"] {
            assert!(!is_trackable_request(endpoint, BROWSER_UA));
        }
    }

    /// Internal assets and service paths are never tracked.
    #[test]
    fn test_is_trackable_filters_internal() {
        let internal_paths = [
            "/.rss/style.css",
            "/rss.js",
            "/ws/hot-reload",
            "/.well-known/acme-challenge/xxx",
        ];
        for path in internal_paths {
            assert!(!is_trackable_request(path, BROWSER_UA));
        }
    }

    /// Bot and scripted-client user agents are never tracked.
    #[test]
    fn test_is_trackable_filters_bots() {
        for agent in ["Googlebot/2.1", "curl/7.68.0", "Python-requests/2.28"] {
            assert!(!is_trackable_request("/", agent));
        }
    }

    /// Ordinary browser requests to regular pages are tracked.
    #[test]
    fn test_is_trackable_allows_real_requests() {
        let real_requests = [
            ("/", "Mozilla/5.0 (Macintosh)"),
            ("/docs", "Mozilla/5.0"),
            ("/about", "Safari/537.36"),
        ];
        for (path, agent) in real_requests {
            assert!(is_trackable_request(path, agent));
        }
    }

    /// Download detection accepts known extensions regardless of case.
    #[test]
    fn test_is_download() {
        let downloads = [
            "/releases/app.zip",
            "/releases/app.tar.gz",
            "/releases/app.exe",
            "/releases/app.dmg",
            "/releases/app.AppImage",
        ];
        for path in downloads {
            assert!(is_download(path));
        }

        for path in ["/index.html", "/api/status"] {
            assert!(!is_download(path));
        }
    }

    /// Equal inputs hash equally; different inputs hash differently.
    #[test]
    fn test_hash_ip_deterministic() {
        let first = hash_ip("192.168.1.1");
        let second = hash_ip("192.168.1.1");
        assert_eq!(first, second);
        assert_ne!(hash_ip("192.168.1.1"), hash_ip("10.0.0.1"));
    }
}