1use crate::Database;
2use anyhow::Result;
3use chrono::Timelike;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::collections::HashMap;
7use std::sync::Arc;
8
/// Runtime configuration for the analytics subsystem: retention windows,
/// path filters, sampling, and privacy switches.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyticsConfig {
    /// Master switch; when `false`, `should_track` always returns `false`.
    pub enabled: bool,
    /// Hours to keep raw `analytics_events` rows before the hourly rollup purges them.
    pub raw_event_retention_hours: u32,
    /// Days to keep `analytics_hourly` rollup rows.
    pub hourly_retention_days: u32,
    /// Days to keep `analytics_daily` rows; 0 disables the purge (see `aggregate_daily`).
    pub daily_retention_days: u32,
    /// Inactivity window after which a session is considered new.
    pub session_timeout_minutes: u32,
    /// Paths excluded from tracking by exact match.
    pub excluded_paths: Vec<String>,
    /// Paths excluded from tracking by prefix match.
    pub excluded_prefixes: Vec<String>,
    /// Whether to resolve visitor country codes.
    pub geo_lookup: bool,
    /// Whether to honour the `DNT: 1` request header.
    pub respect_dnt: bool,
    /// Fraction of requests to record (1.0 = all); applied randomly per request.
    pub sample_rate: f64,
}
33
impl Default for AnalyticsConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            // Two days of raw events before they are rolled up and purged.
            raw_event_retention_hours: 48,
            hourly_retention_days: 90,
            // 0 = keep daily rollups forever (purge is skipped in `aggregate_daily`).
            daily_retention_days: 0,
            session_timeout_minutes: 30,
            // Exact-match exclusions: operational endpoints and browser noise.
            excluded_paths: vec![
                "/health".into(),
                "/robots.txt".into(),
                "/favicon.ico".into(),
            ],
            // Prefix exclusions: admin/API/asset routes and internal paths.
            excluded_prefixes: vec![
                "/admin".into(),
                "/api".into(),
                "/static".into(),
                "/media".into(),
                "/_".into(),
            ],
            geo_lookup: true,
            respect_dnt: true,
            // Track every request by default.
            sample_rate: 1.0,
        }
    }
}
60
impl AnalyticsConfig {
    /// Decides whether a request to `path` should be recorded.
    ///
    /// Returns `false` when tracking is disabled, when `path` matches an
    /// exclusion (exact or prefix), or when the request is sampled out.
    /// With `sample_rate < 1.0` the result is non-deterministic by design.
    pub fn should_track(&self, path: &str) -> bool {
        if !self.enabled {
            return false;
        }

        // Exact-path exclusions (e.g. /health, /favicon.ico).
        if self.excluded_paths.iter().any(|p| p == path) {
            return false;
        }

        // Prefix exclusions (e.g. /admin, /api, /_).
        if self
            .excluded_prefixes
            .iter()
            .any(|prefix| path.starts_with(prefix))
        {
            return false;
        }

        // Random sampling: keep roughly `sample_rate` of eligible requests.
        if self.sample_rate < 1.0 {
            use rand::Rng;
            if rand::thread_rng().gen::<f64>() > self.sample_rate {
                return false;
            }
        }

        true
    }

    /// Returns `true` when the request must be dropped for privacy: the
    /// config honours Do-Not-Track and the client sent exactly `DNT: 1`.
    pub fn should_respect_dnt(&self, dnt_header: Option<&str>) -> bool {
        if !self.respect_dnt {
            return false;
        }
        matches!(dnt_header, Some("1"))
    }
}
101
/// A single tracked pageview, as inserted into `analytics_events`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyticsEvent {
    pub path: String,
    /// Referring domain only (no full URL), if any.
    pub referrer_domain: Option<String>,
    /// Two-letter country code when geo lookup succeeded.
    pub country_code: Option<String>,
    pub device_type: DeviceType,
    pub browser_family: String,
    /// Anonymised session identifier (hash), used for unique-visitor counts.
    pub session_hash: String,
    pub response_time_ms: Option<i64>,
    pub status_code: u16,
    /// CMS content row this path maps to, when known.
    pub content_id: Option<i64>,
    pub content_type: Option<String>,
}
115
/// Coarse device classification derived from the user agent.
/// Serialized lowercase to match the values stored in `analytics_events.device_type`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum DeviceType {
    #[default]
    Desktop,
    Mobile,
    Tablet,
}
124
125impl std::fmt::Display for DeviceType {
126 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127 match self {
128 DeviceType::Desktop => write!(f, "desktop"),
129 DeviceType::Mobile => write!(f, "mobile"),
130 DeviceType::Tablet => write!(f, "tablet"),
131 }
132 }
133}
134
/// Aggregated view for the admin dashboard, produced by `Analytics::get_summary`.
#[derive(Debug, Clone, Serialize)]
pub struct DashboardSummary {
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    /// Percentage change vs. the preceding period of equal length.
    pub pageviews_change: f64,
    pub sessions_change: f64,
    pub avg_response_time_ms: i64,
    /// Percentage of requests with status >= 400.
    pub error_rate: f64,
    pub top_pages: Vec<PageStats>,
    pub top_referrers: Vec<ReferrerStats>,
    /// Pageview counts keyed by device type label.
    pub devices: HashMap<String, i64>,
    /// Pageview counts keyed by browser family.
    pub browsers: HashMap<String, i64>,
    pub total_devices: i64,
    pub total_browsers: i64,
    pub countries: Vec<CountryStats>,
    /// Daily pageview series, ascending by date.
    pub pageviews_over_time: Vec<TimeSeriesPoint>,
    /// Max of the series, floored at 1 (used for chart scaling).
    pub pageviews_max: i64,
}
153
/// Per-path pageview/session counts; also serialized to JSON in `analytics_daily`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageStats {
    pub path: String,
    /// Not populated by the summary queries (always `None` there).
    pub title: Option<String>,
    pub content_type: Option<String>,
    pub pageviews: i64,
    pub unique_sessions: i64,
}
162
/// Sessions attributed to one referring domain.
#[derive(Debug, Clone, Serialize)]
pub struct ReferrerStats {
    pub domain: String,
    pub sessions: i64,
    /// Share of sessions among the listed referrers (0–100).
    pub percentage: f64,
}
169
/// Sessions attributed to one country.
#[derive(Debug, Clone, Serialize)]
pub struct CountryStats {
    /// Two-letter country code as stored in events.
    pub code: String,
    /// Human-readable name resolved via `country_name`.
    pub name: String,
    pub sessions: i64,
    /// Share of sessions among the listed countries (0–100).
    pub percentage: f64,
}
177
/// One point in a time series (timestamp is a date or datetime string).
#[derive(Debug, Clone, Serialize)]
pub struct TimeSeriesPoint {
    pub timestamp: String,
    pub value: i64,
}
183
/// Live activity snapshot produced by `Analytics::get_realtime`.
#[derive(Debug, Clone, Serialize)]
pub struct RealtimeStats {
    /// Distinct sessions seen in the last 5 minutes.
    pub active_sessions: i64,
    /// Pageviews in the last 30 minutes.
    pub pageviews_30min: i64,
    pub current_pages: Vec<ActivePage>,
    pub recent_referrers: Vec<RecentReferrer>,
}
191
/// A page with currently-active visitors (last 5 minutes).
#[derive(Debug, Clone, Serialize)]
pub struct ActivePage {
    pub path: String,
    pub visitors: i64,
}
197
/// A referral observed recently, with its age in seconds.
#[derive(Debug, Clone, Serialize)]
pub struct RecentReferrer {
    pub domain: String,
    pub seconds_ago: i64,
}
203
/// Per-content statistics returned by `Analytics::get_content_stats`.
#[derive(Debug, Clone, Serialize)]
pub struct ContentStats {
    pub content_id: i64,
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    pub first_viewed_at: Option<String>,
    pub last_viewed_at: Option<String>,
    /// Daily view counts over the last 30 days (values only, no dates).
    pub view_trend: Vec<i64>,
    pub top_referrers: Vec<ReferrerStats>,
    /// Percentage of single-pageview sessions (0–100).
    pub bounce_rate: f64,
}
215
/// One row of the content-performance ranking (`get_content_performance`).
#[derive(Debug, Clone, Serialize)]
pub struct ContentPerformance {
    pub content_id: i64,
    pub title: String,
    pub content_type: String,
    pub slug: String,
    pub pageviews: i64,
    pub unique_sessions: i64,
    /// Currently always 0 — not computed by `get_content_performance`.
    pub avg_time_seconds: i64,
    pub bounce_rate: f64,
    /// "up" / "down" / "stable" vs. the previous period (±10% thresholds).
    pub trend: String,
    pub trend_percent: f64,
}
230
/// One row of `analytics_daily`; the map/list fields are stored as JSON columns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyStats {
    /// `YYYY-MM-DD` date this row aggregates.
    pub date: String,
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    pub top_pages: Vec<PageStats>,
    pub top_posts: Vec<PageStats>,
    pub referrers: HashMap<String, i64>,
    pub countries: HashMap<String, i64>,
    pub devices: HashMap<String, i64>,
    pub browsers: HashMap<String, i64>,
    pub avg_response_time_ms: i64,
    pub error_rate: f64,
    /// Views of content created within the last 7 days.
    pub new_content_views: i64,
}
247
/// Output format for `Analytics::export`.
#[derive(Debug, Clone, Copy)]
pub enum ExportFormat {
    Json,
    Csv,
}
254
/// Full export payload assembled by `Analytics::export`.
#[derive(Debug, Clone, Serialize)]
pub struct AnalyticsExport {
    /// Timestamp when the export was generated.
    pub exported_at: String,
    pub date_range: DateRange,
    pub summary: ExportSummary,
    pub daily_stats: Vec<DailyStats>,
    pub top_pages: Vec<PageStats>,
    pub referrers: Vec<ReferrerStats>,
    pub countries: Vec<CountryStats>,
}
266
/// Headline totals included in an export.
#[derive(Debug, Clone, Serialize)]
pub struct ExportSummary {
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    pub avg_response_time_ms: i64,
}
273
/// Inclusive date range (`YYYY-MM-DD` strings).
#[derive(Debug, Clone, Serialize)]
pub struct DateRange {
    pub start: String,
    pub end: String,
}
279
/// Analytics service: records events and computes aggregates over the
/// `analytics_*` tables through the shared [`Database`] handle.
pub struct Analytics {
    db: Database,
    config: AnalyticsConfig,
}
284
285impl Analytics {
    /// Creates an analytics service with the default configuration.
    pub fn new(db: Database) -> Self {
        Self {
            db,
            config: AnalyticsConfig::default(),
        }
    }
292
    /// Creates an analytics service with an explicit configuration.
    pub fn with_config(db: Database, config: AnalyticsConfig) -> Self {
        Self { db, config }
    }
296
    /// Returns the active configuration.
    pub fn config(&self) -> &AnalyticsConfig {
        &self.config
    }
300
301 pub fn should_track(&self, path: &str, dnt_header: Option<&str>) -> bool {
303 if self.config.should_respect_dnt(dnt_header) {
304 return false;
305 }
306 self.config.should_track(path)
307 }
308
    /// Inserts one pageview into `analytics_events` and, when the view maps
    /// to a content row, refreshes that row's cached per-content aggregates.
    ///
    /// # Errors
    /// Propagates connection and SQL execution failures.
    pub fn record_event(&self, event: &AnalyticsEvent) -> Result<()> {
        let conn = self.db.get()?;
        conn.execute(
            r#"
            INSERT INTO analytics_events
            (path, referrer_domain, country_code, device_type, browser_family,
             session_hash, response_time_ms, status_code, content_id, content_type)
            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
            "#,
            rusqlite::params![
                event.path,
                event.referrer_domain,
                event.country_code,
                // Stored as its lowercase Display form ("desktop"/"mobile"/"tablet").
                event.device_type.to_string(),
                event.browser_family,
                event.session_hash,
                event.response_time_ms,
                // Widened to i64 for SQLite's integer affinity.
                event.status_code as i64,
                event.content_id,
                event.content_type,
            ],
        )?;

        // Keep the denormalized per-content counters in sync with the raw log.
        if let Some(content_id) = event.content_id {
            self.update_content_analytics(
                content_id,
                &event.session_hash,
                event.referrer_domain.as_deref(),
            )?;
        }

        Ok(())
    }
343
    /// Maintains the `analytics_content` cache for one content row after a
    /// pageview: increments pageviews, conditionally increments unique
    /// sessions, updates view timestamps, and folds the referrer into the
    /// stored top-10 referrer list (JSON `[(domain, count)]` pairs).
    fn update_content_analytics(
        &self,
        content_id: i64,
        session_hash: &str,
        referrer: Option<&str>,
    ) -> Result<()> {
        let conn = self.db.get()?;
        let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();

        // A session is "new" for this content if no OTHER event from the same
        // session hit this content in the last 30 minutes; the subquery on
        // MAX(id) excludes the event that was just inserted by the caller.
        // NOTE(review): the 30-minute window is hard-coded here rather than
        // reading `config.session_timeout_minutes` — confirm this is intended.
        // On query failure this defaults to `true` (counts as a new session).
        let is_new_session: bool = conn
            .query_row(
                r#"
                SELECT COUNT(*) = 0
                FROM analytics_events
                WHERE content_id = ?1 AND session_hash = ?2
                AND timestamp >= datetime('now', '-30 minutes')
                AND id != (SELECT MAX(id) FROM analytics_events WHERE content_id = ?1 AND session_hash = ?2)
                "#,
                rusqlite::params![content_id, session_hash],
                |row| row.get::<_, bool>(0),
            )
            .unwrap_or(true);

        // Upsert the counters; first_viewed_at is only set on first insert.
        conn.execute(
            r#"
            INSERT INTO analytics_content (content_id, total_pageviews, unique_sessions, first_viewed_at, last_viewed_at)
            VALUES (?1, 1, ?2, ?3, ?3)
            ON CONFLICT(content_id) DO UPDATE SET
                total_pageviews = total_pageviews + 1,
                unique_sessions = unique_sessions + ?2,
                last_viewed_at = ?3
            "#,
            rusqlite::params![content_id, if is_new_session { 1 } else { 0 }, &now],
        )?;

        if let Some(domain) = referrer {
            // Read-modify-write of the JSON referrer list; missing row or
            // malformed JSON degrades gracefully to an empty list.
            let existing_referrers: String = conn
                .query_row(
                    "SELECT top_referrers FROM analytics_content WHERE content_id = ?1",
                    [content_id],
                    |row| row.get(0),
                )
                .unwrap_or_else(|_| "[]".to_string());

            let mut referrers: Vec<(String, i64)> =
                serde_json::from_str(&existing_referrers).unwrap_or_default();

            if let Some(entry) = referrers.iter_mut().find(|(d, _)| d == domain) {
                entry.1 += 1;
            } else {
                referrers.push((domain.to_string(), 1));
            }

            // Keep only the 10 most frequent referrers, highest count first.
            referrers.sort_by(|a, b| b.1.cmp(&a.1));
            referrers.truncate(10);

            let referrers_json = serde_json::to_string(&referrers)?;
            conn.execute(
                "UPDATE analytics_content SET top_referrers = ?1 WHERE content_id = ?2",
                rusqlite::params![referrers_json, content_id],
            )?;
        }

        Ok(())
    }
415
    /// Builds the dashboard summary for the trailing `days`-day window,
    /// combining the raw `analytics_events` table with the `analytics_hourly`
    /// rollups, and comparing against the preceding window of equal length.
    ///
    /// NOTE(review): totals are computed as events + hourly sums. Raw events
    /// that have already been rolled up into `analytics_hourly` but not yet
    /// purged would be counted in both tables — confirm the rollup/purge
    /// timing in `aggregate_hourly` rules this out. Session counts summed
    /// across the two tables (and across hourly buckets) can also overcount
    /// sessions that span buckets.
    ///
    /// # Errors
    /// Propagates connection and statement-preparation failures; individual
    /// aggregate queries fall back to zeroed defaults on error.
    pub fn get_summary(&self, days: i64) -> Result<DashboardSummary> {
        let conn = self.db.get()?;

        // Current window: [cutoff, now); previous window: [prev_cutoff, cutoff).
        let now = chrono::Utc::now();
        let cutoff = (now - chrono::TimeDelta::days(days))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();
        let prev_cutoff = (now - chrono::TimeDelta::days(days * 2))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();

        tracing::debug!("Analytics query: days={}, cutoff={}", days, cutoff);

        // Current-window totals from the raw event log.
        let (events_pageviews, events_sessions, events_avg_response, events_errors): (
            i64,
            i64,
            Option<f64>,
            i64,
        ) = conn
            .query_row(
                r#"
                SELECT
                    COUNT(*) as pageviews,
                    COUNT(DISTINCT session_hash) as sessions,
                    AVG(response_time_ms) as avg_response,
                    SUM(CASE WHEN status_code >= 400 THEN 1 ELSE 0 END) as errors
                FROM analytics_events
                WHERE timestamp >= ?1
                "#,
                [&cutoff],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .unwrap_or((0, 0, None, 0));

        // Current-window totals from the hourly rollups (older data).
        let (hourly_pageviews, hourly_sessions, hourly_avg_response, hourly_errors): (
            i64,
            i64,
            Option<f64>,
            i64,
        ) = conn
            .query_row(
                r#"
                SELECT
                    COALESCE(SUM(pageviews), 0) as pageviews,
                    COALESCE(SUM(unique_sessions), 0) as sessions,
                    AVG(avg_response_time_ms) as avg_response,
                    COALESCE(SUM(error_count), 0) as errors
                FROM analytics_hourly
                WHERE hour >= ?1
                "#,
                [&cutoff],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .unwrap_or((0, 0, None, 0));

        let total_pageviews = events_pageviews + hourly_pageviews;
        let unique_sessions = events_sessions + hourly_sessions;
        // Prefers the raw-event average; falls back to the hourly average.
        // NOTE(review): this is not weighted by sample count — the two
        // averages are never combined.
        let avg_response_time = events_avg_response
            .or(hourly_avg_response)
            .map(|v| v as i64);
        let error_count = events_errors + hourly_errors;

        // Previous-window totals, for the change percentages.
        let (prev_events_pv, prev_events_sess): (i64, i64) = conn
            .query_row(
                r#"
                SELECT
                    COUNT(*) as pageviews,
                    COUNT(DISTINCT session_hash) as sessions
                FROM analytics_events
                WHERE timestamp >= ?1 AND timestamp < ?2
                "#,
                [&prev_cutoff, &cutoff],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap_or((0, 0));

        let (prev_hourly_pv, prev_hourly_sess): (i64, i64) = conn
            .query_row(
                r#"
                SELECT
                    COALESCE(SUM(pageviews), 0) as pageviews,
                    COALESCE(SUM(unique_sessions), 0) as sessions
                FROM analytics_hourly
                WHERE hour >= ?1 AND hour < ?2
                "#,
                [&prev_cutoff, &cutoff],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap_or((0, 0));

        let prev_pageviews = prev_events_pv + prev_hourly_pv;
        let prev_sessions = prev_events_sess + prev_hourly_sess;

        // Percentage deltas; 0 when there is no previous data to compare to.
        let pageviews_change = if prev_pageviews > 0 {
            ((total_pageviews - prev_pageviews) as f64 / prev_pageviews as f64) * 100.0
        } else {
            0.0
        };

        let sessions_change = if prev_sessions > 0 {
            ((unique_sessions - prev_sessions) as f64 / prev_sessions as f64) * 100.0
        } else {
            0.0
        };

        let error_rate = if total_pageviews > 0 {
            (error_count as f64 / total_pageviews as f64) * 100.0
        } else {
            0.0
        };

        // Top 10 pages, merged across raw events and hourly rollups.
        let mut stmt = conn.prepare(
            r#"
            SELECT path, SUM(views) as total_views, SUM(sessions) as total_sessions
            FROM (
                SELECT path, COUNT(*) as views, COUNT(DISTINCT session_hash) as sessions
                FROM analytics_events
                WHERE timestamp >= ?1
                GROUP BY path
                UNION ALL
                SELECT path, pageviews as views, unique_sessions as sessions
                FROM analytics_hourly
                WHERE hour >= ?1
            )
            GROUP BY path
            ORDER BY total_views DESC
            LIMIT 10
            "#,
        )?;
        let top_pages: Vec<PageStats> = stmt
            .query_map([&cutoff], |row| {
                Ok(PageStats {
                    path: row.get(0)?,
                    title: None,
                    content_type: None,
                    pageviews: row.get(1)?,
                    unique_sessions: row.get(2)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Top 10 referrers (raw events only — hourly rollups carry no referrer).
        let mut stmt = conn.prepare(
            r#"
            SELECT referrer_domain, COUNT(DISTINCT session_hash) as sessions
            FROM analytics_events
            WHERE timestamp >= ?1 AND referrer_domain IS NOT NULL
            GROUP BY referrer_domain
            ORDER BY sessions DESC
            LIMIT 10
            "#,
        )?;
        let referrer_rows: Vec<(String, i64)> = stmt
            .query_map([&cutoff], |row| Ok((row.get(0)?, row.get(1)?)))?
            .filter_map(|r| r.ok())
            .collect();

        // Percentages are relative to the listed top-10, not all referrals.
        let referrer_total: i64 = referrer_rows.iter().map(|(_, s)| s).sum();
        let top_referrers: Vec<ReferrerStats> = referrer_rows
            .into_iter()
            .map(|(domain, sessions)| ReferrerStats {
                domain,
                sessions,
                percentage: if referrer_total > 0 {
                    (sessions as f64 / referrer_total as f64) * 100.0
                } else {
                    0.0
                },
            })
            .collect();

        // Device breakdown (raw events only).
        let mut devices: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT device_type, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1
            GROUP BY device_type
            ORDER BY count DESC
            LIMIT 20
            "#,
        )?;
        for row in stmt.query_map([&cutoff], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((device, count)) = row {
                devices.insert(device, count);
            }
        }

        // Browser breakdown (raw events only).
        let mut browsers: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT browser_family, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1
            GROUP BY browser_family
            ORDER BY count DESC
            LIMIT 50
            "#,
        )?;
        for row in stmt.query_map([&cutoff], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((browser, count)) = row {
                browsers.insert(browser, count);
            }
        }

        // Top 10 countries by distinct sessions (raw events only).
        let mut stmt = conn.prepare(
            r#"
            SELECT country_code, COUNT(DISTINCT session_hash) as sessions
            FROM analytics_events
            WHERE timestamp >= ?1 AND country_code IS NOT NULL
            GROUP BY country_code
            ORDER BY sessions DESC
            LIMIT 10
            "#,
        )?;
        let country_rows: Vec<(String, i64)> = stmt
            .query_map([&cutoff], |row| Ok((row.get(0)?, row.get(1)?)))?
            .filter_map(|r| r.ok())
            .collect();

        let country_total: i64 = country_rows.iter().map(|(_, s)| s).sum();
        let countries: Vec<CountryStats> = country_rows
            .into_iter()
            .map(|(code, sessions)| CountryStats {
                name: country_name(&code),
                code,
                sessions,
                percentage: if country_total > 0 {
                    (sessions as f64 / country_total as f64) * 100.0
                } else {
                    0.0
                },
            })
            .collect();

        // Daily pageview series, merged across both tables, ascending by day.
        let mut stmt = conn.prepare(
            r#"
            SELECT day, SUM(views) as total_views
            FROM (
                SELECT date(timestamp) as day, COUNT(*) as views
                FROM analytics_events
                WHERE timestamp >= ?1
                GROUP BY day
                UNION ALL
                SELECT date(hour) as day, pageviews as views
                FROM analytics_hourly
                WHERE hour >= ?1
            )
            GROUP BY day
            ORDER BY day ASC
            "#,
        )?;
        let pageviews_over_time: Vec<TimeSeriesPoint> = stmt
            .query_map([&cutoff], |row| {
                Ok(TimeSeriesPoint {
                    timestamp: row.get(0)?,
                    value: row.get::<_, i64>(1)?,
                })
            })?
            .filter_map(|r| {
                r.map_err(|e| tracing::error!("Pageviews over time query error: {}", e))
                    .ok()
            })
            .collect();

        // Floor at 1 so chart scaling never divides by zero.
        let pageviews_max = pageviews_over_time
            .iter()
            .map(|p| p.value)
            .max()
            .unwrap_or(1)
            .max(1);

        let total_devices: i64 = devices.values().sum();
        let total_browsers: i64 = browsers.values().sum();

        Ok(DashboardSummary {
            total_pageviews,
            unique_sessions,
            pageviews_change,
            sessions_change,
            avg_response_time_ms: avg_response_time.unwrap_or(0),
            error_rate,
            top_pages,
            top_referrers,
            devices,
            browsers,
            total_devices,
            total_browsers,
            countries,
            pageviews_over_time,
            pageviews_max,
        })
    }
716
    /// Returns a live snapshot from the raw event log: active sessions and
    /// busiest pages over the last 5 minutes, pageviews over the last 30
    /// minutes, and the 5 most recent referrals.
    ///
    /// # Errors
    /// Propagates connection and statement-preparation failures; the count
    /// queries individually fall back to 0 on error.
    pub fn get_realtime(&self) -> Result<RealtimeStats> {
        let conn = self.db.get()?;

        let now = chrono::Utc::now();
        let five_min_ago = (now - chrono::TimeDelta::minutes(5))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();
        let thirty_min_ago = (now - chrono::TimeDelta::minutes(30))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();

        // Distinct sessions seen in the last 5 minutes.
        let active_sessions: i64 = conn
            .query_row(
                r#"
                SELECT COUNT(DISTINCT session_hash)
                FROM analytics_events
                WHERE timestamp >= ?1
                "#,
                [&five_min_ago],
                |row| row.get(0),
            )
            .unwrap_or(0);

        // Total pageviews in the last 30 minutes.
        let pageviews_30min: i64 = conn
            .query_row(
                r#"
                SELECT COUNT(*)
                FROM analytics_events
                WHERE timestamp >= ?1
                "#,
                [&thirty_min_ago],
                |row| row.get(0),
            )
            .unwrap_or(0);

        // Top 10 pages by distinct visitors in the last 5 minutes.
        let mut stmt = conn.prepare(
            r#"
            SELECT path, COUNT(DISTINCT session_hash) as visitors
            FROM analytics_events
            WHERE timestamp >= ?1
            GROUP BY path
            ORDER BY visitors DESC
            LIMIT 10
            "#,
        )?;
        let current_pages: Vec<ActivePage> = stmt
            .query_map([&five_min_ago], |row| {
                Ok(ActivePage {
                    path: row.get(0)?,
                    visitors: row.get(1)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // 5 newest referrals with their age computed in SQL via julianday.
        let mut stmt = conn.prepare(
            r#"
            SELECT referrer_domain,
                   CAST((julianday('now') - julianday(timestamp)) * 86400 AS INTEGER) as seconds_ago
            FROM analytics_events
            WHERE timestamp >= ?1 AND referrer_domain IS NOT NULL
            ORDER BY timestamp DESC
            LIMIT 5
            "#,
        )?;
        let recent_referrers: Vec<RecentReferrer> = stmt
            .query_map([&five_min_ago], |row| {
                Ok(RecentReferrer {
                    domain: row.get(0)?,
                    seconds_ago: row.get(1)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        Ok(RealtimeStats {
            active_sessions,
            pageviews_30min,
            current_pages,
            recent_referrers,
        })
    }
800
    /// Returns statistics for a single content row.
    ///
    /// Fast path: serve cached counters from `analytics_content` (plus a live
    /// 30-day view trend). Fallback: when no cache row exists, recompute
    /// everything from `analytics_events`, with empty referrers and a zero
    /// bounce rate (those are only maintained in the cache).
    pub fn get_content_stats(&self, content_id: i64) -> Result<ContentStats> {
        let conn = self.db.get()?;

        // Cached row, if `update_content_analytics` has ever run for this id.
        let cached: Option<(
            i64,
            i64,
            Option<String>,
            Option<String>,
            String,
            Option<f64>,
        )> = conn
            .query_row(
                r#"
                SELECT total_pageviews, unique_sessions, first_viewed_at, last_viewed_at,
                       COALESCE(top_referrers, '[]'), bounce_rate
                FROM analytics_content
                WHERE content_id = ?1
                "#,
                [content_id],
                |row| {
                    Ok((
                        row.get(0)?,
                        row.get(1)?,
                        row.get(2)?,
                        row.get(3)?,
                        row.get(4)?,
                        row.get(5)?,
                    ))
                },
            )
            .ok();

        if let Some((
            total_pageviews,
            unique_sessions,
            first_viewed_at,
            last_viewed_at,
            referrers_json,
            bounce_rate,
        )) = cached
        {
            // Daily view counts for the last 30 days (values only, ascending).
            let mut stmt = conn.prepare(
                r#"
                SELECT date(timestamp) as day, COUNT(*) as views
                FROM analytics_events
                WHERE content_id = ?1 AND timestamp >= datetime('now', '-30 days')
                GROUP BY day
                ORDER BY day ASC
                "#,
            )?;
            let view_trend: Vec<i64> = stmt
                .query_map([content_id], |row| row.get::<_, i64>(1))?
                .filter_map(|r| r.ok())
                .collect();

            // Decode the cached JSON referrer list; percentages are relative
            // to the cached list's own total.
            let referrer_data: Vec<(String, i64)> =
                serde_json::from_str(&referrers_json).unwrap_or_default();
            let total_ref: i64 = referrer_data.iter().map(|(_, c)| c).sum();
            let top_referrers: Vec<ReferrerStats> = referrer_data
                .into_iter()
                .map(|(domain, sessions)| ReferrerStats {
                    domain,
                    sessions,
                    percentage: if total_ref > 0 {
                        (sessions as f64 / total_ref as f64) * 100.0
                    } else {
                        0.0
                    },
                })
                .collect();

            return Ok(ContentStats {
                content_id,
                total_pageviews,
                unique_sessions,
                first_viewed_at,
                last_viewed_at,
                view_trend,
                top_referrers,
                bounce_rate: bounce_rate.unwrap_or(0.0),
            });
        }

        // No cache row: recompute directly from the raw event log.
        let (total_pageviews, unique_sessions): (i64, i64) = conn
            .query_row(
                r#"
                SELECT COUNT(*), COUNT(DISTINCT session_hash)
                FROM analytics_events
                WHERE content_id = ?1
                "#,
                [content_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap_or((0, 0));

        let first_viewed_at: Option<String> = conn
            .query_row(
                "SELECT MIN(timestamp) FROM analytics_events WHERE content_id = ?1",
                [content_id],
                |row| row.get(0),
            )
            .ok();

        let last_viewed_at: Option<String> = conn
            .query_row(
                "SELECT MAX(timestamp) FROM analytics_events WHERE content_id = ?1",
                [content_id],
                |row| row.get(0),
            )
            .ok();

        // Same 30-day trend query as the cached path.
        let mut stmt = conn.prepare(
            r#"
            SELECT date(timestamp) as day, COUNT(*) as views
            FROM analytics_events
            WHERE content_id = ?1 AND timestamp >= datetime('now', '-30 days')
            GROUP BY day
            ORDER BY day ASC
            "#,
        )?;
        let view_trend: Vec<i64> = stmt
            .query_map([content_id], |row| row.get::<_, i64>(1))?
            .filter_map(|r| r.ok())
            .collect();

        Ok(ContentStats {
            content_id,
            total_pageviews,
            unique_sessions,
            first_viewed_at,
            last_viewed_at,
            view_trend,
            // Referrer history and bounce rate live only in the cache table.
            top_referrers: vec![],
            bounce_rate: 0.0,
        })
    }
940
    /// Ranks published content by pageviews over the trailing `days`-day
    /// window (up to `limit` rows), then annotates each row with its trend
    /// versus the preceding window (`up`/`down` beyond ±10%, else `stable`).
    ///
    /// NOTE(review): the trend pass issues one query per result row (N+1);
    /// acceptable while `limit` is small, worth batching if it grows.
    pub fn get_content_performance(
        &self,
        days: i64,
        limit: i64,
    ) -> Result<Vec<ContentPerformance>> {
        let conn = self.db.get()?;

        let cutoff = (chrono::Utc::now() - chrono::TimeDelta::days(days))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();
        let prev_cutoff = (chrono::Utc::now() - chrono::TimeDelta::days(days * 2))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();

        // LEFT JOIN keeps content without events out via HAVING pageviews > 0;
        // bounce_rate comes from the cached analytics_content row when present.
        let mut stmt = conn.prepare(
            r#"
            SELECT
                c.id,
                c.title,
                c.content_type,
                c.slug,
                COUNT(e.id) as pageviews,
                COUNT(DISTINCT e.session_hash) as sessions,
                COALESCE(ac.bounce_rate, 0) as bounce_rate
            FROM content c
            LEFT JOIN analytics_events e ON e.content_id = c.id AND e.timestamp >= ?1
            LEFT JOIN analytics_content ac ON ac.content_id = c.id
            WHERE c.status = 'published'
            GROUP BY c.id
            HAVING pageviews > 0
            ORDER BY pageviews DESC
            LIMIT ?2
            "#,
        )?;

        let mut results: Vec<ContentPerformance> = stmt
            .query_map(rusqlite::params![&cutoff, limit], |row| {
                Ok(ContentPerformance {
                    content_id: row.get(0)?,
                    title: row.get(1)?,
                    content_type: row.get(2)?,
                    slug: row.get(3)?,
                    pageviews: row.get(4)?,
                    unique_sessions: row.get(5)?,
                    // avg_time_seconds is not computed here; placeholder 0.
                    avg_time_seconds: 0,
                    bounce_rate: row.get(6)?,
                    trend: "stable".to_string(),
                    trend_percent: 0.0,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Second pass: compare against the previous window of equal length.
        for content in &mut results {
            let prev_views: i64 = conn
                .query_row(
                    r#"
                    SELECT COUNT(*)
                    FROM analytics_events
                    WHERE content_id = ?1 AND timestamp >= ?2 AND timestamp < ?3
                    "#,
                    rusqlite::params![content.content_id, &prev_cutoff, &cutoff],
                    |row| row.get(0),
                )
                .unwrap_or(0);

            // With no previous data the defaults ("stable", 0.0) stand.
            if prev_views > 0 {
                let change = ((content.pageviews - prev_views) as f64 / prev_views as f64) * 100.0;
                content.trend_percent = change;
                content.trend = if change > 10.0 {
                    "up".to_string()
                } else if change < -10.0 {
                    "down".to_string()
                } else {
                    "stable".to_string()
                };
            }
        }

        Ok(results)
    }
1024
1025 pub fn aggregate_hourly(&self) -> Result<usize> {
1026 let conn = self.db.get()?;
1027
1028 let count = conn.execute(
1029 r#"
1030 INSERT INTO analytics_hourly (hour, path, content_id, content_type,
1031 pageviews, unique_sessions, avg_response_time_ms, error_count)
1032 SELECT
1033 strftime('%Y-%m-%dT%H:00:00Z', timestamp) as hour,
1034 path,
1035 content_id,
1036 content_type,
1037 COUNT(*) as pageviews,
1038 COUNT(DISTINCT session_hash) as unique_sessions,
1039 AVG(response_time_ms) as avg_response_time_ms,
1040 SUM(CASE WHEN status_code >= 400 THEN 1 ELSE 0 END) as error_count
1041 FROM analytics_events
1042 WHERE timestamp < strftime('%Y-%m-%dT%H:00:00Z', 'now')
1043 AND timestamp >= datetime('now', '-2 hours')
1044 GROUP BY hour, path, content_id, content_type
1045 ON CONFLICT(hour, path) DO UPDATE SET
1046 pageviews = analytics_hourly.pageviews + excluded.pageviews,
1047 unique_sessions = analytics_hourly.unique_sessions + excluded.unique_sessions,
1048 error_count = analytics_hourly.error_count + excluded.error_count
1049 "#,
1050 [],
1051 )?;
1052
1053 let retention_hours = self.config.raw_event_retention_hours as i64;
1055 conn.execute(
1056 &format!(
1057 "DELETE FROM analytics_events WHERE timestamp < datetime('now', '-{} hours')",
1058 retention_hours
1059 ),
1060 [],
1061 )?;
1062
1063 Ok(count)
1064 }
1065
    /// Builds yesterday's `analytics_daily` row (idempotent: returns 0 if the
    /// row already exists, 1 after inserting), refreshes bounce rates, and
    /// applies the hourly/daily retention purges when configured (> 0).
    ///
    /// Totals come from `analytics_hourly`; the breakdown maps (referrers,
    /// countries, devices, browsers) are computed from raw `analytics_events`,
    /// which assumes raw events for yesterday have not been purged yet.
    ///
    /// # Errors
    /// Propagates connection, SQL, and JSON-serialization failures.
    pub fn aggregate_daily(&self) -> Result<usize> {
        let conn = self.db.get()?;

        let yesterday = (chrono::Utc::now() - chrono::TimeDelta::days(1))
            .format("%Y-%m-%d")
            .to_string();

        // Skip if yesterday was already aggregated.
        let exists: bool = conn
            .query_row(
                "SELECT EXISTS(SELECT 1 FROM analytics_daily WHERE date = ?1)",
                [&yesterday],
                |row| row.get(0),
            )
            .unwrap_or(false);

        if exists {
            return Ok(0);
        }

        // Inclusive bounds covering the whole of yesterday (UTC).
        let start = format!("{}T00:00:00Z", yesterday);
        let end = format!("{}T23:59:59Z", yesterday);

        // Day totals from the hourly rollups.
        let (total_pageviews, unique_sessions, avg_response, error_count): (
            i64,
            i64,
            Option<f64>,
            i64,
        ) = conn
            .query_row(
                r#"
                SELECT
                    COALESCE(SUM(pageviews), 0),
                    COALESCE(SUM(unique_sessions), 0),
                    AVG(avg_response_time_ms),
                    COALESCE(SUM(error_count), 0)
                FROM analytics_hourly
                WHERE hour >= ?1 AND hour <= ?2
                "#,
                [&start, &end],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .unwrap_or((0, 0, None, 0));

        // Top 10 pages for the day.
        let mut stmt = conn.prepare(
            r#"
            SELECT path, SUM(pageviews) as views, SUM(unique_sessions) as sessions
            FROM analytics_hourly
            WHERE hour >= ?1 AND hour <= ?2
            GROUP BY path
            ORDER BY views DESC
            LIMIT 10
            "#,
        )?;
        let top_pages: Vec<PageStats> = stmt
            .query_map([&start, &end], |row| {
                Ok(PageStats {
                    path: row.get(0)?,
                    title: None,
                    content_type: None,
                    pageviews: row.get(1)?,
                    unique_sessions: row.get(2)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Top 10 posts (content_type = 'post') for the day.
        let mut stmt = conn.prepare(
            r#"
            SELECT path, SUM(pageviews) as views, SUM(unique_sessions) as sessions
            FROM analytics_hourly
            WHERE hour >= ?1 AND hour <= ?2 AND content_type = 'post'
            GROUP BY path
            ORDER BY views DESC
            LIMIT 10
            "#,
        )?;
        let top_posts: Vec<PageStats> = stmt
            .query_map([&start, &end], |row| {
                Ok(PageStats {
                    path: row.get(0)?,
                    title: None,
                    content_type: Some("post".to_string()),
                    pageviews: row.get(1)?,
                    unique_sessions: row.get(2)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Referrer counts from raw events (rollups carry no referrer info).
        let mut referrers: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT referrer_domain, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2 AND referrer_domain IS NOT NULL
            GROUP BY referrer_domain
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((domain, count)) = row {
                referrers.insert(domain, count);
            }
        }

        // Country counts from raw events.
        let mut countries: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT country_code, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2 AND country_code IS NOT NULL
            GROUP BY country_code
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((code, count)) = row {
                countries.insert(code, count);
            }
        }

        // Device counts from raw events.
        let mut devices: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT device_type, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2
            GROUP BY device_type
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((device, count)) = row {
                devices.insert(device, count);
            }
        }

        // Browser counts from raw events.
        let mut browsers: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT browser_family, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2
            GROUP BY browser_family
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((browser, count)) = row {
                browsers.insert(browser, count);
            }
        }

        // Views of content created within the last 7 days.
        let new_content_views: i64 = conn
            .query_row(
                r#"
                SELECT COUNT(*)
                FROM analytics_events e
                JOIN content c ON e.content_id = c.id
                WHERE e.timestamp >= ?1 AND e.timestamp <= ?2
                AND c.created_at >= datetime('now', '-7 days')
                "#,
                [&start, &end],
                |row| row.get(0),
            )
            .unwrap_or(0);

        let error_rate = if total_pageviews > 0 {
            (error_count as f64 / total_pageviews as f64) * 100.0
        } else {
            0.0
        };

        // Persist the daily row; breakdown maps and page lists go in as JSON.
        conn.execute(
            r#"
            INSERT INTO analytics_daily (date, total_pageviews, unique_sessions, top_pages, top_posts,
                referrers, countries, devices, browsers, avg_response_time_ms,
                error_rate, new_content_views)
            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)
            "#,
            rusqlite::params![
                yesterday,
                total_pageviews,
                unique_sessions,
                serde_json::to_string(&top_pages)?,
                serde_json::to_string(&top_posts)?,
                serde_json::to_string(&referrers)?,
                serde_json::to_string(&countries)?,
                serde_json::to_string(&devices)?,
                serde_json::to_string(&browsers)?,
                avg_response.map(|v| v as i64),
                error_rate,
                new_content_views,
            ],
        )?;

        // Refresh cached bounce rates while yesterday's events still exist.
        self.update_bounce_rates()?;

        // Retention purges; a value of 0 disables each purge.
        if self.config.hourly_retention_days > 0 {
            conn.execute(
                &format!(
                    "DELETE FROM analytics_hourly WHERE hour < datetime('now', '-{} days')",
                    self.config.hourly_retention_days
                ),
                [],
            )?;
        }

        if self.config.daily_retention_days > 0 {
            conn.execute(
                &format!(
                    "DELETE FROM analytics_daily WHERE date < date('now', '-{} days')",
                    self.config.daily_retention_days
                ),
                [],
            )?;
        }

        Ok(1)
    }
1301
    /// Recompute the per-content bounce rate from the raw event table.
    ///
    /// A "bounce" here is a session that viewed a given piece of content
    /// exactly once; the stored value is a percentage. Only content rows
    /// that actually have events are updated (see the outer WHERE clause),
    /// so untouched rows keep their previous value.
    fn update_bounce_rates(&self) -> Result<()> {
        let conn = self.db.get()?;

        // Correlated subquery: for each analytics_content row, group its
        // events by session_hash, count single-pageview sessions, and divide
        // by the distinct session count. NULLIF guards the division — with
        // zero sessions the expression yields NULL instead of erroring.
        // NOTE(review): this scans analytics_events per content row; fine
        // while raw events are pruned aggressively (48h default retention).
        conn.execute(
            r#"
            UPDATE analytics_content
            SET bounce_rate = (
                SELECT CAST(
                    SUM(CASE WHEN session_pageviews = 1 THEN 1 ELSE 0 END) AS REAL
                ) / NULLIF(COUNT(DISTINCT session_hash), 0) * 100
                FROM (
                    SELECT session_hash, COUNT(*) as session_pageviews
                    FROM analytics_events
                    WHERE content_id = analytics_content.content_id
                    GROUP BY session_hash
                )
            )
            WHERE content_id IN (SELECT DISTINCT content_id FROM analytics_events WHERE content_id IS NOT NULL)
            "#,
            [],
        )?;

        Ok(())
    }
1328
1329 pub fn cleanup_old_data(&self, hourly_retention_days: i64) -> Result<()> {
1330 let conn = self.db.get()?;
1331
1332 conn.execute(
1333 "DELETE FROM analytics_hourly WHERE hour < datetime('now', ?1)",
1334 [format!("-{} days", hourly_retention_days)],
1335 )?;
1336
1337 Ok(())
1338 }
1339
1340 pub fn export(&self, days: i64, format: ExportFormat) -> Result<String> {
1342 let now = chrono::Utc::now();
1343 let start_date = (now - chrono::TimeDelta::days(days))
1344 .format("%Y-%m-%d")
1345 .to_string();
1346 let end_date = now.format("%Y-%m-%d").to_string();
1347
1348 let summary = self.get_summary(days)?;
1349
1350 let conn = self.db.get()?;
1351
1352 let mut stmt = conn.prepare(
1354 r#"
1355 SELECT date, total_pageviews, unique_sessions, top_pages, top_posts,
1356 referrers, countries, devices, browsers, avg_response_time_ms,
1357 error_rate, new_content_views
1358 FROM analytics_daily
1359 WHERE date >= ?1 AND date <= ?2
1360 ORDER BY date ASC
1361 "#,
1362 )?;
1363
1364 let daily_stats: Vec<DailyStats> = stmt
1365 .query_map([&start_date, &end_date], |row| {
1366 let top_pages_json: String = row.get(3)?;
1367 let top_posts_json: String = row.get(4)?;
1368 let referrers_json: String = row.get(5)?;
1369 let countries_json: String = row.get(6)?;
1370 let devices_json: String = row.get(7)?;
1371 let browsers_json: String = row.get(8)?;
1372
1373 Ok(DailyStats {
1374 date: row.get(0)?,
1375 total_pageviews: row.get(1)?,
1376 unique_sessions: row.get(2)?,
1377 top_pages: serde_json::from_str(&top_pages_json).unwrap_or_default(),
1378 top_posts: serde_json::from_str(&top_posts_json).unwrap_or_default(),
1379 referrers: serde_json::from_str(&referrers_json).unwrap_or_default(),
1380 countries: serde_json::from_str(&countries_json).unwrap_or_default(),
1381 devices: serde_json::from_str(&devices_json).unwrap_or_default(),
1382 browsers: serde_json::from_str(&browsers_json).unwrap_or_default(),
1383 avg_response_time_ms: row.get::<_, Option<i64>>(9)?.unwrap_or(0),
1384 error_rate: row.get::<_, Option<f64>>(10)?.unwrap_or(0.0),
1385 new_content_views: row.get(11)?,
1386 })
1387 })?
1388 .filter_map(|r| r.ok())
1389 .collect();
1390
1391 let export = AnalyticsExport {
1392 exported_at: now.format("%Y-%m-%dT%H:%M:%SZ").to_string(),
1393 date_range: DateRange {
1394 start: start_date,
1395 end: end_date,
1396 },
1397 summary: ExportSummary {
1398 total_pageviews: summary.total_pageviews,
1399 unique_sessions: summary.unique_sessions,
1400 avg_response_time_ms: summary.avg_response_time_ms,
1401 },
1402 daily_stats,
1403 top_pages: summary.top_pages,
1404 referrers: summary.top_referrers,
1405 countries: summary.countries,
1406 };
1407
1408 match format {
1409 ExportFormat::Json => Ok(serde_json::to_string_pretty(&export)?),
1410 ExportFormat::Csv => {
1411 let mut csv = String::new();
1412 csv.push_str("date,pageviews,sessions,avg_response_ms,error_rate\n");
1413 for day in &export.daily_stats {
1414 csv.push_str(&format!(
1415 "{},{},{},{},{:.2}\n",
1416 day.date,
1417 day.total_pageviews,
1418 day.unique_sessions,
1419 day.avg_response_time_ms,
1420 day.error_rate
1421 ));
1422 }
1423 Ok(csv)
1424 }
1425 }
1426 }
1427}
1428
1429pub fn generate_session_hash(ip: &str, user_agent: &str, daily_salt: &str) -> String {
1430 let anonymized_ip = anonymize_ip(ip);
1431 let browser = extract_browser_family(user_agent);
1432
1433 let input = format!("{}|{}|{}", daily_salt, anonymized_ip, browser);
1434 let mut hasher = Sha256::new();
1435 hasher.update(input.as_bytes());
1436 let result = hasher.finalize();
1437 hex::encode(&result[..8])
1438}
1439
1440pub fn get_daily_salt(db: &Database) -> Result<String> {
1441 let conn = db.get()?;
1442 let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
1443 let key = format!("session_salt_{}", today);
1444
1445 let existing: Option<String> = conn
1446 .query_row(
1447 "SELECT value FROM analytics_settings WHERE key = ?1",
1448 [&key],
1449 |row| row.get(0),
1450 )
1451 .ok();
1452
1453 if let Some(salt) = existing {
1454 return Ok(salt);
1455 }
1456
1457 let salt: String = (0..32)
1458 .map(|_| format!("{:02x}", rand::random::<u8>()))
1459 .collect();
1460
1461 conn.execute(
1462 "INSERT OR REPLACE INTO analytics_settings (key, value) VALUES (?1, ?2)",
1463 [&key, &salt],
1464 )?;
1465
1466 conn.execute(
1467 "DELETE FROM analytics_settings WHERE key LIKE 'session_salt_%' AND key != ?1",
1468 [&key],
1469 )?;
1470
1471 Ok(salt)
1472}
1473
/// Truncate an IP address for privacy-preserving analytics.
///
/// IPv4 keeps only the first two octets ("a.b.0.0"); anything containing a
/// colon is treated as IPv6 and keeps its first three groups ("x:y:z:*").
/// Inputs that fit neither shape collapse to "unknown".
fn anonymize_ip(ip: &str) -> String {
    if ip.contains(':') {
        // IPv6: require at least four groups, keep the leading three.
        let mut groups = ip.split(':');
        if let (Some(a), Some(b), Some(c), Some(_)) =
            (groups.next(), groups.next(), groups.next(), groups.next())
        {
            return format!("{}:{}:{}:*", a, b, c);
        }
    } else {
        // IPv4: must be exactly four dot-separated parts.
        let octets: Vec<&str> = ip.split('.').collect();
        if let [a, b, _, _] = octets[..] {
            return format!("{}.{}.0.0", a, b);
        }
    }
    "unknown".to_string()
}
1490
/// Classify a user-agent string into a coarse browser family.
///
/// Branch order matters: Chromium-based browsers embed "chrome" (and
/// "safari") in their UA strings, so the more specific tokens must be
/// tested first — Edge's "edg/", then Opera's "opr/", then the generic
/// "chrome" check, and "safari" last of the engine tokens.
pub fn extract_browser_family(user_agent: &str) -> String {
    let ua = user_agent.to_lowercase();
    if ua.contains("firefox") {
        "Firefox".to_string()
    } else if ua.contains("edg/") || ua.contains("edge") {
        "Edge".to_string()
    } else if ua.contains("opera") || ua.contains("opr/") {
        // Modern Opera UAs look like "... Chrome/xx ... OPR/yy"; this
        // branch must precede the Chrome check or it is unreachable.
        "Opera".to_string()
    } else if ua.contains("chrome") || ua.contains("chromium") {
        "Chrome".to_string()
    } else if ua.contains("safari") {
        "Safari".to_string()
    } else {
        "Other".to_string()
    }
}
1507
1508pub fn extract_device_type(user_agent: &str) -> DeviceType {
1509 let ua = user_agent.to_lowercase();
1510 if ua.contains("mobile") || ua.contains("android") && !ua.contains("tablet") {
1511 DeviceType::Mobile
1512 } else if ua.contains("tablet") || ua.contains("ipad") {
1513 DeviceType::Tablet
1514 } else {
1515 DeviceType::Desktop
1516 }
1517}
1518
1519pub fn extract_referrer_domain(referrer: &str) -> Option<String> {
1520 if referrer.is_empty() {
1521 return None;
1522 }
1523 url::Url::parse(referrer)
1524 .ok()
1525 .and_then(|u| u.host_str().map(|h| h.to_string()))
1526}
1527
/// Best-effort, fully-offline IPv4 → country-code heuristic.
///
/// This is NOT a real GeoIP database: public address space is mapped to
/// broad regions by first/second octet only, so results are approximate.
/// Private (RFC 1918), loopback, and link-local addresses return `"LO"`;
/// non-IPv4 or unmappable input returns `None`.
pub fn lookup_country(ip: &str) -> Option<String> {
    // Non-numeric or >255 octets fail `parse::<u8>()` and are filtered out,
    // so malformed addresses simply fail the length check below.
    let parts: Vec<u8> = ip.split('.').filter_map(|p| p.parse().ok()).collect();

    if parts.len() != 4 {
        return None;
    }

    let first_octet = parts[0];
    let second_octet = parts[1];

    // Local/private space: RFC 1918 blocks, loopback (127/8), and RFC 3927
    // link-local — which is only 169.254.0.0/16, not the whole 169/8 block
    // (the rest of 169/8 is ordinary public address space).
    if first_octet == 10
        || first_octet == 127
        || (first_octet == 172 && (16..=31).contains(&second_octet))
        || (first_octet == 192 && second_octet == 168)
        || (first_octet == 169 && second_octet == 254)
    {
        return Some("LO".to_string());
    }

    match first_octet {
        // Ranges historically allocated heavily to European registries.
        77..=95 | 145..=151 | 176..=185 | 193..=195 => match second_octet {
            0..=50 => Some("DE".to_string()),
            51..=100 => Some("GB".to_string()),
            101..=150 => Some("FR".to_string()),
            151..=200 => Some("NL".to_string()),
            _ => Some("EU".to_string()),
        },

        // Asia-Pacific ranges.
        1 | 2 | 27 | 36..=39 | 42..=49 | 58..=61 | 101..=126 | 202..=223 => match second_octet {
            0..=50 => Some("JP".to_string()),
            51..=100 => Some("CN".to_string()),
            101..=150 => Some("AU".to_string()),
            151..=200 => Some("IN".to_string()),
            _ => Some("AP".to_string()),
        },

        // North-American ranges, split roughly US/CA by second octet.
        3..=26
        | 28..=35
        | 40
        | 41
        | 50..=57
        | 63..=76
        | 96..=100
        | 128..=144
        | 152..=175
        | 186..=191
        | 196..=201 => {
            if second_octet < 128 {
                Some("US".to_string())
            } else {
                Some("CA".to_string())
            }
        }

        _ => None,
    }
}
1595
/// Expand a two-letter country/region code into a human-readable name.
///
/// Unrecognized codes are echoed back verbatim.
fn country_name(code: &str) -> String {
    // Static lookup table; linear scan is fine at this size.
    const NAMES: &[(&str, &str)] = &[
        ("LO", "Local/Private"),
        ("US", "United States"),
        ("GB", "United Kingdom"),
        ("DE", "Germany"),
        ("FR", "France"),
        ("CA", "Canada"),
        ("AU", "Australia"),
        ("JP", "Japan"),
        ("CN", "China"),
        ("IN", "India"),
        ("BR", "Brazil"),
        ("NL", "Netherlands"),
        ("ES", "Spain"),
        ("IT", "Italy"),
        ("SE", "Sweden"),
        ("NO", "Norway"),
        ("DK", "Denmark"),
        ("FI", "Finland"),
        ("PL", "Poland"),
        ("RU", "Russia"),
        ("KR", "South Korea"),
        ("MX", "Mexico"),
        ("AR", "Argentina"),
        ("ZA", "South Africa"),
        ("NG", "Nigeria"),
        ("EG", "Egypt"),
        ("SG", "Singapore"),
        ("HK", "Hong Kong"),
        ("TW", "Taiwan"),
        ("NZ", "New Zealand"),
        ("IE", "Ireland"),
        ("CH", "Switzerland"),
        ("AT", "Austria"),
        ("BE", "Belgium"),
        ("PT", "Portugal"),
        ("EU", "Europe"),
        ("AP", "Asia-Pacific"),
    ];

    NAMES
        .iter()
        .find(|(c, _)| *c == code)
        .map(|(_, name)| (*name).to_string())
        .unwrap_or_else(|| code.to_string())
}
1639
1640pub async fn run_aggregation_job(analytics: Arc<Analytics>) {
1642 let mut hourly_interval = tokio::time::interval(std::time::Duration::from_secs(3600)); let mut daily_check = tokio::time::interval(std::time::Duration::from_secs(3600 * 6)); loop {
1646 tokio::select! {
1647 _ = hourly_interval.tick() => {
1648 match analytics.aggregate_hourly() {
1650 Ok(count) => {
1651 if count > 0 {
1652 tracing::info!("Analytics: aggregated {} hourly records", count);
1653 }
1654 }
1655 Err(e) => {
1656 tracing::error!("Analytics hourly aggregation failed: {}", e);
1657 }
1658 }
1659
1660 if let Err(e) = analytics.cleanup_old_data(90) {
1661 tracing::error!("Analytics cleanup failed: {}", e);
1662 }
1663 }
1664 _ = daily_check.tick() => {
1665 let now = chrono::Utc::now();
1667 if now.hour() < 6 {
1669 match analytics.aggregate_daily() {
1670 Ok(count) => {
1671 if count > 0 {
1672 tracing::info!("Analytics: completed daily aggregation");
1673 }
1674 }
1675 Err(e) => {
1676 tracing::error!("Analytics daily aggregation failed: {}", e);
1677 }
1678 }
1679 }
1680 }
1681 }
1682 }
1683}
1684
#[cfg(test)]
mod tests {
    use super::*;

    // Exact-path and prefix exclusions from the default config; sampling is
    // not exercised here because the default sample_rate is 1.0.
    #[test]
    fn test_analytics_config_should_track() {
        let config = AnalyticsConfig::default();

        assert!(config.should_track("/posts/hello"));
        assert!(config.should_track("/"));
        assert!(!config.should_track("/admin"));
        assert!(!config.should_track("/admin/posts"));
        assert!(!config.should_track("/static/style.css"));
        assert!(!config.should_track("/media/image.jpg"));
        assert!(!config.should_track("/robots.txt"));
    }

    // DNT is honored only when the header is exactly "1" AND the config
    // opts in via respect_dnt.
    #[test]
    fn test_analytics_config_dnt() {
        let config = AnalyticsConfig::default();

        assert!(config.should_respect_dnt(Some("1")));
        assert!(!config.should_respect_dnt(Some("0")));
        assert!(!config.should_respect_dnt(None));

        let config_no_dnt = AnalyticsConfig {
            respect_dnt: false,
            ..Default::default()
        };
        assert!(!config_no_dnt.should_respect_dnt(Some("1")));
    }

    // Spot-checks of the octet-range heuristic: public ranges, private/
    // loopback space ("LO"), and malformed input.
    #[test]
    fn test_lookup_country() {
        assert_eq!(lookup_country("8.8.8.8"), Some("US".to_string()));
        assert_eq!(lookup_country("4.4.4.4"), Some("US".to_string()));

        assert_eq!(lookup_country("80.80.80.80"), Some("GB".to_string()));

        assert_eq!(lookup_country("192.168.1.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("10.0.0.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("127.0.0.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("172.16.0.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("172.31.255.255"), Some("LO".to_string()));

        // Octets above 255 fail the u8 parse, so the address is rejected.
        assert_eq!(lookup_country("invalid"), None);
        assert_eq!(lookup_country("256.1.1.1"), None);
    }

    // IPv4 keeps two octets, IPv6 keeps three groups, junk → "unknown".
    #[test]
    fn test_anonymize_ip() {
        assert_eq!(anonymize_ip("192.168.1.100"), "192.168.0.0");
        assert_eq!(anonymize_ip("10.20.30.40"), "10.20.0.0");
        assert_eq!(
            anonymize_ip("2001:db8:85a3:8d3:1319:8a2e:370:7348"),
            "2001:db8:85a3:*"
        );
        assert_eq!(anonymize_ip("invalid"), "unknown");
    }

    // Token matching on representative desktop UA strings.
    #[test]
    fn test_extract_browser_family() {
        assert_eq!(
            extract_browser_family(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/91.0"
            ),
            "Chrome"
        );
        assert_eq!(
            extract_browser_family(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
            ),
            "Firefox"
        );
        assert_eq!(
            extract_browser_family("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15"),
            "Safari"
        );
    }

    // Desktop / mobile / tablet classification from UA tokens.
    #[test]
    fn test_extract_device_type() {
        assert!(matches!(
            extract_device_type("Mozilla/5.0 (Windows NT 10.0; Win64; x64)"),
            DeviceType::Desktop
        ));
        assert!(matches!(
            extract_device_type("Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) Mobile"),
            DeviceType::Mobile
        ));
        assert!(matches!(
            extract_device_type("Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X)"),
            DeviceType::Tablet
        ));
    }
}
1784}