//! Analytics service (`pebble_cms/services/analytics.rs`).
//!
//! Records raw page-view events into SQLite and serves aggregated
//! dashboard, realtime, and per-content statistics.
1use crate::Database;
2use anyhow::Result;
3use chrono::Timelike;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::collections::HashMap;
7use std::sync::Arc;
8
/// Analytics configuration options.
///
/// Controls what gets tracked, how long raw and aggregated data is kept,
/// and privacy behavior (Do Not Track, sampling).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyticsConfig {
    /// Enable/disable analytics collection entirely.
    pub enabled: bool,
    /// Hours to keep raw events before aggregation cleanup.
    pub raw_event_retention_hours: u32,
    /// Days to keep hourly aggregates.
    pub hourly_retention_days: u32,
    /// Days to keep daily aggregates (0 = forever).
    pub daily_retention_days: u32,
    /// Session timeout in minutes.
    pub session_timeout_minutes: u32,
    /// Exact request paths to exclude from tracking (e.g. "/health").
    pub excluded_paths: Vec<String>,
    /// Path prefixes to exclude (e.g. "/admin").
    pub excluded_prefixes: Vec<String>,
    /// Enable geographic lookup.
    pub geo_lookup: bool,
    /// Respect the browser's Do Not Track header.
    pub respect_dnt: bool,
    /// Sample rate in [0.0, 1.0] (1.0 = track 100% of requests).
    pub sample_rate: f64,
}
33
34impl Default for AnalyticsConfig {
35    fn default() -> Self {
36        Self {
37            enabled: true,
38            raw_event_retention_hours: 48,
39            hourly_retention_days: 90,
40            daily_retention_days: 0, // Keep forever
41            session_timeout_minutes: 30,
42            excluded_paths: vec![
43                "/health".into(),
44                "/robots.txt".into(),
45                "/favicon.ico".into(),
46            ],
47            excluded_prefixes: vec![
48                "/admin".into(),
49                "/api".into(),
50                "/static".into(),
51                "/media".into(),
52                "/_".into(),
53            ],
54            geo_lookup: true,
55            respect_dnt: true,
56            sample_rate: 1.0,
57        }
58    }
59}
60
61impl AnalyticsConfig {
62    /// Check if a path should be tracked
63    pub fn should_track(&self, path: &str) -> bool {
64        if !self.enabled {
65            return false;
66        }
67
68        // Check exact path matches
69        if self.excluded_paths.iter().any(|p| p == path) {
70            return false;
71        }
72
73        // Check prefix matches
74        if self
75            .excluded_prefixes
76            .iter()
77            .any(|prefix| path.starts_with(prefix))
78        {
79            return false;
80        }
81
82        // Check sample rate
83        if self.sample_rate < 1.0 {
84            use rand::Rng;
85            if rand::thread_rng().gen::<f64>() > self.sample_rate {
86                return false;
87            }
88        }
89
90        true
91    }
92
93    /// Check if DNT should be respected
94    pub fn should_respect_dnt(&self, dnt_header: Option<&str>) -> bool {
95        if !self.respect_dnt {
96            return false;
97        }
98        matches!(dnt_header, Some("1"))
99    }
100}
101
/// A single tracked page view, as captured per request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyticsEvent {
    /// Request path that was viewed.
    pub path: String,
    /// Referrer's domain, when the request carried one.
    pub referrer_domain: Option<String>,
    /// Country code from geo lookup, when available.
    pub country_code: Option<String>,
    /// Coarse device classification.
    pub device_type: DeviceType,
    /// Browser family name (presumably parsed from the user agent — the
    /// derivation is not in this file section; confirm against caller).
    pub browser_family: String,
    /// Anonymized session identifier (a hash; derivation not shown here).
    pub session_hash: String,
    /// Server response time in milliseconds, when measured.
    pub response_time_ms: Option<i64>,
    /// HTTP status code returned for the request.
    pub status_code: u16,
    /// ID of the CMS content item served, if the path maps to one.
    pub content_id: Option<i64>,
    /// Type of that content item, when known.
    pub content_type: Option<String>,
}
115
/// Coarse device classification for an analytics event.
///
/// Serialized by serde in lowercase ("desktop"/"mobile"/"tablet"),
/// matching the `Display` impl below.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum DeviceType {
    /// Default classification when nothing more specific applies.
    #[default]
    Desktop,
    Mobile,
    Tablet,
}
124
125impl std::fmt::Display for DeviceType {
126    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127        match self {
128            DeviceType::Desktop => write!(f, "desktop"),
129            DeviceType::Mobile => write!(f, "mobile"),
130            DeviceType::Tablet => write!(f, "tablet"),
131        }
132    }
133}
134
/// Aggregated numbers backing the main analytics dashboard view.
#[derive(Debug, Clone, Serialize)]
pub struct DashboardSummary {
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    /// Percent change in pageviews vs. the preceding window of equal length.
    pub pageviews_change: f64,
    /// Percent change in sessions vs. the preceding window.
    pub sessions_change: f64,
    pub avg_response_time_ms: i64,
    /// Percentage of requests with status code >= 400.
    pub error_rate: f64,
    pub top_pages: Vec<PageStats>,
    pub top_referrers: Vec<ReferrerStats>,
    /// Pageview counts keyed by device type string.
    pub devices: HashMap<String, i64>,
    /// Pageview counts keyed by browser family.
    pub browsers: HashMap<String, i64>,
    pub total_devices: i64,
    pub total_browsers: i64,
    pub countries: Vec<CountryStats>,
    /// Daily pageview counts, ascending by date.
    pub pageviews_over_time: Vec<TimeSeriesPoint>,
    /// Max daily value, floored at 1 (for chart axis scaling).
    pub pageviews_max: i64,
}
153
/// Pageview/session counts for a single path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageStats {
    pub path: String,
    /// Content title when the path maps to a known content item
    /// (left `None` by `get_summary`'s raw query).
    pub title: Option<String>,
    pub content_type: Option<String>,
    pub pageviews: i64,
    pub unique_sessions: i64,
}
162
/// Session counts attributed to one referring domain.
#[derive(Debug, Clone, Serialize)]
pub struct ReferrerStats {
    pub domain: String,
    pub sessions: i64,
    /// Share of all referred sessions in the reported set, in percent.
    pub percentage: f64,
}
169
/// Session counts attributed to one country.
#[derive(Debug, Clone, Serialize)]
pub struct CountryStats {
    /// Country code as stored on events.
    pub code: String,
    /// Human-readable name resolved via `country_name`.
    pub name: String,
    pub sessions: i64,
    /// Share of all geolocated sessions in the reported set, in percent.
    pub percentage: f64,
}
177
/// One point on a time-series chart (timestamp is a date string as
/// produced by SQLite's `date()`).
#[derive(Debug, Clone, Serialize)]
pub struct TimeSeriesPoint {
    pub timestamp: String,
    pub value: i64,
}
183
/// Snapshot of very recent activity for the realtime dashboard panel.
#[derive(Debug, Clone, Serialize)]
pub struct RealtimeStats {
    /// Distinct sessions seen in the last 5 minutes.
    pub active_sessions: i64,
    /// Pageviews in the last 30 minutes.
    pub pageviews_30min: i64,
    /// Pages with visitors in the last 5 minutes, busiest first.
    pub current_pages: Vec<ActivePage>,
    /// Most recent referrers (last 5 minutes), newest first.
    pub recent_referrers: Vec<RecentReferrer>,
}
191
/// A page with currently-active visitors (realtime view).
#[derive(Debug, Clone, Serialize)]
pub struct ActivePage {
    pub path: String,
    /// Distinct sessions on this path in the realtime window.
    pub visitors: i64,
}
197
/// A referrer seen moments ago (realtime view).
#[derive(Debug, Clone, Serialize)]
pub struct RecentReferrer {
    pub domain: String,
    /// Seconds elapsed since the referred pageview.
    pub seconds_ago: i64,
}
203
/// Detailed statistics for a single content item.
#[derive(Debug, Clone, Serialize)]
pub struct ContentStats {
    pub content_id: i64,
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    /// Timestamp of the earliest recorded view, if any.
    pub first_viewed_at: Option<String>,
    /// Timestamp of the most recent recorded view, if any.
    pub last_viewed_at: Option<String>,
    /// Daily view counts over the last 30 days. Days with zero views are
    /// omitted, so entries are not evenly spaced.
    pub view_trend: Vec<i64>,
    pub top_referrers: Vec<ReferrerStats>,
    /// NOTE(review): read from analytics_content when cached; the raw-event
    /// fallback path in `get_content_stats` always reports 0.0.
    pub bounce_rate: f64,
}
215
/// Content performance data for dashboard
#[derive(Debug, Clone, Serialize)]
pub struct ContentPerformance {
    pub content_id: i64,
    pub title: String,
    pub content_type: String,
    pub slug: String,
    pub pageviews: i64,
    pub unique_sessions: i64,
    /// Average time on page in seconds (0 when no session timing data).
    pub avg_time_seconds: i64,
    pub bounce_rate: f64,
    /// Direction of change vs. the prior period: "up", "down", "stable".
    pub trend: String,
    /// Magnitude of that change, in percent.
    pub trend_percent: f64,
}
230
/// Daily aggregation data (one record per calendar day; the aggregation
/// code is not in this file section).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DailyStats {
    /// Calendar date this record covers.
    pub date: String,
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    pub top_pages: Vec<PageStats>,
    pub top_posts: Vec<PageStats>,
    /// Session/view counts keyed by referrer domain.
    pub referrers: HashMap<String, i64>,
    /// Counts keyed by country code.
    pub countries: HashMap<String, i64>,
    /// Counts keyed by device type string.
    pub devices: HashMap<String, i64>,
    /// Counts keyed by browser family.
    pub browsers: HashMap<String, i64>,
    pub avg_response_time_ms: i64,
    pub error_rate: f64,
    pub new_content_views: i64,
}
247
/// Export format options for analytics data dumps.
#[derive(Debug, Clone, Copy)]
pub enum ExportFormat {
    Json,
    Csv,
}
254
/// Exported analytics data: a self-contained bundle covering one date range.
#[derive(Debug, Clone, Serialize)]
pub struct AnalyticsExport {
    /// When this export was generated.
    pub exported_at: String,
    pub date_range: DateRange,
    pub summary: ExportSummary,
    pub daily_stats: Vec<DailyStats>,
    pub top_pages: Vec<PageStats>,
    pub referrers: Vec<ReferrerStats>,
    pub countries: Vec<CountryStats>,
}
266
/// Headline totals accompanying an export.
#[derive(Debug, Clone, Serialize)]
pub struct ExportSummary {
    pub total_pageviews: i64,
    pub unique_sessions: i64,
    pub avg_response_time_ms: i64,
}
273
/// Inclusive date range covered by an export.
#[derive(Debug, Clone, Serialize)]
pub struct DateRange {
    pub start: String,
    pub end: String,
}
279
/// Analytics service: writes raw page-view events and reads aggregated
/// statistics from the CMS database.
pub struct Analytics {
    // Project-local pooled database handle; `get()` yields a connection.
    db: Database,
    config: AnalyticsConfig,
}
284
285impl Analytics {
286    pub fn new(db: Database) -> Self {
287        Self {
288            db,
289            config: AnalyticsConfig::default(),
290        }
291    }
292
    /// Create an `Analytics` service with an explicit configuration.
    pub fn with_config(db: Database, config: AnalyticsConfig) -> Self {
        Self { db, config }
    }
296
    /// Borrow the active analytics configuration.
    pub fn config(&self) -> &AnalyticsConfig {
        &self.config
    }
300
301    /// Check if tracking should be performed for this request
302    pub fn should_track(&self, path: &str, dnt_header: Option<&str>) -> bool {
303        if self.config.should_respect_dnt(dnt_header) {
304            return false;
305        }
306        self.config.should_track(path)
307    }
308
    /// Insert one raw page-view event row and, when the event is tied to a
    /// content item, roll it up into the per-content aggregate table.
    ///
    /// # Errors
    /// Returns an error if a pooled connection cannot be obtained or either
    /// SQL statement fails.
    pub fn record_event(&self, event: &AnalyticsEvent) -> Result<()> {
        let conn = self.db.get()?;
        conn.execute(
            r#"
            INSERT INTO analytics_events
                (path, referrer_domain, country_code, device_type, browser_family,
                 session_hash, response_time_ms, status_code, content_id, content_type)
            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
            "#,
            rusqlite::params![
                event.path,
                event.referrer_domain,
                event.country_code,
                // Stored as its lowercase string form ("desktop" etc.).
                event.device_type.to_string(),
                event.browser_family,
                event.session_hash,
                event.response_time_ms,
                // Widened so it maps cleanly onto SQLite INTEGER affinity.
                event.status_code as i64,
                event.content_id,
                event.content_type,
            ],
        )?;

        // Update content analytics if we have a content_id
        if let Some(content_id) = event.content_id {
            self.update_content_analytics(
                content_id,
                &event.session_hash,
                event.referrer_domain.as_deref(),
            )?;
        }

        Ok(())
    }
343
    /// Update the analytics_content table for a specific content item.
    ///
    /// Called right after `record_event` inserts the raw row. Determines
    /// whether this event begins a new session for the content, upserts
    /// the pageview/session counters and view timestamps, and maintains a
    /// JSON-encoded top-10 referrer list.
    fn update_content_analytics(
        &self,
        content_id: i64,
        session_hash: &str,
        referrer: Option<&str>,
    ) -> Result<()> {
        let conn = self.db.get()?;
        let now = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();

        // Check if this is a new session for this content: no *other* event
        // from the same session in the last 30 minutes. The
        // `id != (SELECT MAX(id) ...)` clause skips the row record_event
        // just inserted, so only prior activity counts. On query failure we
        // conservatively treat the event as a new session.
        let is_new_session: bool = conn
            .query_row(
                r#"
                SELECT COUNT(*) = 0
                FROM analytics_events
                WHERE content_id = ?1 AND session_hash = ?2
                  AND timestamp >= datetime('now', '-30 minutes')
                  AND id != (SELECT MAX(id) FROM analytics_events WHERE content_id = ?1 AND session_hash = ?2)
                "#,
                rusqlite::params![content_id, session_hash],
                |row| row.get::<_, bool>(0),
            )
            .unwrap_or(true);

        // Upsert into analytics_content: a first view seeds both counters
        // and both timestamps; later views bump total_pageviews (and
        // unique_sessions only when this is a new session).
        conn.execute(
            r#"
            INSERT INTO analytics_content (content_id, total_pageviews, unique_sessions, first_viewed_at, last_viewed_at)
            VALUES (?1, 1, ?2, ?3, ?3)
            ON CONFLICT(content_id) DO UPDATE SET
                total_pageviews = total_pageviews + 1,
                unique_sessions = unique_sessions + ?2,
                last_viewed_at = ?3
            "#,
            rusqlite::params![content_id, if is_new_session { 1 } else { 0 }, &now],
        )?;

        // Update top referrers for this content if we have a referrer.
        // Stored as a JSON array of (domain, count) pairs, trimmed to 10.
        if let Some(domain) = referrer {
            // Missing row or NULL column falls back to an empty JSON list.
            let existing_referrers: String = conn
                .query_row(
                    "SELECT top_referrers FROM analytics_content WHERE content_id = ?1",
                    [content_id],
                    |row| row.get(0),
                )
                .unwrap_or_else(|_| "[]".to_string());

            let mut referrers: Vec<(String, i64)> =
                serde_json::from_str(&existing_referrers).unwrap_or_default();

            // Find and update or add the referrer
            if let Some(entry) = referrers.iter_mut().find(|(d, _)| d == domain) {
                entry.1 += 1;
            } else {
                referrers.push((domain.to_string(), 1));
            }

            // Sort by count (descending) and keep top 10
            referrers.sort_by(|a, b| b.1.cmp(&a.1));
            referrers.truncate(10);

            let referrers_json = serde_json::to_string(&referrers)?;
            conn.execute(
                "UPDATE analytics_content SET top_referrers = ?1 WHERE content_id = ?2",
                rusqlite::params![referrers_json, content_id],
            )?;
        }

        Ok(())
    }
415
    /// Build the dashboard summary for the trailing `days`-day window.
    ///
    /// Totals (pageviews, sessions, errors, response time) combine the raw
    /// `analytics_events` table with the `analytics_hourly` rollup table.
    /// The breakdowns (referrers, devices, browsers, countries) are
    /// computed from raw events only — see review notes inline. Percent
    /// changes compare against the preceding window of equal length.
    ///
    /// # Errors
    /// Returns an error when a connection cannot be obtained or a
    /// statement fails to prepare; individual scalar queries degrade to
    /// zeroed defaults instead of failing the whole summary.
    pub fn get_summary(&self, days: i64) -> Result<DashboardSummary> {
        let conn = self.db.get()?;

        let now = chrono::Utc::now();
        // Window boundaries as lexicographically-sortable UTC strings
        // (assumes stored timestamps use the same format — TODO confirm
        // against the schema's timestamp defaults).
        let cutoff = (now - chrono::TimeDelta::days(days))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();
        let prev_cutoff = (now - chrono::TimeDelta::days(days * 2))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();

        tracing::debug!("Analytics query: days={}, cutoff={}", days, cutoff);

        // Query both analytics_events (recent) and analytics_hourly (aggregated) tables
        let (events_pageviews, events_sessions, events_avg_response, events_errors): (
            i64,
            i64,
            Option<f64>,
            i64,
        ) = conn
            .query_row(
                r#"
                SELECT
                    COUNT(*) as pageviews,
                    COUNT(DISTINCT session_hash) as sessions,
                    AVG(response_time_ms) as avg_response,
                    SUM(CASE WHEN status_code >= 400 THEN 1 ELSE 0 END) as errors
                FROM analytics_events
                WHERE timestamp >= ?1
                "#,
                [&cutoff],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .unwrap_or((0, 0, None, 0));

        let (hourly_pageviews, hourly_sessions, hourly_avg_response, hourly_errors): (
            i64,
            i64,
            Option<f64>,
            i64,
        ) = conn
            .query_row(
                r#"
                SELECT
                    COALESCE(SUM(pageviews), 0) as pageviews,
                    COALESCE(SUM(unique_sessions), 0) as sessions,
                    AVG(avg_response_time_ms) as avg_response,
                    COALESCE(SUM(error_count), 0) as errors
                FROM analytics_hourly
                WHERE hour >= ?1
                "#,
                [&cutoff],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .unwrap_or((0, 0, None, 0));

        // NOTE(review): sessions from the two tables are summed, so a
        // session spanning both raw and rolled-up data is counted twice.
        let total_pageviews = events_pageviews + hourly_pageviews;
        let unique_sessions = events_sessions + hourly_sessions;
        // NOTE(review): prefers the raw-events average and only falls back
        // to the hourly average; a sample-weighted blend may be intended.
        let avg_response_time = events_avg_response
            .or(hourly_avg_response)
            .map(|v| v as i64);
        let error_count = events_errors + hourly_errors;

        // Same totals for the preceding window, for the change percentages.
        let (prev_events_pv, prev_events_sess): (i64, i64) = conn
            .query_row(
                r#"
                SELECT
                    COUNT(*) as pageviews,
                    COUNT(DISTINCT session_hash) as sessions
                FROM analytics_events
                WHERE timestamp >= ?1 AND timestamp < ?2
                "#,
                [&prev_cutoff, &cutoff],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap_or((0, 0));

        let (prev_hourly_pv, prev_hourly_sess): (i64, i64) = conn
            .query_row(
                r#"
                SELECT
                    COALESCE(SUM(pageviews), 0) as pageviews,
                    COALESCE(SUM(unique_sessions), 0) as sessions
                FROM analytics_hourly
                WHERE hour >= ?1 AND hour < ?2
                "#,
                [&prev_cutoff, &cutoff],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap_or((0, 0));

        let prev_pageviews = prev_events_pv + prev_hourly_pv;
        let prev_sessions = prev_events_sess + prev_hourly_sess;

        // Percent deltas; 0.0 when there is no prior-period baseline.
        let pageviews_change = if prev_pageviews > 0 {
            ((total_pageviews - prev_pageviews) as f64 / prev_pageviews as f64) * 100.0
        } else {
            0.0
        };

        let sessions_change = if prev_sessions > 0 {
            ((unique_sessions - prev_sessions) as f64 / prev_sessions as f64) * 100.0
        } else {
            0.0
        };

        let error_rate = if total_pageviews > 0 {
            (error_count as f64 / total_pageviews as f64) * 100.0
        } else {
            0.0
        };

        // Top pages from both tables combined
        let mut stmt = conn.prepare(
            r#"
            SELECT path, SUM(views) as total_views, SUM(sessions) as total_sessions
            FROM (
                SELECT path, COUNT(*) as views, COUNT(DISTINCT session_hash) as sessions
                FROM analytics_events
                WHERE timestamp >= ?1
                GROUP BY path
                UNION ALL
                SELECT path, pageviews as views, unique_sessions as sessions
                FROM analytics_hourly
                WHERE hour >= ?1
            )
            GROUP BY path
            ORDER BY total_views DESC
            LIMIT 10
            "#,
        )?;
        let top_pages: Vec<PageStats> = stmt
            .query_map([&cutoff], |row| {
                Ok(PageStats {
                    path: row.get(0)?,
                    // Title/content_type are not resolved here; callers can
                    // join against content if needed.
                    title: None,
                    content_type: None,
                    pageviews: row.get(1)?,
                    unique_sessions: row.get(2)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // NOTE(review): referrers, devices, browsers, and countries below
        // query raw analytics_events only — traffic already rolled up into
        // analytics_hourly is not reflected in these breakdowns.
        let mut stmt = conn.prepare(
            r#"
            SELECT referrer_domain, COUNT(DISTINCT session_hash) as sessions
            FROM analytics_events
            WHERE timestamp >= ?1 AND referrer_domain IS NOT NULL
            GROUP BY referrer_domain
            ORDER BY sessions DESC
            LIMIT 10
            "#,
        )?;
        let referrer_rows: Vec<(String, i64)> = stmt
            .query_map([&cutoff], |row| Ok((row.get(0)?, row.get(1)?)))?
            .filter_map(|r| r.ok())
            .collect();

        // Percentages are relative to the top-10 set, not all referrers.
        let referrer_total: i64 = referrer_rows.iter().map(|(_, s)| s).sum();
        let top_referrers: Vec<ReferrerStats> = referrer_rows
            .into_iter()
            .map(|(domain, sessions)| ReferrerStats {
                domain,
                sessions,
                percentage: if referrer_total > 0 {
                    (sessions as f64 / referrer_total as f64) * 100.0
                } else {
                    0.0
                },
            })
            .collect();

        let mut devices: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT device_type, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1
            GROUP BY device_type
            ORDER BY count DESC
            LIMIT 20
            "#,
        )?;
        for row in stmt.query_map([&cutoff], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((device, count)) = row {
                devices.insert(device, count);
            }
        }

        let mut browsers: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT browser_family, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1
            GROUP BY browser_family
            ORDER BY count DESC
            LIMIT 50
            "#,
        )?;
        for row in stmt.query_map([&cutoff], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((browser, count)) = row {
                browsers.insert(browser, count);
            }
        }

        let mut stmt = conn.prepare(
            r#"
            SELECT country_code, COUNT(DISTINCT session_hash) as sessions
            FROM analytics_events
            WHERE timestamp >= ?1 AND country_code IS NOT NULL
            GROUP BY country_code
            ORDER BY sessions DESC
            LIMIT 10
            "#,
        )?;
        let country_rows: Vec<(String, i64)> = stmt
            .query_map([&cutoff], |row| Ok((row.get(0)?, row.get(1)?)))?
            .filter_map(|r| r.ok())
            .collect();

        // Percentages relative to the top-10 country set.
        let country_total: i64 = country_rows.iter().map(|(_, s)| s).sum();
        let countries: Vec<CountryStats> = country_rows
            .into_iter()
            .map(|(code, sessions)| CountryStats {
                name: country_name(&code),
                code,
                sessions,
                percentage: if country_total > 0 {
                    (sessions as f64 / country_total as f64) * 100.0
                } else {
                    0.0
                },
            })
            .collect();

        // Pageviews over time from both tables
        let mut stmt = conn.prepare(
            r#"
            SELECT day, SUM(views) as total_views
            FROM (
                SELECT date(timestamp) as day, COUNT(*) as views
                FROM analytics_events
                WHERE timestamp >= ?1
                GROUP BY day
                UNION ALL
                SELECT date(hour) as day, pageviews as views
                FROM analytics_hourly
                WHERE hour >= ?1
            )
            GROUP BY day
            ORDER BY day ASC
            "#,
        )?;
        let pageviews_over_time: Vec<TimeSeriesPoint> = stmt
            .query_map([&cutoff], |row| {
                Ok(TimeSeriesPoint {
                    timestamp: row.get(0)?,
                    value: row.get::<_, i64>(1)?,
                })
            })?
            .filter_map(|r| {
                // Log-and-skip on row decode errors rather than aborting.
                r.map_err(|e| tracing::error!("Pageviews over time query error: {}", e))
                    .ok()
            })
            .collect();

        // Floor at 1 so chart scaling never divides by zero.
        let pageviews_max = pageviews_over_time
            .iter()
            .map(|p| p.value)
            .max()
            .unwrap_or(1)
            .max(1);

        let total_devices: i64 = devices.values().sum();
        let total_browsers: i64 = browsers.values().sum();

        Ok(DashboardSummary {
            total_pageviews,
            unique_sessions,
            pageviews_change,
            sessions_change,
            avg_response_time_ms: avg_response_time.unwrap_or(0),
            error_rate,
            top_pages,
            top_referrers,
            devices,
            browsers,
            total_devices,
            total_browsers,
            countries,
            pageviews_over_time,
            pageviews_max,
        })
    }
716
    /// Snapshot recent activity for the realtime panel.
    ///
    /// Uses two windows over raw events: 5 minutes for active sessions,
    /// current pages, and recent referrers; 30 minutes for the pageview
    /// count. Scalar queries degrade to 0 on failure.
    pub fn get_realtime(&self) -> Result<RealtimeStats> {
        let conn = self.db.get()?;

        let now = chrono::Utc::now();
        let five_min_ago = (now - chrono::TimeDelta::minutes(5))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();
        let thirty_min_ago = (now - chrono::TimeDelta::minutes(30))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();

        // Distinct sessions seen in the last 5 minutes.
        let active_sessions: i64 = conn
            .query_row(
                r#"
                SELECT COUNT(DISTINCT session_hash)
                FROM analytics_events
                WHERE timestamp >= ?1
                "#,
                [&five_min_ago],
                |row| row.get(0),
            )
            .unwrap_or(0);

        let pageviews_30min: i64 = conn
            .query_row(
                r#"
                SELECT COUNT(*)
                FROM analytics_events
                WHERE timestamp >= ?1
                "#,
                [&thirty_min_ago],
                |row| row.get(0),
            )
            .unwrap_or(0);

        // Busiest pages (by distinct sessions) in the last 5 minutes.
        let mut stmt = conn.prepare(
            r#"
            SELECT path, COUNT(DISTINCT session_hash) as visitors
            FROM analytics_events
            WHERE timestamp >= ?1
            GROUP BY path
            ORDER BY visitors DESC
            LIMIT 10
            "#,
        )?;
        let current_pages: Vec<ActivePage> = stmt
            .query_map([&five_min_ago], |row| {
                Ok(ActivePage {
                    path: row.get(0)?,
                    visitors: row.get(1)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Recent referrers with time ago (seconds derived from the
        // julian-day difference: days * 86400).
        let mut stmt = conn.prepare(
            r#"
            SELECT referrer_domain,
                   CAST((julianday('now') - julianday(timestamp)) * 86400 AS INTEGER) as seconds_ago
            FROM analytics_events
            WHERE timestamp >= ?1 AND referrer_domain IS NOT NULL
            ORDER BY timestamp DESC
            LIMIT 5
            "#,
        )?;
        let recent_referrers: Vec<RecentReferrer> = stmt
            .query_map([&five_min_ago], |row| {
                Ok(RecentReferrer {
                    domain: row.get(0)?,
                    seconds_ago: row.get(1)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        Ok(RealtimeStats {
            active_sessions,
            pageviews_30min,
            current_pages,
            recent_referrers,
        })
    }
800
    /// Fetch statistics for one content item.
    ///
    /// Prefers the pre-aggregated `analytics_content` row (maintained by
    /// `update_content_analytics`); when none exists, falls back to
    /// computing totals directly from raw events. The fallback path has no
    /// referrer list and reports a bounce rate of 0.0.
    pub fn get_content_stats(&self, content_id: i64) -> Result<ContentStats> {
        let conn = self.db.get()?;

        // Try to get from analytics_content first
        let cached: Option<(
            i64,
            i64,
            Option<String>,
            Option<String>,
            String,
            Option<f64>,
        )> = conn
            .query_row(
                r#"
                SELECT total_pageviews, unique_sessions, first_viewed_at, last_viewed_at,
                       COALESCE(top_referrers, '[]'), bounce_rate
                FROM analytics_content
                WHERE content_id = ?1
                "#,
                [content_id],
                |row| {
                    Ok((
                        row.get(0)?,
                        row.get(1)?,
                        row.get(2)?,
                        row.get(3)?,
                        row.get(4)?,
                        row.get(5)?,
                    ))
                },
            )
            .ok();

        if let Some((
            total_pageviews,
            unique_sessions,
            first_viewed_at,
            last_viewed_at,
            referrers_json,
            bounce_rate,
        )) = cached
        {
            // Get view trend from events: one count per day with activity
            // over the last 30 days (zero-view days are not filled in).
            let mut stmt = conn.prepare(
                r#"
                SELECT date(timestamp) as day, COUNT(*) as views
                FROM analytics_events
                WHERE content_id = ?1 AND timestamp >= datetime('now', '-30 days')
                GROUP BY day
                ORDER BY day ASC
                "#,
            )?;
            let view_trend: Vec<i64> = stmt
                .query_map([content_id], |row| row.get::<_, i64>(1))?
                .filter_map(|r| r.ok())
                .collect();

            // Decode the stored JSON referrer list; percentages are
            // relative to this (top-10) set.
            let referrer_data: Vec<(String, i64)> =
                serde_json::from_str(&referrers_json).unwrap_or_default();
            let total_ref: i64 = referrer_data.iter().map(|(_, c)| c).sum();
            let top_referrers: Vec<ReferrerStats> = referrer_data
                .into_iter()
                .map(|(domain, sessions)| ReferrerStats {
                    domain,
                    sessions,
                    percentage: if total_ref > 0 {
                        (sessions as f64 / total_ref as f64) * 100.0
                    } else {
                        0.0
                    },
                })
                .collect();

            return Ok(ContentStats {
                content_id,
                total_pageviews,
                unique_sessions,
                first_viewed_at,
                last_viewed_at,
                view_trend,
                top_referrers,
                bounce_rate: bounce_rate.unwrap_or(0.0),
            });
        }

        // Fall back to querying events directly
        let (total_pageviews, unique_sessions): (i64, i64) = conn
            .query_row(
                r#"
                SELECT COUNT(*), COUNT(DISTINCT session_hash)
                FROM analytics_events
                WHERE content_id = ?1
                "#,
                [content_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap_or((0, 0));

        // MIN/MAX over zero rows yield NULL, which surfaces as None here.
        let first_viewed_at: Option<String> = conn
            .query_row(
                "SELECT MIN(timestamp) FROM analytics_events WHERE content_id = ?1",
                [content_id],
                |row| row.get(0),
            )
            .ok();

        let last_viewed_at: Option<String> = conn
            .query_row(
                "SELECT MAX(timestamp) FROM analytics_events WHERE content_id = ?1",
                [content_id],
                |row| row.get(0),
            )
            .ok();

        let mut stmt = conn.prepare(
            r#"
            SELECT date(timestamp) as day, COUNT(*) as views
            FROM analytics_events
            WHERE content_id = ?1 AND timestamp >= datetime('now', '-30 days')
            GROUP BY day
            ORDER BY day ASC
            "#,
        )?;
        let view_trend: Vec<i64> = stmt
            .query_map([content_id], |row| row.get::<_, i64>(1))?
            .filter_map(|r| r.ok())
            .collect();

        Ok(ContentStats {
            content_id,
            total_pageviews,
            unique_sessions,
            first_viewed_at,
            last_viewed_at,
            view_trend,
            // No referrer aggregation or bounce data on the fallback path.
            top_referrers: vec![],
            bounce_rate: 0.0,
        })
    }
940
    /// Get content performance data for dashboard
    ///
    /// Returns up to `limit` published content items ranked by pageviews over
    /// the last `days` days. Each item carries a trend label ("up"/"down"/
    /// "stable") computed by comparing the current period against the
    /// immediately preceding period of equal length.
    pub fn get_content_performance(
        &self,
        days: i64,
        limit: i64,
    ) -> Result<Vec<ContentPerformance>> {
        let conn = self.db.get()?;

        // Window boundaries: [cutoff, now] is the current period,
        // [prev_cutoff, cutoff) the comparison period of the same length.
        let cutoff = (chrono::Utc::now() - chrono::TimeDelta::days(days))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();
        let prev_cutoff = (chrono::Utc::now() - chrono::TimeDelta::days(days * 2))
            .format("%Y-%m-%dT%H:%M:%SZ")
            .to_string();

        // LEFT JOIN keeps content with zero recent events out of the result
        // via HAVING (SQLite allows the `pageviews` alias there). Bounce rate
        // comes from the pre-aggregated analytics_content table, not events.
        let mut stmt = conn.prepare(
            r#"
            SELECT
                c.id,
                c.title,
                c.content_type,
                c.slug,
                COUNT(e.id) as pageviews,
                COUNT(DISTINCT e.session_hash) as sessions,
                COALESCE(ac.bounce_rate, 0) as bounce_rate
            FROM content c
            LEFT JOIN analytics_events e ON e.content_id = c.id AND e.timestamp >= ?1
            LEFT JOIN analytics_content ac ON ac.content_id = c.id
            WHERE c.status = 'published'
            GROUP BY c.id
            HAVING pageviews > 0
            ORDER BY pageviews DESC
            LIMIT ?2
            "#,
        )?;

        let mut results: Vec<ContentPerformance> = stmt
            .query_map(rusqlite::params![&cutoff, limit], |row| {
                Ok(ContentPerformance {
                    content_id: row.get(0)?,
                    title: row.get(1)?,
                    content_type: row.get(2)?,
                    slug: row.get(3)?,
                    pageviews: row.get(4)?,
                    unique_sessions: row.get(5)?,
                    avg_time_seconds: 0, // Will be calculated if we have session data
                    bounce_rate: row.get(6)?,
                    trend: "stable".to_string(),
                    trend_percent: 0.0,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Calculate trends by comparing to previous period
        // NOTE(review): this is one query per result row (N+1). Bounded by
        // `limit`, so acceptable for a dashboard; batch it if limit grows.
        for content in &mut results {
            let prev_views: i64 = conn
                .query_row(
                    r#"
                    SELECT COUNT(*)
                    FROM analytics_events
                    WHERE content_id = ?1 AND timestamp >= ?2 AND timestamp < ?3
                    "#,
                    rusqlite::params![content.content_id, &prev_cutoff, &cutoff],
                    |row| row.get(0),
                )
                .unwrap_or(0);

            // With no prior-period data the defaults ("stable", 0.0) stand;
            // otherwise anything beyond +/-10% flips the trend label.
            if prev_views > 0 {
                let change = ((content.pageviews - prev_views) as f64 / prev_views as f64) * 100.0;
                content.trend_percent = change;
                content.trend = if change > 10.0 {
                    "up".to_string()
                } else if change < -10.0 {
                    "down".to_string()
                } else {
                    "stable".to_string()
                };
            }
        }

        Ok(results)
    }
1024
1025    pub fn aggregate_hourly(&self) -> Result<usize> {
1026        let conn = self.db.get()?;
1027
1028        let count = conn.execute(
1029            r#"
1030            INSERT INTO analytics_hourly (hour, path, content_id, content_type,
1031                                          pageviews, unique_sessions, avg_response_time_ms, error_count)
1032            SELECT
1033                strftime('%Y-%m-%dT%H:00:00Z', timestamp) as hour,
1034                path,
1035                content_id,
1036                content_type,
1037                COUNT(*) as pageviews,
1038                COUNT(DISTINCT session_hash) as unique_sessions,
1039                AVG(response_time_ms) as avg_response_time_ms,
1040                SUM(CASE WHEN status_code >= 400 THEN 1 ELSE 0 END) as error_count
1041            FROM analytics_events
1042            WHERE timestamp < strftime('%Y-%m-%dT%H:00:00Z', 'now')
1043              AND timestamp >= datetime('now', '-2 hours')
1044            GROUP BY hour, path, content_id, content_type
1045            ON CONFLICT(hour, path) DO UPDATE SET
1046                pageviews = analytics_hourly.pageviews + excluded.pageviews,
1047                unique_sessions = analytics_hourly.unique_sessions + excluded.unique_sessions,
1048                error_count = analytics_hourly.error_count + excluded.error_count
1049            "#,
1050            [],
1051        )?;
1052
1053        // Clean up old raw events based on config
1054        let retention_hours = self.config.raw_event_retention_hours as i64;
1055        conn.execute(
1056            &format!(
1057                "DELETE FROM analytics_events WHERE timestamp < datetime('now', '-{} hours')",
1058                retention_hours
1059            ),
1060            [],
1061        )?;
1062
1063        Ok(count)
1064    }
1065
    /// Aggregate daily statistics
    ///
    /// Rolls yesterday's (UTC) traffic into a single `analytics_daily` row.
    /// Totals come from the hourly aggregates; the referrer/country/device/
    /// browser breakdowns are read from raw events, which may already be
    /// partially pruned, so those breakdowns are best-effort.
    ///
    /// Idempotent per day: returns `Ok(0)` if yesterday was already
    /// aggregated, `Ok(1)` after inserting a new row. Also refreshes content
    /// bounce rates and applies the configured hourly/daily retention windows.
    pub fn aggregate_daily(&self) -> Result<usize> {
        let conn = self.db.get()?;

        let yesterday = (chrono::Utc::now() - chrono::TimeDelta::days(1))
            .format("%Y-%m-%d")
            .to_string();

        // Check if already aggregated
        let exists: bool = conn
            .query_row(
                "SELECT EXISTS(SELECT 1 FROM analytics_daily WHERE date = ?1)",
                [&yesterday],
                |row| row.get(0),
            )
            .unwrap_or(false);

        if exists {
            return Ok(0);
        }

        // Compute daily stats from hourly data
        let start = format!("{}T00:00:00Z", yesterday);
        let end = format!("{}T23:59:59Z", yesterday);

        // NOTE(review): summing hourly unique_sessions over-counts sessions
        // that span multiple hours — the daily figure is an upper bound.
        let (total_pageviews, unique_sessions, avg_response, error_count): (
            i64,
            i64,
            Option<f64>,
            i64,
        ) = conn
            .query_row(
                r#"
                SELECT
                    COALESCE(SUM(pageviews), 0),
                    COALESCE(SUM(unique_sessions), 0),
                    AVG(avg_response_time_ms),
                    COALESCE(SUM(error_count), 0)
                FROM analytics_hourly
                WHERE hour >= ?1 AND hour <= ?2
                "#,
                [&start, &end],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
            )
            .unwrap_or((0, 0, None, 0));

        // Top pages (all content types), stored as JSON on the daily row.
        let mut stmt = conn.prepare(
            r#"
            SELECT path, SUM(pageviews) as views, SUM(unique_sessions) as sessions
            FROM analytics_hourly
            WHERE hour >= ?1 AND hour <= ?2
            GROUP BY path
            ORDER BY views DESC
            LIMIT 10
            "#,
        )?;
        let top_pages: Vec<PageStats> = stmt
            .query_map([&start, &end], |row| {
                Ok(PageStats {
                    path: row.get(0)?,
                    title: None,
                    content_type: None,
                    pageviews: row.get(1)?,
                    unique_sessions: row.get(2)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Top posts (filter by content_type)
        let mut stmt = conn.prepare(
            r#"
            SELECT path, SUM(pageviews) as views, SUM(unique_sessions) as sessions
            FROM analytics_hourly
            WHERE hour >= ?1 AND hour <= ?2 AND content_type = 'post'
            GROUP BY path
            ORDER BY views DESC
            LIMIT 10
            "#,
        )?;
        let top_posts: Vec<PageStats> = stmt
            .query_map([&start, &end], |row| {
                Ok(PageStats {
                    path: row.get(0)?,
                    title: None,
                    content_type: Some("post".to_string()),
                    pageviews: row.get(1)?,
                    unique_sessions: row.get(2)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        // Get referrers, countries, devices, browsers from events (if still available)
        let mut referrers: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT referrer_domain, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2 AND referrer_domain IS NOT NULL
            GROUP BY referrer_domain
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((domain, count)) = row {
                referrers.insert(domain, count);
            }
        }

        let mut countries: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT country_code, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2 AND country_code IS NOT NULL
            GROUP BY country_code
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((code, count)) = row {
                countries.insert(code, count);
            }
        }

        // No NOT NULL filter here: a NULL device_type row fails the String
        // get() and is silently dropped by the `if let Ok` below.
        let mut devices: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT device_type, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2
            GROUP BY device_type
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((device, count)) = row {
                devices.insert(device, count);
            }
        }

        let mut browsers: HashMap<String, i64> = HashMap::new();
        let mut stmt = conn.prepare(
            r#"
            SELECT browser_family, COUNT(*) as count
            FROM analytics_events
            WHERE timestamp >= ?1 AND timestamp <= ?2
            GROUP BY browser_family
            "#,
        )?;
        for row in stmt.query_map([&start, &end], |row| {
            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
        })? {
            if let Ok((browser, count)) = row {
                browsers.insert(browser, count);
            }
        }

        // Count views of new content (< 7 days old)
        let new_content_views: i64 = conn
            .query_row(
                r#"
                SELECT COUNT(*)
                FROM analytics_events e
                JOIN content c ON e.content_id = c.id
                WHERE e.timestamp >= ?1 AND e.timestamp <= ?2
                  AND c.created_at >= datetime('now', '-7 days')
                "#,
                [&start, &end],
                |row| row.get(0),
            )
            .unwrap_or(0);

        // Error percentage across the day; guarded against division by zero.
        let error_rate = if total_pageviews > 0 {
            (error_count as f64 / total_pageviews as f64) * 100.0
        } else {
            0.0
        };

        // Insert daily record; breakdown maps are serialized to JSON columns.
        // `avg_response` is truncated to whole milliseconds (`as i64`).
        conn.execute(
            r#"
            INSERT INTO analytics_daily (date, total_pageviews, unique_sessions, top_pages, top_posts,
                                         referrers, countries, devices, browsers, avg_response_time_ms,
                                         error_rate, new_content_views)
            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)
            "#,
            rusqlite::params![
                yesterday,
                total_pageviews,
                unique_sessions,
                serde_json::to_string(&top_pages)?,
                serde_json::to_string(&top_posts)?,
                serde_json::to_string(&referrers)?,
                serde_json::to_string(&countries)?,
                serde_json::to_string(&devices)?,
                serde_json::to_string(&browsers)?,
                avg_response.map(|v| v as i64),
                error_rate,
                new_content_views,
            ],
        )?;

        // Update content analytics bounce rates
        self.update_bounce_rates()?;

        // Clean up old hourly data based on config
        if self.config.hourly_retention_days > 0 {
            conn.execute(
                &format!(
                    "DELETE FROM analytics_hourly WHERE hour < datetime('now', '-{} days')",
                    self.config.hourly_retention_days
                ),
                [],
            )?;
        }

        // Clean up old daily data based on config (0 means keep forever)
        if self.config.daily_retention_days > 0 {
            conn.execute(
                &format!(
                    "DELETE FROM analytics_daily WHERE date < date('now', '-{} days')",
                    self.config.daily_retention_days
                ),
                [],
            )?;
        }

        Ok(1)
    }
1301
    /// Update bounce rates for all content in analytics_content
    ///
    /// "Bounce" here means a session that recorded exactly one event for the
    /// given content — a per-content single-view rate, not a strict
    /// single-page-session definition (a session viewing two different pages
    /// once each counts as a bounce for both).
    fn update_bounce_rates(&self) -> Result<()> {
        let conn = self.db.get()?;

        // Calculate bounce rate: single-page sessions / total sessions
        // NOTE(review): the derived table references
        // analytics_content.content_id from the outer UPDATE; SQLite has no
        // LATERAL join, so verify this correlated FROM-subquery executes on
        // the bundled SQLite version rather than erroring at runtime.
        conn.execute(
            r#"
            UPDATE analytics_content
            SET bounce_rate = (
                SELECT CAST(
                    SUM(CASE WHEN session_pageviews = 1 THEN 1 ELSE 0 END) AS REAL
                ) / NULLIF(COUNT(DISTINCT session_hash), 0) * 100
                FROM (
                    SELECT session_hash, COUNT(*) as session_pageviews
                    FROM analytics_events
                    WHERE content_id = analytics_content.content_id
                    GROUP BY session_hash
                )
            )
            WHERE content_id IN (SELECT DISTINCT content_id FROM analytics_events WHERE content_id IS NOT NULL)
            "#,
            [],
        )?;

        Ok(())
    }
1328
1329    pub fn cleanup_old_data(&self, hourly_retention_days: i64) -> Result<()> {
1330        let conn = self.db.get()?;
1331
1332        conn.execute(
1333            "DELETE FROM analytics_hourly WHERE hour < datetime('now', ?1)",
1334            [format!("-{} days", hourly_retention_days)],
1335        )?;
1336
1337        Ok(())
1338    }
1339
    /// Export analytics data
    ///
    /// Builds an export covering the last `days` days from the daily
    /// aggregates plus the summary, and renders it either as pretty JSON
    /// (full structure) or CSV (daily totals only).
    ///
    /// NOTE(review): the range ends at today's date, but today's row only
    /// exists after `aggregate_daily` has run — the current day is usually
    /// absent from `daily_stats`; confirm that is intended.
    pub fn export(&self, days: i64, format: ExportFormat) -> Result<String> {
        let now = chrono::Utc::now();
        let start_date = (now - chrono::TimeDelta::days(days))
            .format("%Y-%m-%d")
            .to_string();
        let end_date = now.format("%Y-%m-%d").to_string();

        let summary = self.get_summary(days)?;

        let conn = self.db.get()?;

        // Get daily stats
        let mut stmt = conn.prepare(
            r#"
            SELECT date, total_pageviews, unique_sessions, top_pages, top_posts,
                   referrers, countries, devices, browsers, avg_response_time_ms,
                   error_rate, new_content_views
            FROM analytics_daily
            WHERE date >= ?1 AND date <= ?2
            ORDER BY date ASC
            "#,
        )?;

        // JSON columns are deserialized leniently: a corrupt blob degrades to
        // an empty collection (unwrap_or_default) instead of failing the export.
        let daily_stats: Vec<DailyStats> = stmt
            .query_map([&start_date, &end_date], |row| {
                let top_pages_json: String = row.get(3)?;
                let top_posts_json: String = row.get(4)?;
                let referrers_json: String = row.get(5)?;
                let countries_json: String = row.get(6)?;
                let devices_json: String = row.get(7)?;
                let browsers_json: String = row.get(8)?;

                Ok(DailyStats {
                    date: row.get(0)?,
                    total_pageviews: row.get(1)?,
                    unique_sessions: row.get(2)?,
                    top_pages: serde_json::from_str(&top_pages_json).unwrap_or_default(),
                    top_posts: serde_json::from_str(&top_posts_json).unwrap_or_default(),
                    referrers: serde_json::from_str(&referrers_json).unwrap_or_default(),
                    countries: serde_json::from_str(&countries_json).unwrap_or_default(),
                    devices: serde_json::from_str(&devices_json).unwrap_or_default(),
                    browsers: serde_json::from_str(&browsers_json).unwrap_or_default(),
                    avg_response_time_ms: row.get::<_, Option<i64>>(9)?.unwrap_or(0),
                    error_rate: row.get::<_, Option<f64>>(10)?.unwrap_or(0.0),
                    new_content_views: row.get(11)?,
                })
            })?
            .filter_map(|r| r.ok())
            .collect();

        let export = AnalyticsExport {
            exported_at: now.format("%Y-%m-%dT%H:%M:%SZ").to_string(),
            date_range: DateRange {
                start: start_date,
                end: end_date,
            },
            summary: ExportSummary {
                total_pageviews: summary.total_pageviews,
                unique_sessions: summary.unique_sessions,
                avg_response_time_ms: summary.avg_response_time_ms,
            },
            daily_stats,
            top_pages: summary.top_pages,
            referrers: summary.top_referrers,
            countries: summary.countries,
        };

        match format {
            ExportFormat::Json => Ok(serde_json::to_string_pretty(&export)?),
            // CSV deliberately flattens to the per-day totals only; the nested
            // breakdowns are available via the JSON export.
            ExportFormat::Csv => {
                let mut csv = String::new();
                csv.push_str("date,pageviews,sessions,avg_response_ms,error_rate\n");
                for day in &export.daily_stats {
                    csv.push_str(&format!(
                        "{},{},{},{},{:.2}\n",
                        day.date,
                        day.total_pageviews,
                        day.unique_sessions,
                        day.avg_response_time_ms,
                        day.error_rate
                    ));
                }
                Ok(csv)
            }
        }
    }
1427}
1428
1429pub fn generate_session_hash(ip: &str, user_agent: &str, daily_salt: &str) -> String {
1430    let anonymized_ip = anonymize_ip(ip);
1431    let browser = extract_browser_family(user_agent);
1432
1433    let input = format!("{}|{}|{}", daily_salt, anonymized_ip, browser);
1434    let mut hasher = Sha256::new();
1435    hasher.update(input.as_bytes());
1436    let result = hasher.finalize();
1437    hex::encode(&result[..8])
1438}
1439
1440pub fn get_daily_salt(db: &Database) -> Result<String> {
1441    let conn = db.get()?;
1442    let today = chrono::Utc::now().format("%Y-%m-%d").to_string();
1443    let key = format!("session_salt_{}", today);
1444
1445    let existing: Option<String> = conn
1446        .query_row(
1447            "SELECT value FROM analytics_settings WHERE key = ?1",
1448            [&key],
1449            |row| row.get(0),
1450        )
1451        .ok();
1452
1453    if let Some(salt) = existing {
1454        return Ok(salt);
1455    }
1456
1457    let salt: String = (0..32)
1458        .map(|_| format!("{:02x}", rand::random::<u8>()))
1459        .collect();
1460
1461    conn.execute(
1462        "INSERT OR REPLACE INTO analytics_settings (key, value) VALUES (?1, ?2)",
1463        [&key, &salt],
1464    )?;
1465
1466    conn.execute(
1467        "DELETE FROM analytics_settings WHERE key LIKE 'session_salt_%' AND key != ?1",
1468        [&key],
1469    )?;
1470
1471    Ok(salt)
1472}
1473
/// Truncate an IP address for privacy before hashing or storage.
///
/// IPv6 keeps only the first three segments ("a:b:c:*"); IPv4 keeps the
/// first two octets ("x.y.0.0"). Anything that does not look like either
/// form maps to "unknown".
fn anonymize_ip(ip: &str) -> String {
    if ip.contains(':') {
        // IPv6: require at least 4 segments, keep the first 3.
        let segs: Vec<&str> = ip.split(':').collect();
        match segs.as_slice() {
            [a, b, c, _, ..] => format!("{}:{}:{}:*", a, b, c),
            _ => "unknown".to_string(),
        }
    } else {
        // IPv4: require exactly 4 octets, zero the last two.
        let octets: Vec<&str> = ip.split('.').collect();
        match octets.as_slice() {
            [a, b, _, _] => format!("{}.{}.0.0", a, b),
            _ => "unknown".to_string(),
        }
    }
}
1490
/// Map a raw User-Agent string to a coarse browser family.
///
/// Match order matters: Edge and Opera user agents also contain "chrome",
/// and Chrome user agents contain "safari", so the more specific checks
/// must come first. (Previously the Chrome check preceded the Opera check,
/// so desktop Opera — "Chrome/… OPR/…" — was misreported as Chrome.)
pub fn extract_browser_family(user_agent: &str) -> String {
    let ua = user_agent.to_lowercase();
    if ua.contains("firefox") {
        "Firefox".to_string()
    } else if ua.contains("edg/") || ua.contains("edge") {
        "Edge".to_string()
    } else if ua.contains("opera") || ua.contains("opr/") {
        // Desktop Opera ships "Chrome/… OPR/…": must be tested before Chrome.
        "Opera".to_string()
    } else if ua.contains("chrome") || ua.contains("chromium") {
        "Chrome".to_string()
    } else if ua.contains("safari") {
        "Safari".to_string()
    } else {
        "Other".to_string()
    }
}
1507
1508pub fn extract_device_type(user_agent: &str) -> DeviceType {
1509    let ua = user_agent.to_lowercase();
1510    if ua.contains("mobile") || ua.contains("android") && !ua.contains("tablet") {
1511        DeviceType::Mobile
1512    } else if ua.contains("tablet") || ua.contains("ipad") {
1513        DeviceType::Tablet
1514    } else {
1515        DeviceType::Desktop
1516    }
1517}
1518
1519pub fn extract_referrer_domain(referrer: &str) -> Option<String> {
1520    if referrer.is_empty() {
1521        return None;
1522    }
1523    url::Url::parse(referrer)
1524        .ok()
1525        .and_then(|u| u.host_str().map(|h| h.to_string()))
1526}
1527
/// Lookup country code from IP address using embedded database
/// This uses a simplified approach based on IP ranges for major regions
///
/// Private/loopback/link-local addresses return "LO"; inputs that are not
/// well-formed dotted-quad IPv4 return `None`.
pub fn lookup_country(ip: &str) -> Option<String> {
    // Strict IPv4 parse: exactly four dot-separated fields, each a valid u8.
    // (The previous filter_map-based parse silently dropped unparseable
    // fields, so malformed input like "x.8.8.8.8" collapsed to four valid
    // octets and was mapped to a country.)
    let fields: Vec<&str> = ip.split('.').collect();
    if fields.len() != 4 {
        return None;
    }
    let mut octets = [0u8; 4];
    for (slot, field) in octets.iter_mut().zip(&fields) {
        *slot = field.parse().ok()?;
    }

    let first_octet = octets[0];
    let second_octet = octets[1];

    // Check private/local ranges - return "LO" for local/private IPs
    // This helps with testing and identifies local traffic in dashboards
    if first_octet == 10
        || first_octet == 127
        || (first_octet == 172 && (16..=31).contains(&second_octet))
        || (first_octet == 192 && second_octet == 168)
        || first_octet == 169
    {
        return Some("LO".to_string());
    }

    // Simplified country lookup based on common IP allocations
    // This is a rough approximation - for production, use MaxMind GeoLite2
    match first_octet {
        // Europe - RIPE allocations
        77..=95 | 145..=151 | 176..=185 | 193..=195 => match second_octet {
            0..=50 => Some("DE".to_string()),
            51..=100 => Some("GB".to_string()),
            101..=150 => Some("FR".to_string()),
            151..=200 => Some("NL".to_string()),
            _ => Some("EU".to_string()),
        },

        // Asia-Pacific - APNIC allocations
        1 | 2 | 27 | 36..=39 | 42..=49 | 58..=61 | 101..=126 | 202..=223 => match second_octet {
            0..=50 => Some("JP".to_string()),
            51..=100 => Some("CN".to_string()),
            101..=150 => Some("AU".to_string()),
            151..=200 => Some("IN".to_string()),
            _ => Some("AP".to_string()),
        },

        // North America (US/CA) - ARIN allocations
        3..=26
        | 28..=35
        | 40
        | 41
        | 50..=57
        | 63..=76
        | 96..=100
        | 128..=144
        | 152..=175
        | 186..=191
        | 196..=201 => {
            if second_octet < 128 {
                Some("US".to_string())
            } else {
                Some("CA".to_string())
            }
        }

        _ => None,
    }
}
1595
/// Human-readable display name for a two-letter country code.
///
/// Unknown codes are returned unchanged so dashboards still show something
/// meaningful ("LO", "EU" and "AP" are internal sentinels, not ISO codes).
fn country_name(code: &str) -> String {
    const NAMES: &[(&str, &str)] = &[
        ("LO", "Local/Private"),
        ("US", "United States"),
        ("GB", "United Kingdom"),
        ("DE", "Germany"),
        ("FR", "France"),
        ("CA", "Canada"),
        ("AU", "Australia"),
        ("JP", "Japan"),
        ("CN", "China"),
        ("IN", "India"),
        ("BR", "Brazil"),
        ("NL", "Netherlands"),
        ("ES", "Spain"),
        ("IT", "Italy"),
        ("SE", "Sweden"),
        ("NO", "Norway"),
        ("DK", "Denmark"),
        ("FI", "Finland"),
        ("PL", "Poland"),
        ("RU", "Russia"),
        ("KR", "South Korea"),
        ("MX", "Mexico"),
        ("AR", "Argentina"),
        ("ZA", "South Africa"),
        ("NG", "Nigeria"),
        ("EG", "Egypt"),
        ("SG", "Singapore"),
        ("HK", "Hong Kong"),
        ("TW", "Taiwan"),
        ("NZ", "New Zealand"),
        ("IE", "Ireland"),
        ("CH", "Switzerland"),
        ("AT", "Austria"),
        ("BE", "Belgium"),
        ("PT", "Portugal"),
        ("EU", "Europe"),
        ("AP", "Asia-Pacific"),
    ];

    NAMES
        .iter()
        .find(|(c, _)| *c == code)
        .map(|(_, name)| *name)
        .unwrap_or(code)
        .to_string()
}
1639
1640/// Run hourly and daily aggregation jobs
1641pub async fn run_aggregation_job(analytics: Arc<Analytics>) {
1642    let mut hourly_interval = tokio::time::interval(std::time::Duration::from_secs(3600)); // 1 hour
1643    let mut daily_check = tokio::time::interval(std::time::Duration::from_secs(3600 * 6)); // Check every 6 hours
1644
1645    loop {
1646        tokio::select! {
1647            _ = hourly_interval.tick() => {
1648                // Hourly aggregation
1649                match analytics.aggregate_hourly() {
1650                    Ok(count) => {
1651                        if count > 0 {
1652                            tracing::info!("Analytics: aggregated {} hourly records", count);
1653                        }
1654                    }
1655                    Err(e) => {
1656                        tracing::error!("Analytics hourly aggregation failed: {}", e);
1657                    }
1658                }
1659
1660                if let Err(e) = analytics.cleanup_old_data(90) {
1661                    tracing::error!("Analytics cleanup failed: {}", e);
1662                }
1663            }
1664            _ = daily_check.tick() => {
1665                // Check if daily aggregation is needed (runs once per day)
1666                let now = chrono::Utc::now();
1667                // Only run daily aggregation after midnight (0-6 AM UTC)
1668                if now.hour() < 6 {
1669                    match analytics.aggregate_daily() {
1670                        Ok(count) => {
1671                            if count > 0 {
1672                                tracing::info!("Analytics: completed daily aggregation");
1673                            }
1674                        }
1675                        Err(e) => {
1676                            tracing::error!("Analytics daily aggregation failed: {}", e);
1677                        }
1678                    }
1679                }
1680            }
1681        }
1682    }
1683}
1684
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_analytics_config_should_track() {
        let config = AnalyticsConfig::default();

        // Regular content paths are tracked; exact excluded paths and
        // excluded prefixes (admin, static assets, media) are not.
        assert!(config.should_track("/posts/hello"));
        assert!(config.should_track("/"));
        assert!(!config.should_track("/admin"));
        assert!(!config.should_track("/admin/posts"));
        assert!(!config.should_track("/static/style.css"));
        assert!(!config.should_track("/media/image.jpg"));
        assert!(!config.should_track("/robots.txt"));
    }

    #[test]
    fn test_analytics_config_dnt() {
        let config = AnalyticsConfig::default();

        // DNT is only honored for the literal header value "1".
        assert!(config.should_respect_dnt(Some("1")));
        assert!(!config.should_respect_dnt(Some("0")));
        assert!(!config.should_respect_dnt(None));

        // With respect_dnt disabled, the header is ignored entirely.
        let config_no_dnt = AnalyticsConfig {
            respect_dnt: false,
            ..Default::default()
        };
        assert!(!config_no_dnt.should_respect_dnt(Some("1")));
    }

    #[test]
    fn test_lookup_country() {
        // US IPs
        assert_eq!(lookup_country("8.8.8.8"), Some("US".to_string()));
        assert_eq!(lookup_country("4.4.4.4"), Some("US".to_string()));

        // Europe IPs
        assert_eq!(lookup_country("80.80.80.80"), Some("GB".to_string()));

        // Private/loopback IPs map to the "LO" sentinel (not None)
        assert_eq!(lookup_country("192.168.1.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("10.0.0.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("127.0.0.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("172.16.0.1"), Some("LO".to_string()));
        assert_eq!(lookup_country("172.31.255.255"), Some("LO".to_string()));

        // Invalid IPs
        assert_eq!(lookup_country("invalid"), None);
        assert_eq!(lookup_country("256.1.1.1"), None);
    }

    #[test]
    fn test_anonymize_ip() {
        // IPv4 keeps two octets; IPv6 keeps three segments.
        assert_eq!(anonymize_ip("192.168.1.100"), "192.168.0.0");
        assert_eq!(anonymize_ip("10.20.30.40"), "10.20.0.0");
        assert_eq!(
            anonymize_ip("2001:db8:85a3:8d3:1319:8a2e:370:7348"),
            "2001:db8:85a3:*"
        );
        assert_eq!(anonymize_ip("invalid"), "unknown");
    }

    #[test]
    fn test_extract_browser_family() {
        assert_eq!(
            extract_browser_family(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/91.0"
            ),
            "Chrome"
        );
        assert_eq!(
            extract_browser_family(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"
            ),
            "Firefox"
        );
        assert_eq!(
            extract_browser_family("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 Safari/605.1.15"),
            "Safari"
        );
    }

    #[test]
    fn test_extract_device_type() {
        assert!(matches!(
            extract_device_type("Mozilla/5.0 (Windows NT 10.0; Win64; x64)"),
            DeviceType::Desktop
        ));
        assert!(matches!(
            extract_device_type("Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) Mobile"),
            DeviceType::Mobile
        ));
        assert!(matches!(
            extract_device_type("Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X)"),
            DeviceType::Tablet
        ));
    }
}