Skip to main content

mockforge_analytics/
lib.rs

1//! `MockForge` Analytics
2//!
3//! Provides comprehensive traffic analytics and metrics dashboard capabilities
4//! for `MockForge`, including:
5//!
6//! - Time-series metrics aggregation (minute/hour/day granularity)
7//! - Endpoint performance tracking
8//! - Error analysis and monitoring
9//! - Client analytics
10//! - Traffic pattern detection
11//! - Data export (CSV, JSON)
12//! - Configurable retention policies
13//!
14//! # Architecture
15//!
16//! The analytics system consists of several components:
17//!
18//! - **Database**: SQLite-based storage for aggregated metrics
19//! - **Aggregator**: Background service that queries Prometheus and stores metrics
20//! - **Queries**: High-level query API for dashboard data
21//! - **Export**: Data export functionality
22//! - **Retention**: Automatic cleanup of old data
23//!
24//! # Example
25//!
26//! ```no_run
27//! use mockforge_analytics::{AnalyticsDatabase, AnalyticsConfig, RetentionConfig};
28//! use std::path::PathBuf;
29//!
30//! # async fn example() -> anyhow::Result<()> {
31//! let config = AnalyticsConfig {
32//!     enabled: true,
33//!     database_path: PathBuf::from("analytics.db"),
34//!     aggregation_interval_seconds: 60,
35//!     rollup_interval_hours: 1,
36//!     retention: RetentionConfig::default(),
37//!     batch_size: 1000,
38//!     max_query_results: 10000,
39//! };
40//!
41//! let db = AnalyticsDatabase::new(&config.database_path).await?;
42//! db.run_migrations().await?;
43//!
44//! // Query top endpoints
45//! let endpoints = db.get_top_endpoints(10, None).await?;
46//! for endpoint in &endpoints {
47//!     println!("Endpoint: {} - {} requests", endpoint.endpoint, endpoint.total_requests);
48//! }
49//! # Ok(())
50//! # }
51//! ```
52
53pub mod aggregator;
54pub mod config;
55pub mod database;
56pub mod error;
57pub mod export;
58pub mod models;
59pub mod pillar_usage;
60pub mod queries;
61pub mod retention;
62
63pub use config::{AnalyticsConfig, RetentionConfig};
64pub use database::AnalyticsDatabase;
65pub use error::{AnalyticsError, Result};
66pub use models::*;
67pub use pillar_usage::*;
68
69// Explicitly re-export coverage metrics types for easier importing
70pub use models::{
71    DriftPercentageMetrics, EndpointCoverage, PersonaCIHit, RealityLevelStaleness,
72    ScenarioUsageMetrics,
73};
74
75/// Initialize the analytics system with the given configuration
76///
77/// # Errors
78///
79/// Returns an error if the database cannot be opened or migrations fail.
80pub async fn init(config: AnalyticsConfig) -> Result<AnalyticsDatabase> {
81    let db = AnalyticsDatabase::new(&config.database_path).await?;
82    db.run_migrations().await?;
83    Ok(db)
84}
85
86// ---- Global database accessor (#677) -----------------------------------
87//
88// Several middlewares need to record analytics (drift_tracking,
89// set_reality_level, scenario execution, endpoint coverage) but live in
90// crates that don't see each other and can't easily thread an
91// `Arc<AnalyticsDatabase>` through their state. We expose a lazy global
92// the same way `mockforge-observability::get_global_registry()` does for
93// Prometheus — initialise once at startup, fire-and-forget from hot paths.
94
95use once_cell::sync::OnceCell;
96use std::sync::Arc;
97
/// Process-wide slot holding the shared analytics database handle.
///
/// Starts empty; `set_global_db` fills it and `get_global_db` reads it. Being
/// a `OnceCell`, it can be filled at most once for the life of the process.
static GLOBAL_DB: OnceCell<Arc<AnalyticsDatabase>> = OnceCell::new();
99
100/// Install the global analytics database. Called once from the CLI / server
101/// startup when analytics is enabled. Returns `Err(_)` if it has already been
102/// initialised — callers should treat that as a no-op (the first installation
103/// wins).
104///
105/// Wrapping the `AnalyticsDatabase` in `Arc` lets recorders take cheap clones
106/// without holding the global cell.
107pub fn set_global_db(db: AnalyticsDatabase) -> std::result::Result<(), Arc<AnalyticsDatabase>> {
108    GLOBAL_DB.set(Arc::new(db))
109}
110
111/// Return the global analytics database if one has been installed.
112///
113/// Recording sites should treat `None` as "analytics is disabled, skip"
114/// rather than an error — making the install opt-in keeps the OSS quick-start
115/// from creating a sqlite file the operator didn't ask for.
116pub fn get_global_db() -> Option<Arc<AnalyticsDatabase>> {
117    GLOBAL_DB.get().cloned()
118}
119
120/// Spawn a fire-and-forget task that records a drift percentage sample to
121/// the global analytics database. No-op when the global isn't installed.
122/// Errors are logged at WARN — the hot path must never wait or fail because
123/// analytics is unavailable.
124///
125/// Pair with the `mockforge_drift_percentage` Prometheus gauge so live drift
126/// shows up in both the dashboard query (`get_drift_percentage`) and the
127/// Grafana / `/metrics` time series.
128pub fn record_drift_percentage_async(
129    workspace_id: String,
130    org_id: Option<String>,
131    total_mocks: i64,
132    drifting_mocks: i64,
133) {
134    if let Some(db) = get_global_db() {
135        tokio::spawn(async move {
136            if let Err(e) = db
137                .record_drift_percentage(
138                    &workspace_id,
139                    org_id.as_deref(),
140                    total_mocks,
141                    drifting_mocks,
142                )
143                .await
144            {
145                tracing::warn!(
146                    workspace_id = %workspace_id,
147                    error = %e,
148                    "failed to record drift percentage sample"
149                );
150            }
151        });
152    }
153}
154
155/// Spawn a fire-and-forget task that records that a scenario fired.
156/// No-op when the global isn't installed.
157pub fn record_scenario_usage_async(
158    scenario_id: String,
159    workspace_id: Option<String>,
160    org_id: Option<String>,
161) {
162    if let Some(db) = get_global_db() {
163        tokio::spawn(async move {
164            if let Err(e) = db
165                .record_scenario_usage(&scenario_id, workspace_id.as_deref(), org_id.as_deref())
166                .await
167            {
168                tracing::warn!(
169                    scenario_id = %scenario_id,
170                    error = %e,
171                    "failed to record scenario usage sample"
172                );
173            }
174        });
175    }
176}
177
178/// Spawn a fire-and-forget task that records a single endpoint hit for
179/// coverage tracking. The HTTP middleware calls this on every matched
180/// request when analytics is installed.
181pub fn record_endpoint_coverage_async(
182    endpoint: String,
183    method: Option<String>,
184    protocol: String,
185    workspace_id: Option<String>,
186    org_id: Option<String>,
187) {
188    if let Some(db) = get_global_db() {
189        tokio::spawn(async move {
190            if let Err(e) = db
191                .record_endpoint_coverage(
192                    &endpoint,
193                    method.as_deref(),
194                    &protocol,
195                    workspace_id.as_deref(),
196                    org_id.as_deref(),
197                    None,
198                )
199                .await
200            {
201                tracing::warn!(
202                    endpoint = %endpoint,
203                    error = %e,
204                    "failed to record endpoint coverage sample"
205                );
206            }
207        });
208    }
209}
210
211/// Spawn a fire-and-forget task that records the current reality level for
212/// staleness tracking. `current_reality_level` is the level name (e.g.
213/// `"production_chaos"`); `staleness_days` is "how long ago this level was
214/// last refreshed" — pass `Some(0)` when the level was set right now.
215pub fn record_reality_level_staleness_async(
216    workspace_id: String,
217    org_id: Option<String>,
218    current_reality_level: Option<String>,
219    staleness_days: Option<i32>,
220) {
221    if let Some(db) = get_global_db() {
222        tokio::spawn(async move {
223            if let Err(e) = db
224                .record_reality_level_staleness(
225                    &workspace_id,
226                    org_id.as_deref(),
227                    None,
228                    None,
229                    None,
230                    current_reality_level.as_deref(),
231                    staleness_days,
232                )
233                .await
234            {
235                tracing::warn!(
236                    workspace_id = %workspace_id,
237                    error = %e,
238                    "failed to record reality level staleness sample"
239                );
240            }
241        });
242    }
243}
244
#[cfg(test)]
mod global_accessor_tests {
    use super::*;

    /// Smoke test for the accessor: calling it must not panic.
    #[tokio::test]
    async fn get_global_db_returns_none_before_install() {
        // `GLOBAL_DB` is a process-wide `OnceCell`, so once any test installs
        // a database it cannot be removed again for later tests. A strict
        // `None` assertion would therefore only hold if this ran before every
        // other global-touching test in the binary.
        //
        // Both `Some(_)` and `None` are accepted; the point is that the API
        // hands back an `Option` rather than panicking, so the recording
        // helpers stay safe to call before installation.
        let _observed: Option<Arc<AnalyticsDatabase>> = get_global_db();
    }

    /// Pins the contract that hot-path middlewares may call the recording
    /// helpers unconditionally, even when analytics was never installed.
    #[tokio::test]
    async fn recording_helpers_are_safe_when_global_uninstalled() {
        let workspace = || "ws".to_string();

        record_drift_percentage_async(workspace(), None, 10, 1);
        record_scenario_usage_async("scenario-a".to_string(), None, None);
        record_endpoint_coverage_async(
            "/users".to_string(),
            Some("GET".to_string()),
            "http".to_string(),
            None,
            None,
        );
        record_reality_level_staleness_async(
            workspace(),
            None,
            Some("static_stubs".to_string()),
            Some(0),
        );

        // With an empty global the helpers return before spawning anything.
        // The short pause only matters if some other test installed a
        // database, in which case it lets the spawned tasks run.
        let grace_period = std::time::Duration::from_millis(50);
        tokio::time::sleep(grace_period).await;
    }
}