mockforge_analytics/lib.rs
1//! `MockForge` Analytics
2//!
3//! Provides comprehensive traffic analytics and metrics dashboard capabilities
4//! for `MockForge`, including:
5//!
6//! - Time-series metrics aggregation (minute/hour/day granularity)
7//! - Endpoint performance tracking
8//! - Error analysis and monitoring
9//! - Client analytics
10//! - Traffic pattern detection
11//! - Data export (CSV, JSON)
12//! - Configurable retention policies
13//!
14//! # Architecture
15//!
16//! The analytics system consists of several components:
17//!
18//! - **Database**: SQLite-based storage for aggregated metrics
19//! - **Aggregator**: Background service that queries Prometheus and stores metrics
20//! - **Queries**: High-level query API for dashboard data
21//! - **Export**: Data export functionality
22//! - **Retention**: Automatic cleanup of old data
23//!
24//! # Example
25//!
26//! ```no_run
27//! use mockforge_analytics::{AnalyticsDatabase, AnalyticsConfig, RetentionConfig};
28//! use std::path::PathBuf;
29//!
30//! # async fn example() -> anyhow::Result<()> {
31//! let config = AnalyticsConfig {
32//! enabled: true,
33//! database_path: PathBuf::from("analytics.db"),
34//! aggregation_interval_seconds: 60,
35//! rollup_interval_hours: 1,
36//! retention: RetentionConfig::default(),
37//! batch_size: 1000,
38//! max_query_results: 10000,
39//! };
40//!
41//! let db = AnalyticsDatabase::new(&config.database_path).await?;
42//! db.run_migrations().await?;
43//!
44//! // Query top endpoints
45//! let endpoints = db.get_top_endpoints(10, None).await?;
46//! for endpoint in &endpoints {
47//! println!("Endpoint: {} - {} requests", endpoint.endpoint, endpoint.total_requests);
48//! }
49//! # Ok(())
50//! # }
51//! ```
52
53pub mod aggregator;
54pub mod config;
55pub mod database;
56pub mod error;
57pub mod export;
58pub mod models;
59pub mod pillar_usage;
60pub mod queries;
61pub mod retention;
62
63pub use config::{AnalyticsConfig, RetentionConfig};
64pub use database::AnalyticsDatabase;
65pub use error::{AnalyticsError, Result};
66pub use models::*;
67pub use pillar_usage::*;
68
69// Explicitly re-export coverage metrics types for easier importing
70pub use models::{
71 DriftPercentageMetrics, EndpointCoverage, PersonaCIHit, RealityLevelStaleness,
72 ScenarioUsageMetrics,
73};
74
75/// Initialize the analytics system with the given configuration
76///
77/// # Errors
78///
79/// Returns an error if the database cannot be opened or migrations fail.
80pub async fn init(config: AnalyticsConfig) -> Result<AnalyticsDatabase> {
81 let db = AnalyticsDatabase::new(&config.database_path).await?;
82 db.run_migrations().await?;
83 Ok(db)
84}
85
86// ---- Global database accessor (#677) -----------------------------------
87//
88// Several middlewares need to record analytics (drift_tracking,
89// set_reality_level, scenario execution, endpoint coverage) but live in
90// crates that don't see each other and can't easily thread an
91// `Arc<AnalyticsDatabase>` through their state. We expose a lazy global
92// the same way `mockforge-observability::get_global_registry()` does for
93// Prometheus — initialise once at startup, fire-and-forget from hot paths.
94
95use once_cell::sync::OnceCell;
96use std::sync::Arc;
97
98static GLOBAL_DB: OnceCell<Arc<AnalyticsDatabase>> = OnceCell::new();
99
100/// Install the global analytics database. Called once from the CLI / server
101/// startup when analytics is enabled. Returns `Err(_)` if it has already been
102/// initialised — callers should treat that as a no-op (the first installation
103/// wins).
104///
105/// Wrapping the `AnalyticsDatabase` in `Arc` lets recorders take cheap clones
106/// without holding the global cell.
107pub fn set_global_db(db: AnalyticsDatabase) -> std::result::Result<(), Arc<AnalyticsDatabase>> {
108 GLOBAL_DB.set(Arc::new(db))
109}
110
111/// Return the global analytics database if one has been installed.
112///
113/// Recording sites should treat `None` as "analytics is disabled, skip"
114/// rather than an error — making the install opt-in keeps the OSS quick-start
115/// from creating a sqlite file the operator didn't ask for.
116pub fn get_global_db() -> Option<Arc<AnalyticsDatabase>> {
117 GLOBAL_DB.get().cloned()
118}
119
120/// Spawn a fire-and-forget task that records a drift percentage sample to
121/// the global analytics database. No-op when the global isn't installed.
122/// Errors are logged at WARN — the hot path must never wait or fail because
123/// analytics is unavailable.
124///
125/// Pair with the `mockforge_drift_percentage` Prometheus gauge so live drift
126/// shows up in both the dashboard query (`get_drift_percentage`) and the
127/// Grafana / `/metrics` time series.
128pub fn record_drift_percentage_async(
129 workspace_id: String,
130 org_id: Option<String>,
131 total_mocks: i64,
132 drifting_mocks: i64,
133) {
134 if let Some(db) = get_global_db() {
135 tokio::spawn(async move {
136 if let Err(e) = db
137 .record_drift_percentage(
138 &workspace_id,
139 org_id.as_deref(),
140 total_mocks,
141 drifting_mocks,
142 )
143 .await
144 {
145 tracing::warn!(
146 workspace_id = %workspace_id,
147 error = %e,
148 "failed to record drift percentage sample"
149 );
150 }
151 });
152 }
153}
154
155/// Spawn a fire-and-forget task that records that a scenario fired.
156/// No-op when the global isn't installed.
157pub fn record_scenario_usage_async(
158 scenario_id: String,
159 workspace_id: Option<String>,
160 org_id: Option<String>,
161) {
162 if let Some(db) = get_global_db() {
163 tokio::spawn(async move {
164 if let Err(e) = db
165 .record_scenario_usage(&scenario_id, workspace_id.as_deref(), org_id.as_deref())
166 .await
167 {
168 tracing::warn!(
169 scenario_id = %scenario_id,
170 error = %e,
171 "failed to record scenario usage sample"
172 );
173 }
174 });
175 }
176}
177
178/// Spawn a fire-and-forget task that records a single endpoint hit for
179/// coverage tracking. The HTTP middleware calls this on every matched
180/// request when analytics is installed.
181pub fn record_endpoint_coverage_async(
182 endpoint: String,
183 method: Option<String>,
184 protocol: String,
185 workspace_id: Option<String>,
186 org_id: Option<String>,
187) {
188 if let Some(db) = get_global_db() {
189 tokio::spawn(async move {
190 if let Err(e) = db
191 .record_endpoint_coverage(
192 &endpoint,
193 method.as_deref(),
194 &protocol,
195 workspace_id.as_deref(),
196 org_id.as_deref(),
197 None,
198 )
199 .await
200 {
201 tracing::warn!(
202 endpoint = %endpoint,
203 error = %e,
204 "failed to record endpoint coverage sample"
205 );
206 }
207 });
208 }
209}
210
211/// Spawn a fire-and-forget task that records the current reality level for
212/// staleness tracking. `current_reality_level` is the level name (e.g.
213/// `"production_chaos"`); `staleness_days` is "how long ago this level was
214/// last refreshed" — pass `Some(0)` when the level was set right now.
215pub fn record_reality_level_staleness_async(
216 workspace_id: String,
217 org_id: Option<String>,
218 current_reality_level: Option<String>,
219 staleness_days: Option<i32>,
220) {
221 if let Some(db) = get_global_db() {
222 tokio::spawn(async move {
223 if let Err(e) = db
224 .record_reality_level_staleness(
225 &workspace_id,
226 org_id.as_deref(),
227 None,
228 None,
229 None,
230 current_reality_level.as_deref(),
231 staleness_days,
232 )
233 .await
234 {
235 tracing::warn!(
236 workspace_id = %workspace_id,
237 error = %e,
238 "failed to record reality level staleness sample"
239 );
240 }
241 });
242 }
243}
244
#[cfg(test)]
mod global_accessor_tests {
    use super::*;

    /// Smoke test: reading the global never panics, installed or not.
    #[tokio::test]
    async fn get_global_db_returns_none_before_install() {
        // The global lives in a process-wide `OnceCell`, so once any test
        // installs it there is no way to un-install it for a later test.
        // We therefore cannot assert `None` here; both `Some(_)` and `None`
        // are acceptable. The only contract pinned is that the accessor
        // hands back an `Option` without panicking.
        let _maybe_db = get_global_db();
    }

    /// The recording helpers must be callable unconditionally from hot-path
    /// middlewares, including when no global database has been installed.
    #[tokio::test]
    async fn recording_helpers_are_safe_when_global_uninstalled() {
        let workspace = || "ws".to_string();

        record_drift_percentage_async(workspace(), None, 10, 1);
        record_scenario_usage_async("scenario-a".to_string(), None, None);
        record_endpoint_coverage_async(
            "/users".to_string(),
            Some("GET".to_string()),
            "http".to_string(),
            None,
            None,
        );
        record_reality_level_staleness_async(
            workspace(),
            None,
            Some("static_stubs".to_string()),
            Some(0),
        );

        // With an empty global the helpers return before spawning anything.
        // The short pause only matters if something else in this test binary
        // installed the global, giving those detached tasks time to run.
        let grace_period = std::time::Duration::from_millis(50);
        tokio::time::sleep(grace_period).await;
    }
}