Skip to main content

rust_rule_engine/engine/
analytics.rs

1use serde::{Deserialize, Serialize};
2/// Advanced analytics and performance monitoring for rule engine
3/// This module provides comprehensive metrics collection, analysis,
4/// and performance insights for rule execution.
5use std::collections::HashMap;
6use std::time::{Duration, SystemTime};
7
/// Trend direction for performance metrics
///
/// Carried by the `trend` field of [`PerformanceTrend`] to say which way a
/// rule's performance moved over the analysed period.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TrendDirection {
    /// Performance is improving over time
    Improving,
    /// Performance is degrading over time
    Degrading,
    /// Performance is stable over time
    Stable,
}
18
/// Individual rule execution metrics
///
/// One value accumulates the counters and timings of a single rule. The
/// counters are updated through `RuleMetrics::record_execution` (successful
/// executions) and `RuleMetrics::record_failure` (failed executions); all
/// fields are public so they can also be read or serialized directly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuleMetrics {
    /// Name of the rule
    pub rule_name: String,
    /// Total number of times this rule was evaluated (successes and failures)
    pub total_evaluations: u64,
    /// Total number of times this rule fired (condition was true)
    pub total_fires: u64,
    /// Total number of successful executions (no errors)
    pub total_successes: u64,
    /// Total number of failed executions (with errors)
    pub total_failures: u64,
    /// Sum of all execution times for averaging; includes failed executions
    pub total_execution_time: Duration,
    /// Fastest execution time recorded among successful executions;
    /// `Duration::MAX` until the first successful execution is recorded
    pub min_execution_time: Duration,
    /// Slowest execution time recorded among successful executions;
    /// `Duration::ZERO` until the first successful execution is recorded
    pub max_execution_time: Duration,
    /// Estimated memory usage for this rule, as reported with the most
    /// recent successful execution (overwritten, not accumulated)
    pub estimated_memory_usage: usize,
    /// Last time this rule was executed (successfully or not); `None` until
    /// something has been recorded
    pub last_executed: Option<SystemTime>,
    /// Recent execution times (for trend analysis), oldest first; only
    /// successful executions are appended and the list is kept bounded
    pub recent_execution_times: Vec<Duration>,
}
45
46impl RuleMetrics {
47    /// Create new metrics for a rule
48    pub fn new(rule_name: String) -> Self {
49        Self {
50            rule_name,
51            total_evaluations: 0,
52            total_fires: 0,
53            total_successes: 0,
54            total_failures: 0,
55            total_execution_time: Duration::ZERO,
56            min_execution_time: Duration::MAX,
57            max_execution_time: Duration::ZERO,
58            estimated_memory_usage: 0,
59            last_executed: None,
60            recent_execution_times: Vec::new(),
61        }
62    }
63
64    /// Record a successful rule execution
65    pub fn record_execution(&mut self, duration: Duration, fired: bool, memory_usage: usize) {
66        self.total_evaluations += 1;
67        if fired {
68            self.total_fires += 1;
69        }
70        self.total_successes += 1;
71        self.total_execution_time += duration;
72
73        // Update min/max times
74        if duration < self.min_execution_time {
75            self.min_execution_time = duration;
76        }
77        if duration > self.max_execution_time {
78            self.max_execution_time = duration;
79        }
80
81        self.estimated_memory_usage = memory_usage;
82        self.last_executed = Some(SystemTime::now());
83
84        // Keep last 100 execution times for trend analysis
85        self.recent_execution_times.push(duration);
86        if self.recent_execution_times.len() > 100 {
87            self.recent_execution_times.remove(0);
88        }
89    }
90
91    /// Record a failed rule execution
92    pub fn record_failure(&mut self, duration: Duration) {
93        self.total_evaluations += 1;
94        self.total_failures += 1;
95        self.total_execution_time += duration;
96        self.last_executed = Some(SystemTime::now());
97    }
98
99    /// Calculate average execution time
100    pub fn avg_execution_time(&self) -> Duration {
101        if self.total_evaluations > 0 {
102            self.total_execution_time / self.total_evaluations as u32
103        } else {
104            Duration::ZERO
105        }
106    }
107
108    /// Calculate success rate as percentage
109    pub fn success_rate(&self) -> f64 {
110        if self.total_evaluations > 0 {
111            (self.total_successes as f64 / self.total_evaluations as f64) * 100.0
112        } else {
113            0.0
114        }
115    }
116
117    /// Calculate fire rate as percentage
118    pub fn fire_rate(&self) -> f64 {
119        if self.total_evaluations > 0 {
120            (self.total_fires as f64 / self.total_evaluations as f64) * 100.0
121        } else {
122            0.0
123        }
124    }
125
126    /// Check if this rule is performing poorly
127    pub fn is_problematic(&self) -> bool {
128        self.success_rate() < 95.0
129            || self.avg_execution_time() > Duration::from_millis(50)
130            || self.total_failures > 10
131    }
132}
133
/// Configuration for analytics collection
///
/// Presets are provided by the `Default` implementation and by
/// `AnalyticsConfig::production` / `AnalyticsConfig::development`.
///
/// NOTE(review): the three `track_*` flags are stored here but no code
/// visible in this file consults them - confirm whether other modules do.
#[derive(Debug, Clone)]
pub struct AnalyticsConfig {
    /// Whether to track detailed execution timing
    pub track_execution_time: bool,
    /// Whether to estimate memory usage
    pub track_memory_usage: bool,
    /// Whether to track success/failure rates
    pub track_success_rate: bool,
    /// Sampling rate (0.0 to 1.0) - 1.0 means track everything; any value
    /// of 1.0 or more records every execution
    pub sampling_rate: f64,
    /// How long to retain detailed metrics; timeline events older than this
    /// are dropped whenever a new execution is recorded
    pub retention_period: Duration,
    /// Maximum number of recent execution times to keep per rule
    pub max_recent_samples: usize,
}
150
151impl Default for AnalyticsConfig {
152    fn default() -> Self {
153        Self {
154            track_execution_time: true,
155            track_memory_usage: true,
156            track_success_rate: true,
157            sampling_rate: 1.0,
158            retention_period: Duration::from_secs(7 * 24 * 60 * 60), // 7 days
159            max_recent_samples: 100,
160        }
161    }
162}
163
164impl AnalyticsConfig {
165    /// Production-ready configuration with reasonable sampling
166    pub fn production() -> Self {
167        Self {
168            track_execution_time: true,
169            track_memory_usage: false, // Expensive in production
170            track_success_rate: true,
171            sampling_rate: 0.1, // Sample 10% of executions
172            retention_period: Duration::from_secs(24 * 60 * 60), // 1 day
173            max_recent_samples: 50,
174        }
175    }
176
177    /// Development configuration with full tracking
178    pub fn development() -> Self {
179        Self {
180            track_execution_time: true,
181            track_memory_usage: true,
182            track_success_rate: true,
183            sampling_rate: 1.0,                             // Track everything
184            retention_period: Duration::from_secs(60 * 60), // 1 hour
185            max_recent_samples: 100,
186        }
187    }
188}
189
/// Execution event for timeline analysis
///
/// `RuleAnalytics::record_execution` appends one of these to its timeline
/// for every execution it records.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionEvent {
    /// When this event occurred (wall-clock time at which it was recorded)
    pub timestamp: SystemTime,
    /// Name of the rule that was executed
    pub rule_name: String,
    /// Whether the rule fired
    pub fired: bool,
    /// Execution time
    pub duration: Duration,
    /// Whether the execution was successful
    pub success: bool,
    /// Error message if execution failed, exactly as supplied by the caller
    pub error: Option<String>,
}
206
/// Performance trend analysis
///
/// Plain data record describing how one rule's performance changed over a
/// period. NOTE(review): nothing in this file constructs this type; the
/// producer presumably lives elsewhere - confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrend {
    /// Rule name
    pub rule_name: String,
    /// Trend direction: Improving, Degrading, Stable
    pub trend: TrendDirection,
    /// Percentage change in performance
    /// (sign convention is not established in this file - TODO confirm)
    pub change_percentage: f64,
    /// Time period of this trend
    pub period: Duration,
}
219
220/// Main analytics collector and analyzer
221#[derive(Debug)]
222pub struct RuleAnalytics {
223    /// Configuration for analytics collection
224    config: AnalyticsConfig,
225    /// Metrics for each rule
226    rule_metrics: HashMap<String, RuleMetrics>,
227    /// Timeline of execution events
228    execution_timeline: Vec<ExecutionEvent>,
229    /// When analytics collection started
230    start_time: SystemTime,
231    /// Total number of rule executions tracked
232    total_executions: u64,
233}
234
235impl RuleAnalytics {
236    /// Create new analytics collector
237    pub fn new(config: AnalyticsConfig) -> Self {
238        Self {
239            config,
240            rule_metrics: HashMap::new(),
241            execution_timeline: Vec::new(),
242            start_time: SystemTime::now(),
243            total_executions: 0,
244        }
245    }
246
247    /// Record a rule execution
248    pub fn record_execution(
249        &mut self,
250        rule_name: &str,
251        duration: Duration,
252        fired: bool,
253        success: bool,
254        error: Option<String>,
255        memory_usage: usize,
256    ) {
257        // Apply sampling rate
258        if !self.should_sample() {
259            return;
260        }
261
262        self.total_executions += 1;
263
264        // Update rule metrics
265        let metrics = self
266            .rule_metrics
267            .entry(rule_name.to_string())
268            .or_insert_with(|| RuleMetrics::new(rule_name.to_string()));
269
270        if success {
271            metrics.record_execution(duration, fired, memory_usage);
272        } else {
273            metrics.record_failure(duration);
274        }
275
276        // Add to timeline
277        self.execution_timeline.push(ExecutionEvent {
278            timestamp: SystemTime::now(),
279            rule_name: rule_name.to_string(),
280            fired,
281            duration,
282            success,
283            error,
284        });
285
286        // Clean up old events
287        self.cleanup_old_data();
288    }
289
290    /// Get metrics for a specific rule
291    pub fn get_rule_metrics(&self, rule_name: &str) -> Option<&RuleMetrics> {
292        self.rule_metrics.get(rule_name)
293    }
294
295    /// Get all rule metrics
296    pub fn get_all_metrics(&self) -> &HashMap<String, RuleMetrics> {
297        &self.rule_metrics
298    }
299
300    /// Get the slowest rules
301    pub fn slowest_rules(&self, limit: usize) -> Vec<&RuleMetrics> {
302        let mut rules: Vec<&RuleMetrics> = self.rule_metrics.values().collect();
303        rules.sort_by_key(|b| std::cmp::Reverse(b.avg_execution_time()));
304        rules.into_iter().take(limit).collect()
305    }
306
307    /// Get the most frequently fired rules
308    pub fn most_fired_rules(&self, limit: usize) -> Vec<&RuleMetrics> {
309        let mut rules: Vec<&RuleMetrics> = self.rule_metrics.values().collect();
310        // Use sort_by_key with Reverse for clearer intent and better performance
311        rules.sort_by_key(|b| std::cmp::Reverse(b.total_fires));
312        rules.into_iter().take(limit).collect()
313    }
314
315    /// Get problematic rules (low success rate, high execution time, etc.)
316    pub fn problematic_rules(&self) -> Vec<&RuleMetrics> {
317        self.rule_metrics
318            .values()
319            .filter(|metrics| metrics.is_problematic())
320            .collect()
321    }
322
323    /// Calculate overall performance statistics
324    pub fn overall_stats(&self) -> OverallStats {
325        let total_time: Duration = self
326            .rule_metrics
327            .values()
328            .map(|m| m.total_execution_time)
329            .sum();
330
331        let total_evaluations: u64 = self
332            .rule_metrics
333            .values()
334            .map(|m| m.total_evaluations)
335            .sum();
336
337        let total_fires: u64 = self.rule_metrics.values().map(|m| m.total_fires).sum();
338
339        let total_successes: u64 = self.rule_metrics.values().map(|m| m.total_successes).sum();
340
341        let avg_execution_time = if total_evaluations > 0 {
342            total_time / total_evaluations as u32
343        } else {
344            Duration::ZERO
345        };
346
347        let rules_per_second = if total_time.as_secs_f64() > 0.0 {
348            total_evaluations as f64 / total_time.as_secs_f64()
349        } else {
350            0.0
351        };
352
353        let success_rate = if total_evaluations > 0 {
354            (total_successes as f64 / total_evaluations as f64) * 100.0
355        } else {
356            0.0
357        };
358
359        OverallStats {
360            total_rules: self.rule_metrics.len(),
361            total_evaluations,
362            total_fires,
363            total_successes,
364            avg_execution_time,
365            rules_per_second,
366            success_rate,
367            uptime: self.start_time.elapsed().unwrap_or(Duration::ZERO),
368        }
369    }
370
371    /// Check if we should sample this execution based on sampling rate
372    fn should_sample(&self) -> bool {
373        if self.config.sampling_rate >= 1.0 {
374            return true;
375        }
376
377        use std::collections::hash_map::DefaultHasher;
378        use std::hash::{Hash, Hasher};
379
380        let mut hasher = DefaultHasher::new();
381        self.total_executions.hash(&mut hasher);
382        let hash = hasher.finish();
383
384        (hash as f64 / u64::MAX as f64) < self.config.sampling_rate
385    }
386
387    /// Clean up old data based on retention period
388    fn cleanup_old_data(&mut self) {
389        let cutoff = SystemTime::now()
390            .checked_sub(self.config.retention_period)
391            .unwrap_or(SystemTime::UNIX_EPOCH);
392
393        // Remove old timeline events
394        self.execution_timeline
395            .retain(|event| event.timestamp >= cutoff);
396    }
397
398    /// Get configuration reference
399    pub fn config(&self) -> &AnalyticsConfig {
400        &self.config
401    }
402
403    /// Get all rule metrics as a map
404    pub fn get_all_rule_metrics(&self) -> &HashMap<String, RuleMetrics> {
405        &self.rule_metrics
406    }
407
408    /// Generate optimization recommendations based on analytics data
409    pub fn generate_recommendations(&self) -> Vec<String> {
410        let mut recommendations = Vec::new();
411
412        for (rule_name, metrics) in &self.rule_metrics {
413            // Check for slow rules
414            if metrics.avg_execution_time().as_millis() > 100 {
415                recommendations.push(format!(
416                    "Consider optimizing '{}' - average execution time is {:.2}ms",
417                    rule_name,
418                    metrics.avg_execution_time().as_secs_f64() * 1000.0
419                ));
420            }
421
422            // Check for low success rates
423            if metrics.success_rate() < 50.0 && metrics.total_evaluations > 10 {
424                recommendations.push(format!(
425                    "Rule '{}' has low success rate ({:.1}%) - review conditions",
426                    rule_name,
427                    metrics.success_rate()
428                ));
429            }
430
431            // Check for rules that never fire
432            if metrics.total_fires == 0 && metrics.total_evaluations > 20 {
433                recommendations.push(format!(
434                    "Rule '{}' never fires despite {} evaluations - review logic",
435                    rule_name, metrics.total_evaluations
436                ));
437            }
438        }
439
440        recommendations
441    }
442
443    /// Get recent execution events
444    pub fn get_recent_events(&self, limit: usize) -> Vec<&ExecutionEvent> {
445        self.execution_timeline.iter().rev().take(limit).collect()
446    }
447
448    /// Get overall performance statistics
449    pub fn get_overall_stats(&self) -> OverallStats {
450        self.overall_stats()
451    }
452}
453
/// Overall performance statistics
///
/// Snapshot produced by `RuleAnalytics::overall_stats`, aggregated over the
/// metrics of every rule recorded so far.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OverallStats {
    /// Total number of unique rules (distinct rule names with metrics)
    pub total_rules: usize,
    /// Total rule evaluations
    pub total_evaluations: u64,
    /// Total rule fires
    pub total_fires: u64,
    /// Total successful executions
    pub total_successes: u64,
    /// Average execution time across all rules (total execution time divided
    /// by total evaluations; zero when nothing was evaluated)
    pub avg_execution_time: Duration,
    /// Rules processed per second of summed execution time (not wall-clock
    /// time); 0.0 when no execution time has been recorded
    pub rules_per_second: f64,
    /// Overall success rate percentage (0.0 when nothing was evaluated)
    pub success_rate: f64,
    /// How long analytics has been running; zero if the elapsed time could
    /// not be determined from the system clock
    pub uptime: Duration,
}
474
#[cfg(test)]
mod tests {
    use super::*;

    /// Freshly created metrics carry the rule name and zeroed statistics.
    #[test]
    fn test_rule_metrics_creation() {
        let fresh = RuleMetrics::new(String::from("TestRule"));

        assert_eq!(fresh.rule_name, "TestRule");
        assert_eq!(fresh.total_evaluations, 0);
        assert_eq!(fresh.success_rate(), 0.0);
    }

    /// A single successful, fired execution is reflected in every counter
    /// and yields 100% success and fire rates.
    #[test]
    fn test_rule_metrics_recording() {
        let elapsed = Duration::from_millis(10);
        let mut recorded = RuleMetrics::new(String::from("TestRule"));

        recorded.record_execution(elapsed, true, 1024);

        assert_eq!(recorded.total_evaluations, 1);
        assert_eq!(recorded.total_fires, 1);
        assert_eq!(recorded.total_successes, 1);
        assert_eq!(recorded.success_rate(), 100.0);
        assert_eq!(recorded.fire_rate(), 100.0);
    }

    /// The production preset samples and skips memory tracking, while the
    /// development preset tracks everything.
    #[test]
    fn test_analytics_config() {
        let prod = AnalyticsConfig::production();
        assert!(prod.sampling_rate < 1.0);
        assert!(!prod.track_memory_usage);

        let dev = AnalyticsConfig::development();
        assert_eq!(dev.sampling_rate, 1.0);
        assert!(dev.track_memory_usage);
    }

    /// Recording one execution with full sampling creates metrics for the
    /// rule and bumps the tracked-execution counter.
    #[test]
    fn test_analytics_recording() {
        let mut collector = RuleAnalytics::new(AnalyticsConfig::development());
        let elapsed = Duration::from_millis(5);

        collector.record_execution("TestRule", elapsed, true, true, None, 1024);

        assert_eq!(collector.total_executions, 1);
        assert!(collector.get_rule_metrics("TestRule").is_some());
    }
}