rust_rule_engine/engine/
analytics.rs

//! Advanced analytics and performance monitoring for rule engine.
//!
//! This module provides comprehensive metrics collection, analysis,
//! and performance insights for rule execution.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::{Duration, SystemTime};
7
8/// Trend direction for performance metrics
9#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
10pub enum TrendDirection {
11    /// Performance is improving over time
12    Improving,
13    /// Performance is degrading over time  
14    Degrading,
15    /// Performance is stable over time
16    Stable,
17}
18
/// Individual rule execution metrics
///
/// Aggregate counters and timing data for one named rule; updated via
/// `record_execution` / `record_failure` in the accompanying impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuleMetrics {
    /// Name of the rule
    pub rule_name: String,
    /// Total number of times this rule was evaluated
    pub total_evaluations: u64,
    /// Total number of times this rule fired (condition was true)
    pub total_fires: u64,
    /// Total number of successful executions (no errors)
    pub total_successes: u64,
    /// Total number of failed executions (with errors)
    pub total_failures: u64,
    /// Sum of all execution times for averaging
    pub total_execution_time: Duration,
    /// Fastest execution time recorded (`Duration::MAX` until the first
    /// successful execution is recorded)
    pub min_execution_time: Duration,
    /// Slowest execution time recorded (successful executions only)
    pub max_execution_time: Duration,
    /// Estimated memory usage for this rule (last observed value, not a sum)
    pub estimated_memory_usage: usize,
    /// Last time this rule was executed
    pub last_executed: Option<SystemTime>,
    /// Recent execution times for trend analysis (capped at the 100 most
    /// recent successful executions; failures are not included)
    pub recent_execution_times: Vec<Duration>,
}
45
46impl RuleMetrics {
47    /// Create new metrics for a rule
48    pub fn new(rule_name: String) -> Self {
49        Self {
50            rule_name,
51            total_evaluations: 0,
52            total_fires: 0,
53            total_successes: 0,
54            total_failures: 0,
55            total_execution_time: Duration::ZERO,
56            min_execution_time: Duration::MAX,
57            max_execution_time: Duration::ZERO,
58            estimated_memory_usage: 0,
59            last_executed: None,
60            recent_execution_times: Vec::new(),
61        }
62    }
63
64    /// Record a successful rule execution
65    pub fn record_execution(&mut self, duration: Duration, fired: bool, memory_usage: usize) {
66        self.total_evaluations += 1;
67        if fired {
68            self.total_fires += 1;
69        }
70        self.total_successes += 1;
71        self.total_execution_time += duration;
72
73        // Update min/max times
74        if duration < self.min_execution_time {
75            self.min_execution_time = duration;
76        }
77        if duration > self.max_execution_time {
78            self.max_execution_time = duration;
79        }
80
81        self.estimated_memory_usage = memory_usage;
82        self.last_executed = Some(SystemTime::now());
83
84        // Keep last 100 execution times for trend analysis
85        self.recent_execution_times.push(duration);
86        if self.recent_execution_times.len() > 100 {
87            self.recent_execution_times.remove(0);
88        }
89    }
90
91    /// Record a failed rule execution
92    pub fn record_failure(&mut self, duration: Duration) {
93        self.total_evaluations += 1;
94        self.total_failures += 1;
95        self.total_execution_time += duration;
96        self.last_executed = Some(SystemTime::now());
97    }
98
99    /// Calculate average execution time
100    pub fn avg_execution_time(&self) -> Duration {
101        if self.total_evaluations > 0 {
102            self.total_execution_time / self.total_evaluations as u32
103        } else {
104            Duration::ZERO
105        }
106    }
107
108    /// Calculate success rate as percentage
109    pub fn success_rate(&self) -> f64 {
110        if self.total_evaluations > 0 {
111            (self.total_successes as f64 / self.total_evaluations as f64) * 100.0
112        } else {
113            0.0
114        }
115    }
116
117    /// Calculate fire rate as percentage
118    pub fn fire_rate(&self) -> f64 {
119        if self.total_evaluations > 0 {
120            (self.total_fires as f64 / self.total_evaluations as f64) * 100.0
121        } else {
122            0.0
123        }
124    }
125
126    /// Check if this rule is performing poorly
127    pub fn is_problematic(&self) -> bool {
128        self.success_rate() < 95.0
129            || self.avg_execution_time() > Duration::from_millis(50)
130            || self.total_failures > 10
131    }
132}
133
/// Configuration for analytics collection
///
/// Construct via `Default::default()`, `production()`, or `development()`.
#[derive(Debug, Clone)]
pub struct AnalyticsConfig {
    /// Whether to track detailed execution timing
    pub track_execution_time: bool,
    /// Whether to estimate memory usage
    pub track_memory_usage: bool,
    /// Whether to track success/failure rates
    pub track_success_rate: bool,
    /// Sampling rate (0.0 to 1.0) - 1.0 means track everything
    pub sampling_rate: f64,
    /// How long to retain detailed metrics (timeline events older than this
    /// are dropped)
    pub retention_period: Duration,
    /// Maximum number of recent execution times to keep per rule
    /// NOTE(review): not enforced by `RuleMetrics` itself, which hard-codes
    /// a 100-sample cap — verify enforcement at the recording site.
    pub max_recent_samples: usize,
}
150
151impl Default for AnalyticsConfig {
152    fn default() -> Self {
153        Self {
154            track_execution_time: true,
155            track_memory_usage: true,
156            track_success_rate: true,
157            sampling_rate: 1.0,
158            retention_period: Duration::from_secs(7 * 24 * 60 * 60), // 7 days
159            max_recent_samples: 100,
160        }
161    }
162}
163
164impl AnalyticsConfig {
165    /// Production-ready configuration with reasonable sampling
166    pub fn production() -> Self {
167        Self {
168            track_execution_time: true,
169            track_memory_usage: false, // Expensive in production
170            track_success_rate: true,
171            sampling_rate: 0.1, // Sample 10% of executions
172            retention_period: Duration::from_secs(24 * 60 * 60), // 1 day
173            max_recent_samples: 50,
174        }
175    }
176
177    /// Development configuration with full tracking
178    pub fn development() -> Self {
179        Self {
180            track_execution_time: true,
181            track_memory_usage: true,
182            track_success_rate: true,
183            sampling_rate: 1.0,                             // Track everything
184            retention_period: Duration::from_secs(60 * 60), // 1 hour
185            max_recent_samples: 100,
186        }
187    }
188}
189
/// Execution event for timeline analysis
///
/// One entry per recorded rule execution; old events are pruned according
/// to the configured retention period.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionEvent {
    /// When this event occurred
    pub timestamp: SystemTime,
    /// Name of the rule that was executed
    pub rule_name: String,
    /// Whether the rule fired (its condition matched)
    pub fired: bool,
    /// Execution time
    pub duration: Duration,
    /// Whether the execution was successful
    pub success: bool,
    /// Error message if execution failed (`None` on success)
    pub error: Option<String>,
}
206
/// Performance trend analysis
///
/// NOTE(review): nothing in this chunk constructs `PerformanceTrend`;
/// presumably it is produced by trend-analysis code elsewhere — verify.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrend {
    /// Rule name
    pub rule_name: String,
    /// Trend direction: Improving, Degrading, Stable
    pub trend: TrendDirection,
    /// Percentage change in performance
    pub change_percentage: f64,
    /// Time period of this trend
    pub period: Duration,
}
219
/// Main analytics collector and analyzer
///
/// Owns all per-rule metrics and the execution timeline; fields are private
/// and accessed through the methods on the impl.
#[derive(Debug)]
pub struct RuleAnalytics {
    /// Configuration for analytics collection
    config: AnalyticsConfig,
    /// Metrics for each rule, keyed by rule name
    rule_metrics: HashMap<String, RuleMetrics>,
    /// Timeline of execution events (pruned by retention period)
    execution_timeline: Vec<ExecutionEvent>,
    /// When analytics collection started (used to compute uptime)
    start_time: SystemTime,
    /// Total number of rule executions tracked
    total_executions: u64,
}
234
235impl RuleAnalytics {
236    /// Create new analytics collector
237    pub fn new(config: AnalyticsConfig) -> Self {
238        Self {
239            config,
240            rule_metrics: HashMap::new(),
241            execution_timeline: Vec::new(),
242            start_time: SystemTime::now(),
243            total_executions: 0,
244        }
245    }
246
247    /// Record a rule execution
248    pub fn record_execution(
249        &mut self,
250        rule_name: &str,
251        duration: Duration,
252        fired: bool,
253        success: bool,
254        error: Option<String>,
255        memory_usage: usize,
256    ) {
257        // Apply sampling rate
258        if !self.should_sample() {
259            return;
260        }
261
262        self.total_executions += 1;
263
264        // Update rule metrics
265        let metrics = self
266            .rule_metrics
267            .entry(rule_name.to_string())
268            .or_insert_with(|| RuleMetrics::new(rule_name.to_string()));
269
270        if success {
271            metrics.record_execution(duration, fired, memory_usage);
272        } else {
273            metrics.record_failure(duration);
274        }
275
276        // Add to timeline
277        self.execution_timeline.push(ExecutionEvent {
278            timestamp: SystemTime::now(),
279            rule_name: rule_name.to_string(),
280            fired,
281            duration,
282            success,
283            error,
284        });
285
286        // Clean up old events
287        self.cleanup_old_data();
288    }
289
290    /// Get metrics for a specific rule
291    pub fn get_rule_metrics(&self, rule_name: &str) -> Option<&RuleMetrics> {
292        self.rule_metrics.get(rule_name)
293    }
294
295    /// Get all rule metrics
296    pub fn get_all_metrics(&self) -> &HashMap<String, RuleMetrics> {
297        &self.rule_metrics
298    }
299
300    /// Get the slowest rules
301    pub fn slowest_rules(&self, limit: usize) -> Vec<&RuleMetrics> {
302        let mut rules: Vec<&RuleMetrics> = self.rule_metrics.values().collect();
303        rules.sort_by_key(|b| std::cmp::Reverse(b.avg_execution_time()));
304        rules.into_iter().take(limit).collect()
305    }
306
307    /// Get the most frequently fired rules
308    pub fn most_fired_rules(&self, limit: usize) -> Vec<&RuleMetrics> {
309        let mut rules: Vec<&RuleMetrics> = self.rule_metrics.values().collect();
310        rules.sort_by(|a, b| b.total_fires.cmp(&a.total_fires));
311        rules.into_iter().take(limit).collect()
312    }
313
314    /// Get problematic rules (low success rate, high execution time, etc.)
315    pub fn problematic_rules(&self) -> Vec<&RuleMetrics> {
316        self.rule_metrics
317            .values()
318            .filter(|metrics| metrics.is_problematic())
319            .collect()
320    }
321
322    /// Calculate overall performance statistics
323    pub fn overall_stats(&self) -> OverallStats {
324        let total_time: Duration = self
325            .rule_metrics
326            .values()
327            .map(|m| m.total_execution_time)
328            .sum();
329
330        let total_evaluations: u64 = self
331            .rule_metrics
332            .values()
333            .map(|m| m.total_evaluations)
334            .sum();
335
336        let total_fires: u64 = self.rule_metrics.values().map(|m| m.total_fires).sum();
337
338        let total_successes: u64 = self.rule_metrics.values().map(|m| m.total_successes).sum();
339
340        let avg_execution_time = if total_evaluations > 0 {
341            total_time / total_evaluations as u32
342        } else {
343            Duration::ZERO
344        };
345
346        let rules_per_second = if total_time.as_secs_f64() > 0.0 {
347            total_evaluations as f64 / total_time.as_secs_f64()
348        } else {
349            0.0
350        };
351
352        let success_rate = if total_evaluations > 0 {
353            (total_successes as f64 / total_evaluations as f64) * 100.0
354        } else {
355            0.0
356        };
357
358        OverallStats {
359            total_rules: self.rule_metrics.len(),
360            total_evaluations,
361            total_fires,
362            total_successes,
363            avg_execution_time,
364            rules_per_second,
365            success_rate,
366            uptime: self.start_time.elapsed().unwrap_or(Duration::ZERO),
367        }
368    }
369
370    /// Check if we should sample this execution based on sampling rate
371    fn should_sample(&self) -> bool {
372        if self.config.sampling_rate >= 1.0 {
373            return true;
374        }
375
376        use std::collections::hash_map::DefaultHasher;
377        use std::hash::{Hash, Hasher};
378
379        let mut hasher = DefaultHasher::new();
380        self.total_executions.hash(&mut hasher);
381        let hash = hasher.finish();
382
383        (hash as f64 / u64::MAX as f64) < self.config.sampling_rate
384    }
385
386    /// Clean up old data based on retention period
387    fn cleanup_old_data(&mut self) {
388        let cutoff = SystemTime::now()
389            .checked_sub(self.config.retention_period)
390            .unwrap_or(SystemTime::UNIX_EPOCH);
391
392        // Remove old timeline events
393        self.execution_timeline
394            .retain(|event| event.timestamp >= cutoff);
395    }
396
397    /// Get configuration reference
398    pub fn config(&self) -> &AnalyticsConfig {
399        &self.config
400    }
401
402    /// Get all rule metrics as a map
403    pub fn get_all_rule_metrics(&self) -> &HashMap<String, RuleMetrics> {
404        &self.rule_metrics
405    }
406
407    /// Generate optimization recommendations based on analytics data
408    pub fn generate_recommendations(&self) -> Vec<String> {
409        let mut recommendations = Vec::new();
410
411        for (rule_name, metrics) in &self.rule_metrics {
412            // Check for slow rules
413            if metrics.avg_execution_time().as_millis() > 100 {
414                recommendations.push(format!(
415                    "Consider optimizing '{}' - average execution time is {:.2}ms",
416                    rule_name,
417                    metrics.avg_execution_time().as_secs_f64() * 1000.0
418                ));
419            }
420
421            // Check for low success rates
422            if metrics.success_rate() < 50.0 && metrics.total_evaluations > 10 {
423                recommendations.push(format!(
424                    "Rule '{}' has low success rate ({:.1}%) - review conditions",
425                    rule_name,
426                    metrics.success_rate()
427                ));
428            }
429
430            // Check for rules that never fire
431            if metrics.total_fires == 0 && metrics.total_evaluations > 20 {
432                recommendations.push(format!(
433                    "Rule '{}' never fires despite {} evaluations - review logic",
434                    rule_name, metrics.total_evaluations
435                ));
436            }
437        }
438
439        recommendations
440    }
441
442    /// Get recent execution events
443    pub fn get_recent_events(&self, limit: usize) -> Vec<&ExecutionEvent> {
444        self.execution_timeline.iter().rev().take(limit).collect()
445    }
446
447    /// Get overall performance statistics
448    pub fn get_overall_stats(&self) -> OverallStats {
449        self.overall_stats()
450    }
451}
452
/// Overall performance statistics
///
/// Snapshot produced by `RuleAnalytics::overall_stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OverallStats {
    /// Total number of unique rules
    pub total_rules: usize,
    /// Total rule evaluations
    pub total_evaluations: u64,
    /// Total rule fires
    pub total_fires: u64,
    /// Total successful executions
    pub total_successes: u64,
    /// Average execution time across all rules
    pub avg_execution_time: Duration,
    /// Rules processed per second (0.0 when no time has been recorded)
    pub rules_per_second: f64,
    /// Overall success rate percentage (0.0 to 100.0)
    pub success_rate: f64,
    /// How long analytics has been running
    pub uptime: Duration,
}
473
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_rule_metrics_creation() {
        // Fresh metrics start with zeroed counters and a 0% success rate.
        let fresh = RuleMetrics::new("TestRule".to_string());
        assert_eq!(fresh.rule_name, "TestRule");
        assert_eq!(fresh.total_evaluations, 0);
        assert_eq!(fresh.success_rate(), 0.0);
    }

    #[test]
    fn test_rule_metrics_recording() {
        let mut m = RuleMetrics::new("TestRule".to_string());

        // A single successful, fired execution.
        m.record_execution(Duration::from_millis(10), true, 1024);

        assert_eq!(m.total_evaluations, 1);
        assert_eq!(m.total_fires, 1);
        assert_eq!(m.total_successes, 1);
        assert_eq!(m.success_rate(), 100.0);
        assert_eq!(m.fire_rate(), 100.0);
    }

    #[test]
    fn test_analytics_config() {
        // Production preset samples and skips memory tracking.
        let prod = AnalyticsConfig::production();
        assert!(prod.sampling_rate < 1.0);
        assert!(!prod.track_memory_usage);

        // Development preset tracks everything.
        let dev = AnalyticsConfig::development();
        assert_eq!(dev.sampling_rate, 1.0);
        assert!(dev.track_memory_usage);
    }

    #[test]
    fn test_analytics_recording() {
        let mut analytics = RuleAnalytics::new(AnalyticsConfig::development());

        analytics.record_execution("TestRule", Duration::from_millis(5), true, true, None, 1024);

        assert_eq!(analytics.total_executions, 1);
        assert!(analytics.get_rule_metrics("TestRule").is_some());
    }
}