clock_curve_math/ct/monitoring.rs
1//! Runtime monitoring for cryptographic operations.
2//!
3//! This module provides runtime timing monitoring capabilities for detecting
4//! timing anomalies in production cryptographic operations. It can help identify
5//! potential side-channel vulnerabilities that manifest at runtime.
6//!
7//! # Features
8//!
9//! ## Timing Anomaly Detection
10//! - Statistical analysis of operation timing
11//! - Threshold-based anomaly detection
12//! - Configurable monitoring levels
13//!
14//! ## Performance Impact Control
15//! - Minimal overhead in production
16//! - Configurable sampling rates
17//! - Optional monitoring for debugging
18//!
19//! ## Alert System
20//! - Configurable alert thresholds
21//! - Logging of timing anomalies
22//! - Integration with monitoring systems
23//!
24//! # Usage
25//!
26//! ```ignore
27//! use clock_curve_math::ct::monitoring::*;
28//!
29//! // Monitor a cryptographic operation
30//! let result = monitor_operation("scalar_mul", || {
31//! scalar_a.mul(&scalar_b)
32//! });
33//!
34//! // Check for timing anomalies
35//! if let Some(anomaly) = result.anomaly {
36//! log::warn!("Timing anomaly detected: {:?}", anomaly);
37//! }
38//! ```
39
40use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
41use core::time::Duration;
42
/// Process-wide master switch read by `should_monitor`; monitoring starts out disabled.
static MONITORING_ENABLED: AtomicBool = AtomicBool::new(false);
/// Process-wide sampling interval read by `should_monitor`.
///
/// With the initial value, one call in every 1000 is sampled.
static SAMPLE_RATE: AtomicUsize = AtomicUsize::new(1_000);
46
/// Thresholds and sampling behaviour for runtime timing monitoring.
///
/// A value of this type is passed to `monitor_operation_with_config`, which
/// honours every field. The global `monitor_operation` entry point uses the
/// process-wide enable flag and sample rate instead, and evaluates anomalies
/// against `MonitoringConfig::default()`.
///
/// # Tuning
/// - `sample_rate` is an *interval*: `1` samples every call, larger values
///   sample fewer calls and therefore cost less but observe less.
/// - A smaller `alert_threshold` or `max_variation_percent` makes detection
///   more sensitive and more prone to false positives.
/// - A shorter `timeout` flags slow operations sooner, at the risk of flagging
///   legitimately slow ones.
///
/// # Suggested starting points
/// - **Development**: interval 1-10 with low thresholds.
/// - **Production**: interval 100-1000 with balanced thresholds.
/// - **High-security**: interval 10-100 with strict thresholds.
#[derive(Debug, Clone)]
pub struct MonitoringConfig {
    /// Master switch; when `false` the operation runs without being timed.
    pub enabled: bool,
    /// Sampling interval (`1` = every operation, `n` = roughly one in `n`).
    /// Smaller values give better coverage at a higher runtime cost.
    pub sample_rate: usize,
    /// Alert threshold expressed in standard deviations from the baseline mean.
    /// Values around 2.0-3.0 are typical.
    pub alert_threshold: f64,
    /// Largest tolerated spread, as standard deviation in percent of the mean
    /// execution time.
    pub max_variation_percent: f64,
    /// Longest acceptable duration for a monitored operation. The operation is
    /// never interrupted; exceeding this value is reported after it finishes.
    pub timeout: Duration,
}

impl Default for MonitoringConfig {
    /// Builds the conservative, monitoring-off configuration.
    ///
    /// # Default values
    /// - `enabled`: `false`
    /// - `sample_rate`: `1000` (one in 1000 operations)
    /// - `alert_threshold`: `3.0` standard deviations
    /// - `max_variation_percent`: `10.0`
    /// - `timeout`: 100 milliseconds
    ///
    /// # Usage
    /// ```ignore
    /// use clock_curve_math::ct::monitoring::MonitoringConfig;
    ///
    /// // Defaults: monitoring disabled
    /// let config = MonitoringConfig::default();
    ///
    /// // Development: enabled, sampling one call in ten
    /// let dev_config = MonitoringConfig {
    ///     enabled: true,
    ///     sample_rate: 10,
    ///     ..MonitoringConfig::default()
    /// };
    /// ```
    fn default() -> Self {
        MonitoringConfig {
            enabled: false,
            sample_rate: 1_000,
            alert_threshold: 3.0,
            max_variation_percent: 10.0,
            timeout: Duration::from_millis(100),
        }
    }
}
125
126/// Result of monitoring a cryptographic operation.
127#[derive(Debug, Clone)]
128pub struct MonitoringResult<T> {
129 /// The result of the operation
130 pub result: T,
131 /// Timing information
132 pub timing: TimingInfo,
133 /// Detected anomaly (if any)
134 pub anomaly: Option<TimingAnomaly>,
135}
136
/// Timing data captured for a single monitored call.
#[derive(Debug, Clone)]
pub struct TimingInfo {
    /// Measured duration of the operation (zero when the call was not sampled).
    pub duration: Duration,
    /// Baseline mean duration known before this call, if a baseline exists.
    pub expected_duration: Option<Duration>,
    /// Distance from the baseline in standard deviations. The monitoring
    /// wrappers fill this in only when a `TimingAnomaly::Deviation` is reported.
    pub deviation: Option<f64>,
}
147
/// A timing irregularity reported for a monitored operation.
///
/// The detector checks the conditions in the order the variants are listed
/// and reports the first one that applies, so at most one anomaly is attached
/// to a call.
///
/// # What an anomaly may point to
/// - a timing side channel,
/// - a performance regression eroding security margins,
/// - environmental change (CPU frequency scaling, memory pressure),
/// - contention in a shared environment.
///
/// # Suggested responses
/// - **Timeout**: investigate immediately (possible DoS or hung operation).
/// - **Deviation**: keep monitoring statistically (possible side channel).
/// - **HighVariation**: check overall system health.
#[derive(Debug, Clone)]
pub enum TimingAnomaly {
    /// The operation ran longer than the configured timeout.
    ///
    /// The operation is not interrupted; this is reported once it completes.
    Timeout {
        /// Duration that was actually measured.
        actual: Duration,
        /// Configured upper bound that was exceeded.
        limit: Duration,
    },

    /// The measured duration is too many standard deviations away from the
    /// baseline mean.
    Deviation {
        /// Signed distance from the baseline mean in standard deviations
        /// (for example `3.5` means 3.5 standard deviations slower).
        deviation: f64,
        /// Threshold, in standard deviations, that was exceeded.
        threshold: f64,
    },

    /// The baseline itself is too noisy: its standard deviation, relative to
    /// its mean, exceeds the allowed percentage.
    HighVariation {
        /// Observed spread as a percentage of the mean duration.
        variation_percent: f64,
        /// Largest percentage permitted by the configuration.
        max_allowed: f64,
    },
}
210
211/// Monitor a cryptographic operation with timing checks.
212///
213/// This function executes the operation while monitoring its timing and
214/// checking for anomalies. Monitoring is only performed according to the
215/// configured sample rate to minimize performance impact.
216pub fn monitor_operation<F, T>(operation_name: &str, operation: F) -> MonitoringResult<T>
217where
218 F: FnOnce() -> T,
219{
220 if !should_monitor() {
221 // Fast path - no monitoring
222 return MonitoringResult {
223 result: operation(),
224 timing: TimingInfo {
225 duration: Duration::from_nanos(0),
226 expected_duration: None,
227 deviation: None,
228 },
229 anomaly: None,
230 };
231 }
232
233 // Get baseline timing for this operation type
234 let baseline = get_baseline_timing(operation_name);
235
236 // Time the operation
237 let start = get_current_time();
238 let result = operation();
239 let end = get_current_time();
240 let duration = end.saturating_sub(start);
241
242 // Check for anomalies
243 let anomaly = detect_anomaly(operation_name, duration, &baseline);
244
245 // Update baseline with new measurement
246 update_baseline_timing(operation_name, duration);
247
248 MonitoringResult {
249 result,
250 timing: TimingInfo {
251 duration,
252 expected_duration: baseline.mean_duration,
253 deviation: anomaly.as_ref().and_then(|a| match a {
254 TimingAnomaly::Deviation { deviation, .. } => Some(*deviation),
255 _ => None,
256 }),
257 },
258 anomaly,
259 }
260}
261
262/// Monitor a cryptographic operation with custom configuration.
263pub fn monitor_operation_with_config<F, T>(
264 operation_name: &str,
265 config: &MonitoringConfig,
266 operation: F,
267) -> MonitoringResult<T>
268where
269 F: FnOnce() -> T,
270{
271 if !config.enabled || !should_monitor_with_rate(config.sample_rate) {
272 return MonitoringResult {
273 result: operation(),
274 timing: TimingInfo {
275 duration: Duration::from_nanos(0),
276 expected_duration: None,
277 deviation: None,
278 },
279 anomaly: None,
280 };
281 }
282
283 let baseline = get_baseline_timing(operation_name);
284 let start = get_current_time();
285 let result = operation();
286 let end = get_current_time();
287 let duration = end.saturating_sub(start);
288
289 let anomaly = detect_anomaly_with_config(operation_name, duration, &baseline, config);
290 update_baseline_timing(operation_name, duration);
291
292 MonitoringResult {
293 result,
294 timing: TimingInfo {
295 duration,
296 expected_duration: baseline.mean_duration,
297 deviation: anomaly.as_ref().and_then(|a| match a {
298 TimingAnomaly::Deviation { deviation, .. } => Some(*deviation),
299 _ => None,
300 }),
301 },
302 anomaly,
303 }
304}
305
306/// Enable runtime monitoring globally.
307///
308/// Activates timing monitoring for all cryptographic operations that use
309/// the monitoring functions. When enabled, operations will be periodically
310/// sampled and checked for timing anomalies based on the configured sample rate.
311///
312/// # Performance Impact
313/// Enabling monitoring adds overhead to cryptographic operations, though the
314/// impact is minimized through sampling. The exact overhead depends on the
315/// sample rate and the complexity of monitored operations.
316///
317/// # Security Benefits
318/// - Detects timing-based side-channel vulnerabilities in production
319/// - Identifies performance regressions that could indicate security issues
320/// - Provides early warning of environmental changes affecting security
321///
322/// # Thread Safety
323/// This function is thread-safe and can be called from any thread.
324pub fn enable_monitoring() {
325 MONITORING_ENABLED.store(true, Ordering::Relaxed);
326}
327
328/// Disable runtime monitoring globally.
329///
330/// Deactivates timing monitoring to minimize performance overhead in production
331/// environments where monitoring is not required. When disabled, monitoring
332/// functions will execute operations without timing checks.
333///
334/// # Use Cases
335/// - Production deployments where performance is critical
336/// - Development environments needing maximum speed
337/// - Situations where monitoring overhead is unacceptable
338///
339/// # Security Considerations
340/// Disabling monitoring removes the ability to detect timing anomalies that
341/// could indicate side-channel vulnerabilities. Use with caution.
342///
343/// # Thread Safety
344/// This function is thread-safe and can be called from any thread.
345pub fn disable_monitoring() {
346 MONITORING_ENABLED.store(false, Ordering::Relaxed);
347}
348
349/// Set the global monitoring sample rate.
350///
351/// Controls how frequently operations are monitored. A sample rate of 1 means
352/// every operation is monitored, while higher values mean less frequent monitoring.
353/// The sample rate affects both performance overhead and anomaly detection coverage.
354///
355/// # Parameters
356/// * `rate` - Sample rate (1 = monitor every operation, higher = less frequent)
357///
358/// # Performance vs Security Trade-off
359/// - **Rate = 1**: Maximum security coverage, highest performance impact
360/// - **Rate = 100**: Good balance for development, moderate overhead
361/// - **Rate = 1000**: Minimal production overhead, basic monitoring coverage
362///
363/// # Examples
364/// ```ignore
365/// use clock_curve_math::ct::monitoring::set_sample_rate;
366///
367/// // Monitor every 100th operation (good for development)
368/// set_sample_rate(100);
369///
370/// // Monitor every 1000th operation (good for production)
371/// set_sample_rate(1000);
372/// ```
373///
374/// # Thread Safety
375/// This function is thread-safe and can be called from any thread.
376pub fn set_sample_rate(rate: usize) {
377 SAMPLE_RATE.store(rate, Ordering::Relaxed);
378}
379
380/// Check if monitoring should be performed based on global configuration.
381///
382/// Determines whether the current operation should be monitored based on
383/// the global monitoring enable flag and sample rate. This function is
384/// called frequently and must be highly optimized.
385///
386/// # Returns
387/// `true` if monitoring should be performed, `false` if the operation
388/// should execute without monitoring overhead.
389///
390/// # Performance
391/// This function uses atomic operations and is designed to be fast
392/// when monitoring is disabled (the common production case).
393fn should_monitor() -> bool {
394 if !MONITORING_ENABLED.load(Ordering::Relaxed) {
395 return false;
396 }
397
398 should_monitor_with_rate(SAMPLE_RATE.load(Ordering::Relaxed))
399}
400
/// Decide whether the current call should be timed for a given sampling interval.
///
/// # Parameters
/// * `sample_rate` - Sampling interval; `0` and `1` both select every call
///
/// # Algorithm
/// For intervals above 1 a single counter, shared by all callers and all
/// intervals, is incremented on each check, and the call is selected when the
/// previous counter value is a multiple of `sample_rate`. Intervals of 0 or 1
/// return immediately without touching the counter.
///
/// # Returns
/// `true` if this call should be timed, `false` otherwise.
///
/// # Thread Safety
/// The counter is advanced with an atomic `fetch_add` (relaxed ordering).
fn should_monitor_with_rate(sample_rate: usize) -> bool {
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    match sample_rate {
        0 | 1 => true,
        interval => COUNTER.fetch_add(1, Ordering::Relaxed) % interval == 0,
    }
}
429
/// Read the clock used to time monitored operations.
///
/// The reading is returned as the `Duration` elapsed since the first call made
/// in this process. Both build flavours therefore return the same type, and
/// callers obtain an elapsed time with `later.saturating_sub(earlier)`.
///
/// With the `std` feature the reading is backed by the monotonic
/// `std::time::Instant`. The raw `Instant` is deliberately not returned:
/// `Instant` has no `saturating_sub` method, so callers written against the
/// `Duration` API would not compile.
#[cfg(feature = "std")]
fn get_current_time() -> core::time::Duration {
    // Fixed reference point, captured on first use and shared by all threads.
    static ANCHOR: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();

    ANCHOR.get_or_init(std::time::Instant::now).elapsed()
}

/// Read the clock used to time monitored operations (`no_std` fallback).
///
/// No time source is available here, so every reading is zero and every
/// measured duration is therefore zero. A real implementation would read a
/// hardware timer.
#[cfg(not(feature = "std"))]
fn get_current_time() -> core::time::Duration {
    core::time::Duration::from_nanos(0)
}
442
/// Snapshot of the timing statistics recorded for one operation type.
///
/// Values of this type are reconstructed from the global baseline storage by
/// `get_baseline_timing` and compared against new measurements by the anomaly
/// detector. How the mean and variance are estimated is decided by
/// `update_baseline_timing`; this type only carries the numbers.
///
/// # Default
/// The derived `Default` is the "nothing measured yet" state: no mean, zero
/// variance and zero samples.
///
/// # Usage in anomaly detection
/// - Statistical checks are skipped until enough samples have been collected.
/// - The square root of `variance` is used as the standard deviation.
///
/// # Storage
/// Baselines live in a small fixed-size table indexed by a hash of the
/// operation name, so distinct names can share a slot.
#[derive(Debug, Clone, Default)]
struct BaselineTiming {
    /// Mean execution duration (`None` until a non-zero mean has been recorded).
    mean_duration: Option<Duration>,
    /// Variance of the execution time, in nanoseconds squared.
    variance: f64,
    /// Number of timing samples folded into this baseline.
    sample_count: usize,
}
499
// Global storage for baseline timings.
//
// Each statistic lives in its own fixed-size array of atomics; the three
// arrays are indexed by the same slot number, obtained from
// `operation_name_hash`. Individual loads and stores are atomic, but a
// baseline as a whole (mean + variance + count) is not updated atomically.
// In production, consider using a proper concurrent hash map.
use core::sync::atomic::AtomicU64;

/// Number of baseline slots. Shared by the storage arrays and the hash
/// modulus so an index produced by `operation_name_hash` is always in bounds.
const BASELINE_SLOTS: usize = 16;

// The two constants below exist only as array-repeat initialisers: every use
// creates a fresh atomic, which is exactly what is wanted here, so the clippy
// warning about interior-mutable constants does not apply.
#[allow(clippy::declare_interior_mutable_const)]
const EMPTY_U64_SLOT: AtomicU64 = AtomicU64::new(0);
#[allow(clippy::declare_interior_mutable_const)]
const EMPTY_USIZE_SLOT: AtomicUsize = AtomicUsize::new(0);

/// Mean duration per slot, in nanoseconds (0 = no mean recorded).
static BASELINE_MEAN_STORAGE: [AtomicU64; BASELINE_SLOTS] = [EMPTY_U64_SLOT; BASELINE_SLOTS];

/// Variance per slot, stored as the raw bit pattern of an `f64`.
static BASELINE_VARIANCE_STORAGE: [AtomicU64; BASELINE_SLOTS] = [EMPTY_U64_SLOT; BASELINE_SLOTS];

/// Number of samples recorded per slot.
static BASELINE_SAMPLE_STORAGE: [AtomicUsize; BASELINE_SLOTS] = [EMPTY_USIZE_SLOT; BASELINE_SLOTS];

/// Map an operation name to a slot index in the baseline storage arrays.
///
/// Uses a simple multiplicative byte hash (factor 31, wrapping arithmetic)
/// reduced modulo the number of slots. The empty string maps to slot 0.
/// Different names may map to the same slot, in which case they share one
/// baseline.
///
/// # Returns
/// An index in `0..BASELINE_SLOTS`.
fn operation_name_hash(name: &str) -> usize {
    let hash = name.bytes().fold(0usize, |acc, byte| {
        acc.wrapping_mul(31).wrapping_add(usize::from(byte))
    });
    hash % BASELINE_SLOTS
}
570
571/// Get baseline timing statistics for an operation type.
572///
573/// Retrieves the current baseline timing data for a specific operation,
574/// which includes mean duration, variance, and sample count. This baseline
575/// is used to detect timing anomalies by comparing new measurements against
576/// historical performance.
577///
578/// # Parameters
579/// * `operation_name` - Identifier for the operation type
580///
581/// # Returns
582/// Current baseline timing statistics for the operation
583///
584/// # Implementation Notes
585/// Uses thread-safe atomic operations for concurrent access.
586/// In production, this would typically use a proper concurrent hash map.
587fn get_baseline_timing(operation_name: &str) -> BaselineTiming {
588 let index = operation_name_hash(operation_name);
589
590 // Reconstruct BaselineTiming from atomic storage
591 let mean_ns = BASELINE_MEAN_STORAGE[index].load(Ordering::Relaxed);
592 let variance_bits = BASELINE_VARIANCE_STORAGE[index].load(Ordering::Relaxed);
593 let sample_count = BASELINE_SAMPLE_STORAGE[index].load(Ordering::Relaxed);
594
595 // Convert stored values back to BaselineTiming
596 let mean_duration = if mean_ns > 0 {
597 Some(Duration::from_nanos(mean_ns))
598 } else {
599 None
600 };
601
602 // Reconstruct f64 from bits (simplified - in practice would need proper serialization)
603 let variance = f64::from_bits(variance_bits);
604
605 BaselineTiming {
606 mean_duration,
607 variance,
608 sample_count,
609 }
610}
611
612/// Update baseline timing statistics with a new measurement.
613///
614/// Incorporates a new timing measurement into the running statistics for
615/// an operation type. Uses thread-safe atomic operations to maintain
616/// mean and variance estimates without storing all historical measurements.
617///
618/// # Parameters
619/// * `operation_name` - Identifier for the operation type
620/// * `duration` - Measured duration of the latest operation execution
621///
622/// # Algorithm
623/// - Uses atomic operations for thread-safe updates
624/// - Simplified statistical tracking compared to full Welford's algorithm
625/// - Suitable for monitoring purposes where exact precision is less critical than thread safety
626///
627/// # Performance
628/// O(1) time complexity with atomic operations, suitable for concurrent monitoring.
629fn update_baseline_timing(operation_name: &str, duration: Duration) {
630 let index = operation_name_hash(operation_name);
631 let duration_ns = duration.as_nanos() as u64;
632
633 // Update sample count atomically
634 let old_count = BASELINE_SAMPLE_STORAGE[index].fetch_add(1, Ordering::Relaxed);
635 let new_count = old_count + 1;
636
637 // Update mean using atomic operations (simplified running average)
638 let old_mean = BASELINE_MEAN_STORAGE[index].load(Ordering::Relaxed);
639 if old_mean == 0 {
640 // First measurement
641 BASELINE_MEAN_STORAGE[index].store(duration_ns, Ordering::Relaxed);
642 } else {
643 // Running average: new_mean = (old_mean * old_count + duration) / new_count
644 let new_mean =
645 ((old_mean as u128 * old_count as u128) + duration_ns as u128) / new_count as u128;
646 BASELINE_MEAN_STORAGE[index].store(new_mean as u64, Ordering::Relaxed);
647 }
648
649 // For variance, we use a simplified approach - store the variance as bits
650 // In practice, this would need more sophisticated atomic statistical tracking
651 // For now, we store a placeholder variance (could be improved with better atomic math)
652 let variance_placeholder = (duration_ns as f64 * 0.1).to_bits(); // Simplified variance estimate
653 BASELINE_VARIANCE_STORAGE[index].store(variance_placeholder, Ordering::Relaxed);
654}
655
656/// Detect timing anomalies using default monitoring configuration.
657///
658/// Checks if a measured duration deviates significantly from the expected
659/// baseline timing for an operation. Uses default configuration thresholds
660/// for anomaly detection.
661///
662/// # Parameters
663/// * `operation_name` - Identifier for the operation (for logging)
664/// * `duration` - Measured execution duration
665/// * `baseline` - Statistical baseline for this operation type
666///
667/// # Returns
668/// `Some(anomaly)` if a timing anomaly is detected, `None` if timing is normal
669fn detect_anomaly(
670 operation_name: &str,
671 duration: Duration,
672 baseline: &BaselineTiming,
673) -> Option<TimingAnomaly> {
674 detect_anomaly_with_config(
675 operation_name,
676 duration,
677 baseline,
678 &MonitoringConfig::default(),
679 )
680}
681
682/// Detect timing anomalies with custom configuration and thresholds.
683///
684/// Performs comprehensive anomaly detection by checking multiple criteria:
685/// - Timeout violations (operations taking too long)
686/// - Statistical deviations from baseline (using standard deviations)
687/// - High timing variation (excessive jitter in execution times)
688///
689/// # Parameters
690/// * `_operation_name` - Identifier for the operation (currently unused)
691/// * `duration` - Measured execution duration
692/// * `baseline` - Statistical baseline for comparison
693/// * `config` - Monitoring configuration with thresholds
694///
695/// # Returns
696/// `Some(anomaly)` if any anomaly condition is met, `None` if timing appears normal
697///
698/// # Detection Criteria
699/// 1. **Timeout**: Duration exceeds configured maximum
700/// 2. **Deviation**: Statistical deviation exceeds alert threshold
701/// 3. **Variation**: Timing variation exceeds maximum allowed percentage
702///
703/// # Statistical Analysis
704/// Uses z-score analysis for deviation detection when sufficient samples exist.
705/// Requires at least 10 samples for reliable statistical analysis.
706fn detect_anomaly_with_config(
707 _operation_name: &str,
708 duration: Duration,
709 baseline: &BaselineTiming,
710 config: &MonitoringConfig,
711) -> Option<TimingAnomaly> {
712 // Check for timeout
713 if duration > config.timeout {
714 return Some(TimingAnomaly::Timeout {
715 actual: duration,
716 limit: config.timeout,
717 });
718 }
719
720 // Check for deviation from baseline
721 if let Some(mean_duration) = baseline.mean_duration {
722 if baseline.sample_count > 10 {
723 // Need enough samples for statistical analysis
724 let duration_ns = duration.as_nanos() as f64;
725 let mean_ns = mean_duration.as_nanos() as f64;
726 // Simple approximation of square root for no_std
727 let std_dev = if baseline.variance > 0.0 {
728 let x = baseline.variance;
729 let mut y = 1.0;
730 // Babylonian method approximation
731 for _ in 0..10 {
732 y = (y + x / y) * 0.5;
733 }
734 y
735 } else {
736 0.0
737 };
738
739 if std_dev > 0.0 {
740 let deviation = (duration_ns - mean_ns) / std_dev;
741 if deviation.abs() > config.alert_threshold {
742 return Some(TimingAnomaly::Deviation {
743 deviation,
744 threshold: config.alert_threshold,
745 });
746 }
747 }
748
749 // Check for high variation
750 if baseline.sample_count > 1 {
751 let variation_percent = (std_dev / mean_ns) * 100.0;
752 if variation_percent > config.max_variation_percent {
753 return Some(TimingAnomaly::HighVariation {
754 variation_percent,
755 max_allowed: config.max_variation_percent,
756 });
757 }
758 }
759 }
760 }
761
762 None
763}
764
#[cfg(test)]
mod tests {
    use super::*;

    /// The wrapper must hand back the operation's own result.
    ///
    /// Monitoring is off by default, so this exercises the untimed fast path
    /// and expects no anomaly.
    #[test]
    fn test_monitor_operation_basic() {
        let MonitoringResult { result, anomaly, .. } = monitor_operation("test_op", || 42);

        assert_eq!(result, 42);
        // Monitoring is disabled by default, so no anomaly should be detected
        assert!(anomaly.is_none());
    }

    /// Enabling and disabling must be reflected in the global flag.
    #[test]
    fn test_enable_monitoring() {
        enable_monitoring();
        let after_enable = MONITORING_ENABLED.load(Ordering::Relaxed);
        assert!(after_enable);

        disable_monitoring();
        let after_disable = MONITORING_ENABLED.load(Ordering::Relaxed);
        assert!(!after_disable);
    }

    /// Setting the sample rate must be reflected in the global interval.
    #[test]
    fn test_sample_rate() {
        let requested = 10;
        set_sample_rate(requested);

        assert_eq!(SAMPLE_RATE.load(Ordering::Relaxed), requested);
    }

    /// Hashing must be deterministic and stay inside the storage bounds.
    #[test]
    fn test_operation_name_hash() {
        let first = operation_name_hash("test");
        let second = operation_name_hash("test");
        // A different name may or may not land in a different slot.
        let _other = operation_name_hash("different");

        assert_eq!(first, second);
        assert!(first < 16); // Should fit in our array
    }
}
817}