clock_curve_math/ct/
monitoring.rs

1//! Runtime monitoring for cryptographic operations.
2//!
3//! This module provides runtime timing monitoring capabilities for detecting
4//! timing anomalies in production cryptographic operations. It can help identify
5//! potential side-channel vulnerabilities that manifest at runtime.
6//!
7//! # Features
8//!
9//! ## Timing Anomaly Detection
10//! - Statistical analysis of operation timing
11//! - Threshold-based anomaly detection
12//! - Configurable monitoring levels
13//!
14//! ## Performance Impact Control
15//! - Minimal overhead in production
16//! - Configurable sampling rates
17//! - Optional monitoring for debugging
18//!
//! ## Alert System
//! - Configurable alert thresholds
//! - Anomalies are returned to the caller in `MonitoringResult::anomaly`
//! - Callers can forward anomalies to their own logging or monitoring systems
23//!
24//! # Usage
25//!
26//! ```ignore
27//! use clock_curve_math::ct::monitoring::*;
28//!
29//! // Monitor a cryptographic operation
30//! let result = monitor_operation("scalar_mul", || {
31//!     scalar_a.mul(&scalar_b)
32//! });
33//!
34//! // Check for timing anomalies
35//! if let Some(anomaly) = result.anomaly {
36//!     log::warn!("Timing anomaly detected: {:?}", anomaly);
37//! }
38//! ```
39
40use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
41use core::time::Duration;
42
/// Global on/off switch read by `should_monitor`; monitoring starts disabled
/// and is toggled by `enable_monitoring` / `disable_monitoring`.
static MONITORING_ENABLED: AtomicBool = AtomicBool::new(false);
/// Global sampling interval read by `should_monitor` and written by
/// `set_sample_rate`. Values of 0 or 1 cause every call to be sampled.
static SAMPLE_RATE: AtomicUsize = AtomicUsize::new(1000); // Sample every 1000 operations
46
/// Per-call settings for [`monitor_operation_with_config`].
///
/// `enabled` and `sample_rate` decide whether a given call is timed at all;
/// the remaining fields are the thresholds applied to a timed call when
/// looking for anomalies.
///
/// # Trade-offs
/// - A smaller `sample_rate` times more calls (more coverage, more overhead).
/// - Smaller thresholds flag more calls (more sensitivity, more false alarms).
/// - A shorter `timeout` flags slow calls sooner but may flag legitimate ones.
///
/// Note that [`monitor_operation`] ignores `enabled` and `sample_rate` here:
/// it uses the global switch and global sample rate, and takes only the
/// thresholds from `MonitoringConfig::default()`.
#[derive(Debug, Clone)]
pub struct MonitoringConfig {
    /// When `false`, `monitor_operation_with_config` runs the operation
    /// without timing it.
    pub enabled: bool,
    /// Sampling interval: roughly one call in every `sample_rate` is timed.
    /// Values of 0 or 1 time every call.
    pub sample_rate: usize,
    /// Z-score limit. A timed call whose duration is more than this many
    /// standard deviations away from the baseline mean is reported as
    /// [`TimingAnomaly::Deviation`].
    pub alert_threshold: f64,
    /// Limit on the baseline's standard deviation expressed as a percentage
    /// of its mean; exceeding it is reported as
    /// [`TimingAnomaly::HighVariation`].
    pub max_variation_percent: f64,
    /// Longest acceptable duration. The operation is never interrupted; a
    /// call that ran longer is reported as [`TimingAnomaly::Timeout`] after
    /// it completes.
    pub timeout: Duration,
}

impl Default for MonitoringConfig {
    /// Returns the conservative defaults: monitoring off, sparse sampling.
    ///
    /// | field                   | value   |
    /// |-------------------------|---------|
    /// | `enabled`               | `false` |
    /// | `sample_rate`           | `1000`  |
    /// | `alert_threshold`       | `3.0`   |
    /// | `max_variation_percent` | `10.0`  |
    /// | `timeout`               | 100 ms  |
    ///
    /// # Usage
    /// ```ignore
    /// use clock_curve_math::ct::monitoring::MonitoringConfig;
    ///
    /// // Start from the defaults and override only what differs.
    /// let dev_config = MonitoringConfig {
    ///     enabled: true,
    ///     sample_rate: 10,
    ///     ..MonitoringConfig::default()
    /// };
    /// ```
    fn default() -> Self {
        let timeout = Duration::from_millis(100);
        Self {
            enabled: false,
            sample_rate: 1_000,
            // Three standard deviations.
            alert_threshold: 3.0,
            // Standard deviation may be up to 10% of the mean.
            max_variation_percent: 10.0,
            timeout,
        }
    }
}
125
126/// Result of monitoring a cryptographic operation.
127#[derive(Debug, Clone)]
128pub struct MonitoringResult<T> {
129    /// The result of the operation
130    pub result: T,
131    /// Timing information
132    pub timing: TimingInfo,
133    /// Detected anomaly (if any)
134    pub anomaly: Option<TimingAnomaly>,
135}
136
/// Measurements recorded for a single monitored call.
#[derive(Debug, Clone)]
pub struct TimingInfo {
    /// Measured duration of the call. The monitoring wrappers report a zero
    /// duration for calls that were not sampled.
    pub duration: Duration,
    /// Baseline mean for this operation as it was before the call, if a
    /// baseline existed.
    pub expected_duration: Option<Duration>,
    /// Signed z-score of the call relative to the baseline. The monitoring
    /// wrappers fill this in only when a `Deviation` anomaly was reported.
    pub deviation: Option<f64>,
}
147
/// A timing irregularity reported for a monitored call.
///
/// The detector checks the conditions in the order the variants are listed
/// and reports the first one that applies, so a single call yields at most
/// one anomaly.
///
/// # Interpreting anomalies
/// An anomaly is a signal to investigate, not proof of a problem. Possible
/// causes include data-dependent timing (a side-channel concern), system
/// load, CPU frequency changes, or resource contention.
///
/// | variant         | typical follow-up                                   |
/// |-----------------|-----------------------------------------------------|
/// | `Timeout`       | investigate immediately (hang, overload)            |
/// | `Deviation`     | track statistically; may point at a timing leak     |
/// | `HighVariation` | check system health and environmental factors       |
#[derive(Debug, Clone)]
pub enum TimingAnomaly {
    /// The call ran longer than the configured timeout.
    ///
    /// The operation is not interrupted; this is reported after it returns.
    Timeout {
        /// How long the call actually took.
        actual: Duration,
        /// The configured timeout that was exceeded.
        limit: Duration,
    },

    /// The call's duration was too far from the baseline mean.
    Deviation {
        /// Signed distance from the baseline mean, in standard deviations
        /// (for example `3.5` means 3.5 standard deviations slower).
        deviation: f64,
        /// The configured limit whose magnitude was exceeded.
        threshold: f64,
    },

    /// The baseline itself is too noisy: its standard deviation is too large
    /// relative to its mean.
    HighVariation {
        /// Standard deviation as a percentage of the mean duration.
        variation_percent: f64,
        /// The configured maximum percentage that was exceeded.
        max_allowed: f64,
    },
}
210
211/// Monitor a cryptographic operation with timing checks.
212///
213/// This function executes the operation while monitoring its timing and
214/// checking for anomalies. Monitoring is only performed according to the
215/// configured sample rate to minimize performance impact.
216pub fn monitor_operation<F, T>(operation_name: &str, operation: F) -> MonitoringResult<T>
217where
218    F: FnOnce() -> T,
219{
220    if !should_monitor() {
221        // Fast path - no monitoring
222        return MonitoringResult {
223            result: operation(),
224            timing: TimingInfo {
225                duration: Duration::from_nanos(0),
226                expected_duration: None,
227                deviation: None,
228            },
229            anomaly: None,
230        };
231    }
232
233    // Get baseline timing for this operation type
234    let baseline = get_baseline_timing(operation_name);
235
236    // Time the operation
237    let start = get_current_time();
238    let result = operation();
239    let end = get_current_time();
240    let duration = end.saturating_sub(start);
241
242    // Check for anomalies
243    let anomaly = detect_anomaly(operation_name, duration, &baseline);
244
245    // Update baseline with new measurement
246    update_baseline_timing(operation_name, duration);
247
248    MonitoringResult {
249        result,
250        timing: TimingInfo {
251            duration,
252            expected_duration: baseline.mean_duration,
253            deviation: anomaly.as_ref().and_then(|a| match a {
254                TimingAnomaly::Deviation { deviation, .. } => Some(*deviation),
255                _ => None,
256            }),
257        },
258        anomaly,
259    }
260}
261
262/// Monitor a cryptographic operation with custom configuration.
263pub fn monitor_operation_with_config<F, T>(
264    operation_name: &str,
265    config: &MonitoringConfig,
266    operation: F,
267) -> MonitoringResult<T>
268where
269    F: FnOnce() -> T,
270{
271    if !config.enabled || !should_monitor_with_rate(config.sample_rate) {
272        return MonitoringResult {
273            result: operation(),
274            timing: TimingInfo {
275                duration: Duration::from_nanos(0),
276                expected_duration: None,
277                deviation: None,
278            },
279            anomaly: None,
280        };
281    }
282
283    let baseline = get_baseline_timing(operation_name);
284    let start = get_current_time();
285    let result = operation();
286    let end = get_current_time();
287    let duration = end.saturating_sub(start);
288
289    let anomaly = detect_anomaly_with_config(operation_name, duration, &baseline, config);
290    update_baseline_timing(operation_name, duration);
291
292    MonitoringResult {
293        result,
294        timing: TimingInfo {
295            duration,
296            expected_duration: baseline.mean_duration,
297            deviation: anomaly.as_ref().and_then(|a| match a {
298                TimingAnomaly::Deviation { deviation, .. } => Some(*deviation),
299                _ => None,
300            }),
301        },
302        anomaly,
303    }
304}
305
306/// Enable runtime monitoring globally.
307///
308/// Activates timing monitoring for all cryptographic operations that use
309/// the monitoring functions. When enabled, operations will be periodically
310/// sampled and checked for timing anomalies based on the configured sample rate.
311///
312/// # Performance Impact
313/// Enabling monitoring adds overhead to cryptographic operations, though the
314/// impact is minimized through sampling. The exact overhead depends on the
315/// sample rate and the complexity of monitored operations.
316///
317/// # Security Benefits
318/// - Detects timing-based side-channel vulnerabilities in production
319/// - Identifies performance regressions that could indicate security issues
320/// - Provides early warning of environmental changes affecting security
321///
322/// # Thread Safety
323/// This function is thread-safe and can be called from any thread.
324pub fn enable_monitoring() {
325    MONITORING_ENABLED.store(true, Ordering::Relaxed);
326}
327
328/// Disable runtime monitoring globally.
329///
330/// Deactivates timing monitoring to minimize performance overhead in production
331/// environments where monitoring is not required. When disabled, monitoring
332/// functions will execute operations without timing checks.
333///
334/// # Use Cases
335/// - Production deployments where performance is critical
336/// - Development environments needing maximum speed
337/// - Situations where monitoring overhead is unacceptable
338///
339/// # Security Considerations
340/// Disabling monitoring removes the ability to detect timing anomalies that
341/// could indicate side-channel vulnerabilities. Use with caution.
342///
343/// # Thread Safety
344/// This function is thread-safe and can be called from any thread.
345pub fn disable_monitoring() {
346    MONITORING_ENABLED.store(false, Ordering::Relaxed);
347}
348
349/// Set the global monitoring sample rate.
350///
351/// Controls how frequently operations are monitored. A sample rate of 1 means
352/// every operation is monitored, while higher values mean less frequent monitoring.
353/// The sample rate affects both performance overhead and anomaly detection coverage.
354///
355/// # Parameters
356/// * `rate` - Sample rate (1 = monitor every operation, higher = less frequent)
357///
358/// # Performance vs Security Trade-off
359/// - **Rate = 1**: Maximum security coverage, highest performance impact
360/// - **Rate = 100**: Good balance for development, moderate overhead
361/// - **Rate = 1000**: Minimal production overhead, basic monitoring coverage
362///
363/// # Examples
364/// ```ignore
365/// use clock_curve_math::ct::monitoring::set_sample_rate;
366///
367/// // Monitor every 100th operation (good for development)
368/// set_sample_rate(100);
369///
370/// // Monitor every 1000th operation (good for production)
371/// set_sample_rate(1000);
372/// ```
373///
374/// # Thread Safety
375/// This function is thread-safe and can be called from any thread.
376pub fn set_sample_rate(rate: usize) {
377    SAMPLE_RATE.store(rate, Ordering::Relaxed);
378}
379
380/// Check if monitoring should be performed based on global configuration.
381///
382/// Determines whether the current operation should be monitored based on
383/// the global monitoring enable flag and sample rate. This function is
384/// called frequently and must be highly optimized.
385///
386/// # Returns
387/// `true` if monitoring should be performed, `false` if the operation
388/// should execute without monitoring overhead.
389///
390/// # Performance
391/// This function uses atomic operations and is designed to be fast
392/// when monitoring is disabled (the common production case).
393fn should_monitor() -> bool {
394    if !MONITORING_ENABLED.load(Ordering::Relaxed) {
395        return false;
396    }
397
398    should_monitor_with_rate(SAMPLE_RATE.load(Ordering::Relaxed))
399}
400
/// Decides whether the current call should be timed for a given sampling
/// interval.
///
/// # Parameters
/// * `sample_rate` - Sampling interval (`0`/`1` = every call, larger = less often)
///
/// # Algorithm
/// A single function-local atomic counter is advanced on every check with a
/// rate above 1, and the call is selected when the previous counter value is
/// a multiple of `sample_rate`. The counter is shared by all callers and all
/// rates, so a caller's exact hit pattern depends on what else is being
/// checked concurrently.
///
/// # Returns
/// `true` if this call should be timed, `false` otherwise.
fn should_monitor_with_rate(sample_rate: usize) -> bool {
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    match sample_rate {
        // Every call is sampled; the counter is left untouched.
        0 | 1 => true,
        rate => COUNTER.fetch_add(1, Ordering::Relaxed) % rate == 0,
    }
}
429
/// Returns the current time as a `Duration` since a fixed reference point.
///
/// Both configurations return the same type so callers can compute an
/// elapsed time with `end.saturating_sub(start)`. (`std::time::Instant` has
/// no `saturating_sub`, so returning an `Instant` here would not compile
/// with those callers.)
///
/// With the `std` feature the reference point is the first call to this
/// function in the process, measured on the monotonic `Instant` clock.
#[cfg(feature = "std")]
fn get_current_time() -> core::time::Duration {
    // Lazily captured process-wide epoch; every reading is relative to it.
    static EPOCH: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();
    EPOCH.get_or_init(std::time::Instant::now).elapsed()
}

/// Returns the current time as a `Duration` since a fixed reference point.
///
/// Without `std` there is no clock source available here, so this always
/// returns zero and every measured duration is therefore zero. A real
/// `no_std` deployment needs to replace this with a hardware timer read.
#[cfg(not(feature = "std"))]
fn get_current_time() -> core::time::Duration {
    core::time::Duration::from_nanos(0)
}
442
/// Snapshot of the running timing statistics kept for one operation slot.
///
/// A snapshot is produced by `get_baseline_timing` and consumed by the
/// anomaly detector, which compares a new measurement against it.
///
/// # Usage in Anomaly Detection
/// The detector ignores baselines with 10 or fewer samples, and derives a
/// standard deviation from `variance` for its z-score and variation checks.
///
/// # Default
/// The derived `Default` is the "nothing measured yet" state: no mean, zero
/// variance, zero samples.
#[derive(Debug, Clone, Default)]
struct BaselineTiming {
    /// Mean duration so far; `None` until a baseline exists.
    mean_duration: Option<Duration>,
    /// Variance of the durations, in squared nanoseconds.
    variance: f64,
    /// Number of measurements folded into this baseline.
    sample_count: usize,
}
499
500// Global storage for baseline timings using thread-safe static
501// Note: This simplified implementation uses atomic operations for thread safety
502// In production, consider using a proper concurrent hash map
503use core::sync::atomic::AtomicU64;
504
// Seed values for the fixed-size baseline tables. A `const` is required
// because atomics are not `Copy`; each array element below is a fresh,
// independent atomic created from the constant.
#[allow(clippy::declare_interior_mutable_const)]
const ZEROED_U64_SLOT: AtomicU64 = AtomicU64::new(0);
#[allow(clippy::declare_interior_mutable_const)]
const ZEROED_USIZE_SLOT: AtomicUsize = AtomicUsize::new(0);

/// Per-slot mean duration in nanoseconds (0 = no baseline yet).
static BASELINE_MEAN_STORAGE: [AtomicU64; 16] = [ZEROED_U64_SLOT; 16];

/// Per-slot variance, stored as the raw bit pattern of an `f64`.
static BASELINE_VARIANCE_STORAGE: [AtomicU64; 16] = [ZEROED_U64_SLOT; 16];

/// Per-slot number of measurements recorded.
static BASELINE_SAMPLE_STORAGE: [AtomicUsize; 16] = [ZEROED_USIZE_SLOT; 16];
561
/// Maps an operation name to a slot index in the 16-entry baseline tables.
///
/// Uses a multiply-by-31 polynomial hash over the name's bytes with wrapping
/// arithmetic, reduced modulo 16. Distinct names can land in the same slot,
/// in which case they share one baseline.
fn operation_name_hash(name: &str) -> usize {
    let digest = name
        .bytes()
        .fold(0usize, |acc, byte| acc.wrapping_mul(31).wrapping_add(byte as usize));
    digest % 16
}
570
571/// Get baseline timing statistics for an operation type.
572///
573/// Retrieves the current baseline timing data for a specific operation,
574/// which includes mean duration, variance, and sample count. This baseline
575/// is used to detect timing anomalies by comparing new measurements against
576/// historical performance.
577///
578/// # Parameters
579/// * `operation_name` - Identifier for the operation type
580///
581/// # Returns
582/// Current baseline timing statistics for the operation
583///
584/// # Implementation Notes
585/// Uses thread-safe atomic operations for concurrent access.
586/// In production, this would typically use a proper concurrent hash map.
587fn get_baseline_timing(operation_name: &str) -> BaselineTiming {
588    let index = operation_name_hash(operation_name);
589
590    // Reconstruct BaselineTiming from atomic storage
591    let mean_ns = BASELINE_MEAN_STORAGE[index].load(Ordering::Relaxed);
592    let variance_bits = BASELINE_VARIANCE_STORAGE[index].load(Ordering::Relaxed);
593    let sample_count = BASELINE_SAMPLE_STORAGE[index].load(Ordering::Relaxed);
594
595    // Convert stored values back to BaselineTiming
596    let mean_duration = if mean_ns > 0 {
597        Some(Duration::from_nanos(mean_ns))
598    } else {
599        None
600    };
601
602    // Reconstruct f64 from bits (simplified - in practice would need proper serialization)
603    let variance = f64::from_bits(variance_bits);
604
605    BaselineTiming {
606        mean_duration,
607        variance,
608        sample_count,
609    }
610}
611
612/// Update baseline timing statistics with a new measurement.
613///
614/// Incorporates a new timing measurement into the running statistics for
615/// an operation type. Uses thread-safe atomic operations to maintain
616/// mean and variance estimates without storing all historical measurements.
617///
618/// # Parameters
619/// * `operation_name` - Identifier for the operation type
620/// * `duration` - Measured duration of the latest operation execution
621///
622/// # Algorithm
623/// - Uses atomic operations for thread-safe updates
624/// - Simplified statistical tracking compared to full Welford's algorithm
625/// - Suitable for monitoring purposes where exact precision is less critical than thread safety
626///
627/// # Performance
628/// O(1) time complexity with atomic operations, suitable for concurrent monitoring.
629fn update_baseline_timing(operation_name: &str, duration: Duration) {
630    let index = operation_name_hash(operation_name);
631    let duration_ns = duration.as_nanos() as u64;
632
633    // Update sample count atomically
634    let old_count = BASELINE_SAMPLE_STORAGE[index].fetch_add(1, Ordering::Relaxed);
635    let new_count = old_count + 1;
636
637    // Update mean using atomic operations (simplified running average)
638    let old_mean = BASELINE_MEAN_STORAGE[index].load(Ordering::Relaxed);
639    if old_mean == 0 {
640        // First measurement
641        BASELINE_MEAN_STORAGE[index].store(duration_ns, Ordering::Relaxed);
642    } else {
643        // Running average: new_mean = (old_mean * old_count + duration) / new_count
644        let new_mean =
645            ((old_mean as u128 * old_count as u128) + duration_ns as u128) / new_count as u128;
646        BASELINE_MEAN_STORAGE[index].store(new_mean as u64, Ordering::Relaxed);
647    }
648
649    // For variance, we use a simplified approach - store the variance as bits
650    // In practice, this would need more sophisticated atomic statistical tracking
651    // For now, we store a placeholder variance (could be improved with better atomic math)
652    let variance_placeholder = (duration_ns as f64 * 0.1).to_bits(); // Simplified variance estimate
653    BASELINE_VARIANCE_STORAGE[index].store(variance_placeholder, Ordering::Relaxed);
654}
655
656/// Detect timing anomalies using default monitoring configuration.
657///
658/// Checks if a measured duration deviates significantly from the expected
659/// baseline timing for an operation. Uses default configuration thresholds
660/// for anomaly detection.
661///
662/// # Parameters
663/// * `operation_name` - Identifier for the operation (for logging)
664/// * `duration` - Measured execution duration
665/// * `baseline` - Statistical baseline for this operation type
666///
667/// # Returns
668/// `Some(anomaly)` if a timing anomaly is detected, `None` if timing is normal
669fn detect_anomaly(
670    operation_name: &str,
671    duration: Duration,
672    baseline: &BaselineTiming,
673) -> Option<TimingAnomaly> {
674    detect_anomaly_with_config(
675        operation_name,
676        duration,
677        baseline,
678        &MonitoringConfig::default(),
679    )
680}
681
682/// Detect timing anomalies with custom configuration and thresholds.
683///
684/// Performs comprehensive anomaly detection by checking multiple criteria:
685/// - Timeout violations (operations taking too long)
686/// - Statistical deviations from baseline (using standard deviations)
687/// - High timing variation (excessive jitter in execution times)
688///
689/// # Parameters
690/// * `_operation_name` - Identifier for the operation (currently unused)
691/// * `duration` - Measured execution duration
692/// * `baseline` - Statistical baseline for comparison
693/// * `config` - Monitoring configuration with thresholds
694///
695/// # Returns
696/// `Some(anomaly)` if any anomaly condition is met, `None` if timing appears normal
697///
698/// # Detection Criteria
699/// 1. **Timeout**: Duration exceeds configured maximum
700/// 2. **Deviation**: Statistical deviation exceeds alert threshold
701/// 3. **Variation**: Timing variation exceeds maximum allowed percentage
702///
703/// # Statistical Analysis
704/// Uses z-score analysis for deviation detection when sufficient samples exist.
705/// Requires at least 10 samples for reliable statistical analysis.
706fn detect_anomaly_with_config(
707    _operation_name: &str,
708    duration: Duration,
709    baseline: &BaselineTiming,
710    config: &MonitoringConfig,
711) -> Option<TimingAnomaly> {
712    // Check for timeout
713    if duration > config.timeout {
714        return Some(TimingAnomaly::Timeout {
715            actual: duration,
716            limit: config.timeout,
717        });
718    }
719
720    // Check for deviation from baseline
721    if let Some(mean_duration) = baseline.mean_duration {
722        if baseline.sample_count > 10 {
723            // Need enough samples for statistical analysis
724            let duration_ns = duration.as_nanos() as f64;
725            let mean_ns = mean_duration.as_nanos() as f64;
726            // Simple approximation of square root for no_std
727            let std_dev = if baseline.variance > 0.0 {
728                let x = baseline.variance;
729                let mut y = 1.0;
730                // Babylonian method approximation
731                for _ in 0..10 {
732                    y = (y + x / y) * 0.5;
733                }
734                y
735            } else {
736                0.0
737            };
738
739            if std_dev > 0.0 {
740                let deviation = (duration_ns - mean_ns) / std_dev;
741                if deviation.abs() > config.alert_threshold {
742                    return Some(TimingAnomaly::Deviation {
743                        deviation,
744                        threshold: config.alert_threshold,
745                    });
746                }
747            }
748
749            // Check for high variation
750            if baseline.sample_count > 1 {
751                let variation_percent = (std_dev / mean_ns) * 100.0;
752                if variation_percent > config.max_variation_percent {
753                    return Some(TimingAnomaly::HighVariation {
754                        variation_percent,
755                        max_allowed: config.max_variation_percent,
756                    });
757                }
758            }
759        }
760    }
761
762    None
763}
764
#[cfg(test)]
mod tests {
    use super::*;

    /// The wrapper must hand back the operation's value untouched, and a
    /// trivial operation must not be flagged as anomalous.
    #[test]
    fn test_monitor_operation_basic() {
        let outcome = monitor_operation("test_op", || 42);

        assert_eq!(outcome.result, 42);
        // Monitoring is disabled by default, so no anomaly should be detected
        assert!(outcome.anomaly.is_none());
    }

    /// The global switch must reflect the most recent enable/disable call.
    #[test]
    fn test_enable_monitoring() {
        enable_monitoring();
        let after_enable = MONITORING_ENABLED.load(Ordering::Relaxed);
        assert!(after_enable);

        disable_monitoring();
        let after_disable = MONITORING_ENABLED.load(Ordering::Relaxed);
        assert!(!after_disable);
    }

    /// `set_sample_rate` must store exactly the value it is given.
    #[test]
    fn test_sample_rate() {
        set_sample_rate(10);

        let stored = SAMPLE_RATE.load(Ordering::Relaxed);
        assert_eq!(stored, 10);
    }

    /// Hashing must be deterministic and must always yield a valid index
    /// into the 16-entry baseline tables.
    #[test]
    fn test_operation_name_hash() {
        let first = operation_name_hash("test");
        let second = operation_name_hash("test");
        // A different name may or may not collide; only computing it is checked.
        let _other = operation_name_hash("different");

        assert_eq!(first, second);
        assert!(first < 16); // Should fit in our array
    }
}
clock_curve_math/ct/monitoring.rs

clock_curve_math/ct/
monitoring.rs