Skip to main content

entrenar/integrity/
trace_storage.rs

//! Trace Storage Policy (ENT-015)
//!
//! Provides configurable trace storage with compression and retention policies
//! for managing experiment traces efficiently.
5
6use serde::{Deserialize, Serialize};
7
/// Length of a non-leap calendar year in days; the archival preset uses it as its
/// retention period.
const DAYS_PER_YEAR: u32 = 365;
10
11/// Compression algorithm for trace data
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
13pub enum CompressionAlgorithm {
14    /// No compression
15    #[default]
16    None,
17    /// Run-length encoding (good for sparse data)
18    Rle,
19    /// Zstandard compression (good balance of speed/ratio)
20    Zstd,
21    /// LZ4 compression (fastest)
22    Lz4,
23}
24
25impl CompressionAlgorithm {
26    /// Returns the typical compression ratio for this algorithm
27    ///
28    /// Values are approximate and depend on data characteristics.
29    pub fn typical_ratio(&self) -> f64 {
30        match self {
31            Self::None => 1.0,
32            Self::Rle => 2.0,  // Varies widely based on data
33            Self::Zstd => 4.0, // Good general compression
34            Self::Lz4 => 2.5,  // Fast but lower ratio
35        }
36    }
37
38    /// Returns the relative speed of this algorithm (higher = faster)
39    ///
40    /// Scale: 1.0 = baseline (Zstd), higher = faster
41    pub fn relative_speed(&self) -> f64 {
42        match self {
43            Self::None => 10.0, // No processing
44            Self::Rle => 5.0,   // Simple algorithm
45            Self::Zstd => 1.0,  // Baseline
46            Self::Lz4 => 3.0,   // Optimized for speed
47        }
48    }
49
50    /// Parse compression algorithm from string
51    pub fn parse(s: &str) -> Option<Self> {
52        match s.to_lowercase().as_str() {
53            "none" | "off" | "disabled" => Some(Self::None),
54            "rle" | "runlength" | "run-length" => Some(Self::Rle),
55            "zstd" | "zstandard" => Some(Self::Zstd),
56            "lz4" => Some(Self::Lz4),
57            _ => None,
58        }
59    }
60}
61
62impl std::fmt::Display for CompressionAlgorithm {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        match self {
65            Self::None => write!(f, "none"),
66            Self::Rle => write!(f, "rle"),
67            Self::Zstd => write!(f, "zstd"),
68            Self::Lz4 => write!(f, "lz4"),
69        }
70    }
71}
72
73/// Trace storage configuration policy
74///
75/// Controls how experiment traces are stored, compressed, and retained.
76#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
77pub struct TraceStoragePolicy {
78    /// Compression algorithm to use for trace data
79    pub compression: CompressionAlgorithm,
80
81    /// Number of days to retain trace data (0 = indefinite)
82    pub retention_days: u32,
83
84    /// Maximum storage size in bytes (0 = unlimited)
85    pub max_size_bytes: u64,
86
87    /// Sampling rate for trace collection (0.0-1.0)
88    /// 1.0 = collect all traces, 0.5 = collect 50%, etc.
89    pub sample_rate: f64,
90}
91
92impl Default for TraceStoragePolicy {
93    fn default() -> Self {
94        Self {
95            compression: CompressionAlgorithm::Zstd,
96            retention_days: 30,
97            max_size_bytes: 10 * 1024 * 1024 * 1024, // 10 GB
98            sample_rate: 1.0,
99        }
100    }
101}
102
103impl TraceStoragePolicy {
104    /// Create a new trace storage policy
105    pub fn new(
106        compression: CompressionAlgorithm,
107        retention_days: u32,
108        max_size_bytes: u64,
109        sample_rate: f64,
110    ) -> Self {
111        Self {
112            compression,
113            retention_days,
114            max_size_bytes,
115            sample_rate: sample_rate.clamp(0.0, 1.0),
116        }
117    }
118
119    /// Create a minimal policy (no compression, short retention)
120    pub fn minimal() -> Self {
121        Self {
122            compression: CompressionAlgorithm::None,
123            retention_days: 7,
124            max_size_bytes: 1024 * 1024 * 1024, // 1 GB
125            sample_rate: 0.1,
126        }
127    }
128
129    /// Create a policy optimized for development (high detail, short retention)
130    pub fn development() -> Self {
131        Self {
132            compression: CompressionAlgorithm::Lz4,
133            retention_days: 7,
134            max_size_bytes: 5 * 1024 * 1024 * 1024, // 5 GB
135            sample_rate: 1.0,
136        }
137    }
138
139    /// Create a policy optimized for production (balanced)
140    pub fn production() -> Self {
141        Self {
142            compression: CompressionAlgorithm::Zstd,
143            retention_days: 90,
144            max_size_bytes: 50 * 1024 * 1024 * 1024, // 50 GB
145            sample_rate: 0.5,
146        }
147    }
148
149    /// Create a policy for archival (high compression, long retention)
150    pub fn archival() -> Self {
151        Self {
152            compression: CompressionAlgorithm::Zstd,
153            retention_days: DAYS_PER_YEAR,
154            max_size_bytes: 100 * 1024 * 1024 * 1024, // 100 GB
155            sample_rate: 0.25,
156        }
157    }
158
159    /// Check if a trace should be sampled based on the sample rate
160    ///
161    /// Uses a deterministic hash of the trace ID for consistent sampling.
162    pub fn should_sample(&self, trace_id: &str) -> bool {
163        if self.sample_rate >= 1.0 {
164            return true;
165        }
166        if self.sample_rate <= 0.0 {
167            return false;
168        }
169
170        // Simple deterministic hash for consistent sampling
171        let hash =
172            trace_id.bytes().fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(u64::from(b)));
173
174        let normalized = (hash % 10000) as f64 / 10000.0;
175        normalized < self.sample_rate
176    }
177
178    /// Estimate the compressed size of data given uncompressed size
179    pub fn estimate_compressed_size(&self, uncompressed_bytes: u64) -> u64 {
180        let ratio = self.compression.typical_ratio();
181        (uncompressed_bytes as f64 / ratio).ceil() as u64
182    }
183
184    /// Check if adding data would exceed the storage limit
185    pub fn would_exceed_limit(&self, current_bytes: u64, additional_bytes: u64) -> bool {
186        if self.max_size_bytes == 0 {
187            return false; // Unlimited
188        }
189
190        let estimated_additional = self.estimate_compressed_size(additional_bytes);
191        current_bytes.saturating_add(estimated_additional) > self.max_size_bytes
192    }
193
194    /// Check if retention is indefinite
195    pub fn is_indefinite_retention(&self) -> bool {
196        self.retention_days == 0
197    }
198
199    /// Check if storage is unlimited
200    pub fn is_unlimited_storage(&self) -> bool {
201        self.max_size_bytes == 0
202    }
203
204    /// Get the effective sample percentage (0-100)
205    pub fn sample_percentage(&self) -> f64 {
206        self.sample_rate * 100.0
207    }
208
209    /// Validate the policy configuration
210    pub fn validate(&self) -> Result<(), PolicyValidationError> {
211        if self.sample_rate < 0.0 || self.sample_rate > 1.0 {
212            return Err(PolicyValidationError::InvalidSampleRate(self.sample_rate));
213        }
214
215        // Warn about potentially problematic configurations
216        if self.sample_rate < 0.01 && self.retention_days > 30 {
217            // Very low sampling with long retention might indicate misconfiguration
218        }
219
220        Ok(())
221    }
222}
223
/// Reasons a [`TraceStoragePolicy`] can fail validation.
#[derive(Debug, Clone, PartialEq)]
pub enum PolicyValidationError {
    /// The sample rate is not between 0.0 and 1.0; carries the offending value.
    InvalidSampleRate(f64),
}
230
231impl std::fmt::Display for PolicyValidationError {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        match self {
234            Self::InvalidSampleRate(rate) => {
235                write!(f, "Invalid sample rate {rate}: must be between 0.0 and 1.0")
236            }
237        }
238    }
239}
240
241impl std::error::Error for PolicyValidationError {}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an uncompressed 7-day policy so a test only spells out the size
    /// limit and sample rate it exercises.
    fn uncompressed(max_size_bytes: u64, sample_rate: f64) -> TraceStoragePolicy {
        TraceStoragePolicy::new(CompressionAlgorithm::None, 7, max_size_bytes, sample_rate)
    }

    #[test]
    fn test_compression_algorithm_default() {
        let algo = CompressionAlgorithm::default();
        assert_eq!(algo, CompressionAlgorithm::None);
    }

    #[test]
    fn test_compression_algorithm_typical_ratio() {
        assert!((CompressionAlgorithm::None.typical_ratio() - 1.0).abs() < f64::EPSILON);
        assert!(
            CompressionAlgorithm::Zstd.typical_ratio() > CompressionAlgorithm::Lz4.typical_ratio()
        );
    }

    #[test]
    fn test_compression_algorithm_relative_speed() {
        assert!(
            CompressionAlgorithm::None.relative_speed()
                > CompressionAlgorithm::Lz4.relative_speed()
        );
        assert!(
            CompressionAlgorithm::Lz4.relative_speed()
                > CompressionAlgorithm::Zstd.relative_speed()
        );
    }

    #[test]
    fn test_compression_algorithm_parse() {
        assert_eq!(CompressionAlgorithm::parse("none"), Some(CompressionAlgorithm::None));
        assert_eq!(CompressionAlgorithm::parse("off"), Some(CompressionAlgorithm::None));
        assert_eq!(CompressionAlgorithm::parse("rle"), Some(CompressionAlgorithm::Rle));
        assert_eq!(CompressionAlgorithm::parse("zstd"), Some(CompressionAlgorithm::Zstd));
        assert_eq!(CompressionAlgorithm::parse("ZSTANDARD"), Some(CompressionAlgorithm::Zstd));
        assert_eq!(CompressionAlgorithm::parse("lz4"), Some(CompressionAlgorithm::Lz4));
        assert_eq!(CompressionAlgorithm::parse("invalid"), None);
    }

    #[test]
    fn test_compression_algorithm_parse_remaining_aliases() {
        assert_eq!(CompressionAlgorithm::parse("disabled"), Some(CompressionAlgorithm::None));
        assert_eq!(CompressionAlgorithm::parse("runlength"), Some(CompressionAlgorithm::Rle));
        assert_eq!(CompressionAlgorithm::parse("Run-Length"), Some(CompressionAlgorithm::Rle));
        assert_eq!(CompressionAlgorithm::parse("LZ4"), Some(CompressionAlgorithm::Lz4));
    }

    #[test]
    fn test_compression_algorithm_parse_rejects_empty_and_padded() {
        assert_eq!(CompressionAlgorithm::parse(""), None);
        // Input is matched as given; surrounding whitespace is not trimmed.
        assert_eq!(CompressionAlgorithm::parse(" zstd"), None);
    }

    #[test]
    fn test_compression_algorithm_display() {
        assert_eq!(format!("{}", CompressionAlgorithm::None), "none");
        assert_eq!(format!("{}", CompressionAlgorithm::Rle), "rle");
        assert_eq!(format!("{}", CompressionAlgorithm::Zstd), "zstd");
        assert_eq!(format!("{}", CompressionAlgorithm::Lz4), "lz4");
    }

    #[test]
    fn test_compression_algorithm_display_round_trips_through_parse() {
        let all = [
            CompressionAlgorithm::None,
            CompressionAlgorithm::Rle,
            CompressionAlgorithm::Zstd,
            CompressionAlgorithm::Lz4,
        ];
        for algo in all {
            assert_eq!(CompressionAlgorithm::parse(&algo.to_string()), Some(algo));
        }
    }

    #[test]
    fn test_trace_storage_policy_default() {
        let policy = TraceStoragePolicy::default();

        assert_eq!(policy.compression, CompressionAlgorithm::Zstd);
        assert_eq!(policy.retention_days, 30);
        assert_eq!(policy.max_size_bytes, 10 * 1024 * 1024 * 1024);
        assert!((policy.sample_rate - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_trace_storage_policy_new() {
        let policy = TraceStoragePolicy::new(CompressionAlgorithm::Lz4, 14, 1024 * 1024, 0.75);

        assert_eq!(policy.compression, CompressionAlgorithm::Lz4);
        assert_eq!(policy.retention_days, 14);
        assert_eq!(policy.max_size_bytes, 1024 * 1024);
        assert!((policy.sample_rate - 0.75).abs() < f64::EPSILON);
    }

    #[test]
    fn test_trace_storage_policy_new_clamps_sample_rate() {
        let policy = uncompressed(1024, 1.5);
        assert!((policy.sample_rate - 1.0).abs() < f64::EPSILON);

        let policy = uncompressed(1024, -0.5);
        assert!((policy.sample_rate - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_trace_storage_policy_presets() {
        let minimal = TraceStoragePolicy::minimal();
        assert_eq!(minimal.compression, CompressionAlgorithm::None);
        assert!(minimal.sample_rate < 0.5);

        let dev = TraceStoragePolicy::development();
        assert_eq!(dev.compression, CompressionAlgorithm::Lz4);
        assert!((dev.sample_rate - 1.0).abs() < f64::EPSILON);

        let prod = TraceStoragePolicy::production();
        assert_eq!(prod.compression, CompressionAlgorithm::Zstd);
        assert!(prod.retention_days > dev.retention_days);

        let archive = TraceStoragePolicy::archival();
        assert!(archive.retention_days > prod.retention_days);
    }

    #[test]
    fn test_trace_storage_policy_presets_validate() {
        let presets = [
            TraceStoragePolicy::minimal(),
            TraceStoragePolicy::development(),
            TraceStoragePolicy::production(),
            TraceStoragePolicy::archival(),
        ];
        for preset in presets {
            assert!(preset.validate().is_ok(), "preset should be valid: {preset:?}");
        }
    }

    #[test]
    fn test_should_sample_always() {
        let policy = uncompressed(1024, 1.0);

        // Should always sample at rate 1.0
        assert!(policy.should_sample("trace-001"));
        assert!(policy.should_sample("trace-002"));
        assert!(policy.should_sample("any-trace-id"));
    }

    #[test]
    fn test_should_sample_never() {
        let policy = uncompressed(1024, 0.0);

        // Should never sample at rate 0.0
        assert!(!policy.should_sample("trace-001"));
        assert!(!policy.should_sample("trace-002"));
    }

    #[test]
    fn test_should_sample_deterministic() {
        let policy = uncompressed(1024, 0.5);

        // Same trace ID should always get same result
        let result1 = policy.should_sample("trace-001");
        let result2 = policy.should_sample("trace-001");
        assert_eq!(result1, result2);
    }

    #[test]
    fn test_should_sample_distribution() {
        let policy = uncompressed(1024, 0.5);

        // With enough samples, should be roughly 50%
        let sampled: usize =
            (0..1000).filter(|i| policy.should_sample(&format!("trace-{i}"))).count();

        // Allow 10% tolerance
        assert!(sampled > 400 && sampled < 600, "Expected ~500 samples, got {sampled}");
    }

    #[test]
    fn test_estimate_compressed_size() {
        let policy_none = uncompressed(1024, 1.0);
        let policy_zstd = TraceStoragePolicy::new(CompressionAlgorithm::Zstd, 7, 1024, 1.0);

        let size = 1000u64;

        // No compression = same size
        assert_eq!(policy_none.estimate_compressed_size(size), 1000);

        // Zstd should estimate smaller
        assert!(policy_zstd.estimate_compressed_size(size) < size);
    }

    #[test]
    fn test_estimate_compressed_size_rounds_up() {
        let policy = TraceStoragePolicy::new(CompressionAlgorithm::Zstd, 7, 1024, 1.0);

        assert_eq!(policy.estimate_compressed_size(0), 0);
        assert_eq!(policy.estimate_compressed_size(1000), 250);
        // 1001 / 4 = 250.25, which rounds up to the next whole byte.
        assert_eq!(policy.estimate_compressed_size(1001), 251);
    }

    #[test]
    fn test_would_exceed_limit() {
        let policy = uncompressed(1000, 1.0);

        // Not exceeding
        assert!(!policy.would_exceed_limit(0, 500));
        assert!(!policy.would_exceed_limit(500, 500));

        // Exceeding
        assert!(policy.would_exceed_limit(500, 600));
        assert!(policy.would_exceed_limit(1000, 1));
    }

    #[test]
    fn test_would_exceed_limit_uses_compressed_estimate() {
        let policy = TraceStoragePolicy::new(CompressionAlgorithm::Zstd, 7, 1000, 1.0);

        // 400 uncompressed bytes are estimated at 100, landing exactly on the limit.
        assert!(!policy.would_exceed_limit(900, 400));
        // 404 uncompressed bytes are estimated at 101, one byte over.
        assert!(policy.would_exceed_limit(900, 404));
    }

    #[test]
    fn test_would_exceed_limit_unlimited() {
        let policy = uncompressed(0, 1.0);

        // Should never exceed with unlimited storage
        assert!(!policy.would_exceed_limit(u64::MAX - 1, 1));
    }

    #[test]
    fn test_is_indefinite_retention() {
        let indefinite = TraceStoragePolicy::new(CompressionAlgorithm::None, 0, 1024, 1.0);
        let limited = TraceStoragePolicy::new(CompressionAlgorithm::None, 30, 1024, 1.0);

        assert!(indefinite.is_indefinite_retention());
        assert!(!limited.is_indefinite_retention());
    }

    #[test]
    fn test_is_unlimited_storage() {
        let unlimited = uncompressed(0, 1.0);
        let limited = uncompressed(1024, 1.0);

        assert!(unlimited.is_unlimited_storage());
        assert!(!limited.is_unlimited_storage());
    }

    #[test]
    fn test_sample_percentage() {
        let policy = uncompressed(1024, 0.75);
        assert!((policy.sample_percentage() - 75.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_validate_valid_policy() {
        let policy = TraceStoragePolicy::default();
        assert!(policy.validate().is_ok());
    }

    #[test]
    fn test_validate_accepts_boundary_sample_rates() {
        assert!(uncompressed(1024, 0.0).validate().is_ok());
        assert!(uncompressed(1024, 1.0).validate().is_ok());
    }

    #[test]
    fn test_validate_invalid_sample_rate() {
        // Struct-update syntax bypasses the clamp that `new` applies.
        let policy = TraceStoragePolicy { sample_rate: 1.5, ..TraceStoragePolicy::default() };
        assert!(matches!(policy.validate(), Err(PolicyValidationError::InvalidSampleRate(_))));
    }

    #[test]
    fn test_validate_negative_sample_rate() {
        let policy = TraceStoragePolicy { sample_rate: -0.1, ..TraceStoragePolicy::default() };
        assert!(matches!(policy.validate(), Err(PolicyValidationError::InvalidSampleRate(_))));
    }

    #[test]
    fn test_policy_validation_error_display() {
        let err = PolicyValidationError::InvalidSampleRate(1.5);
        let msg = format!("{err}");
        assert!(msg.contains("1.5"));
        assert!(msg.contains("0.0"));
        assert!(msg.contains("1.0"));
    }

    #[test]
    fn test_trace_storage_policy_serialization() {
        let policy = TraceStoragePolicy::production();
        let json = serde_json::to_string(&policy).expect("JSON serialization should succeed");
        let parsed: TraceStoragePolicy =
            serde_json::from_str(&json).expect("JSON deserialization should succeed");

        assert_eq!(parsed.compression, policy.compression);
        assert_eq!(parsed.retention_days, policy.retention_days);
        assert_eq!(parsed.max_size_bytes, policy.max_size_bytes);
        assert!((parsed.sample_rate - policy.sample_rate).abs() < f64::EPSILON);
    }
}