ipfrs_storage/analyzer.rs

//! Comprehensive storage analysis and optimization tools
//!
//! This module provides high-level analysis tools that combine diagnostics,
//! profiling, and workload analysis to produce actionable insights.
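//!
//! # Example
//!
//! A minimal end-to-end sketch, assuming this module is exported as
//! `ipfrs_storage::analyzer` and using the in-memory backend exercised by the
//! tests below (exact paths may differ):
//!
//! ```no_run
//! use ipfrs_storage::analyzer::StorageAnalyzer;
//! use ipfrs_storage::MemoryBlockStore;
//!
//! # async fn run() -> ipfrs_core::Result<()> {
//! let store = MemoryBlockStore::new();
//! let mut analyzer = StorageAnalyzer::new(store, "Memory".to_string());
//! let analysis = analyzer.analyze().await?;
//! println!("{}", analyzer.format_analysis(&analysis));
//! # Ok(())
//! # }
//! ```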

use crate::diagnostics::{DiagnosticsReport, StorageDiagnostics};
use crate::profiling::PerformanceProfiler;
use crate::traits::BlockStore;
use ipfrs_core::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Comprehensive storage analysis report
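///
/// Derives `Serialize`/`Deserialize`, so a report can also be exported as a
/// machine-readable document, e.g. (a sketch assuming `serde_json` is a
/// dependency):
///
/// ```ignore
/// let json = serde_json::to_string_pretty(&analysis)?;
/// std::fs::write("analysis.json", json)?;
/// ```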
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageAnalysis {
    /// Backend identifier
    pub backend: String,
    /// Diagnostics report
    pub diagnostics: DiagnosticsReport,
    /// Operation-specific performance breakdown
    pub performance_breakdown: HashMap<String, OperationStats>,
    /// Workload characterization
    pub workload: WorkloadCharacterization,
    /// Optimization recommendations
    pub recommendations: Vec<OptimizationRecommendation>,
    /// Overall grade (A, B, C, D, F)
    pub grade: String,
}

/// Per-operation statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OperationStats {
    /// Operation name (put, get, has, delete)
    pub operation: String,
    /// Number of operations
    pub count: u64,
    /// Average latency in microseconds
    pub avg_latency_us: u64,
    /// P50 latency in microseconds
    pub p50_latency_us: u64,
    /// P95 latency in microseconds
    pub p95_latency_us: u64,
    /// P99 latency in microseconds
    pub p99_latency_us: u64,
    /// Peak latency in microseconds
    pub peak_latency_us: u64,
}

/// Workload characterization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkloadCharacterization {
    /// Read/write ratio (0.0 = all writes, 1.0 = all reads)
    pub read_write_ratio: f64,
    /// Average block size in bytes
    pub avg_block_size: usize,
    /// Block size distribution (small/medium/large percentages)
    pub size_distribution: SizeDistribution,
    /// Workload type classification
    pub workload_type: WorkloadType,
}

/// Block size distribution
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SizeDistribution {
    /// Percentage of small blocks (< 16KB)
    pub small_pct: f64,
    /// Percentage of medium blocks (16KB - 256KB)
    pub medium_pct: f64,
    /// Percentage of large blocks (> 256KB)
    pub large_pct: f64,
}

/// Workload type classification
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum WorkloadType {
    /// Read-heavy workload (>70% reads)
    ReadHeavy,
    /// Write-heavy workload (>70% writes)
    WriteHeavy,
    /// Balanced workload
    Balanced,
    /// Batch-oriented workload
    BatchOriented,
    /// Unknown/Mixed
    Mixed,
}

/// Optimization recommendation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationRecommendation {
    /// Priority level (High, Medium, Low)
    pub priority: Priority,
    /// Category (Performance, Reliability, Cost, etc.)
    pub category: Category,
    /// Description of the recommendation
    pub description: String,
    /// Expected impact
    pub expected_impact: String,
    /// Implementation difficulty
    pub difficulty: Difficulty,
}

/// Recommendation priority
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Priority {
    High,
    Medium,
    Low,
}

/// Recommendation category
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Category {
    Performance,
    Reliability,
    Cost,
    Scalability,
    Configuration,
}

/// Implementation difficulty
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Difficulty {
    Easy,     // < 1 hour
    Moderate, // 1-4 hours
    Complex,  // > 4 hours
}

/// Comprehensive storage analyzer
pub struct StorageAnalyzer<S: BlockStore> {
    diagnostics: StorageDiagnostics<S>,
    #[allow(dead_code)]
    profiler: PerformanceProfiler,
    backend_name: String,
}

impl<S: BlockStore> StorageAnalyzer<S> {
    /// Create a new storage analyzer
    pub fn new(store: S, backend_name: String) -> Self {
        Self {
            diagnostics: StorageDiagnostics::new(store, backend_name.clone()),
            profiler: PerformanceProfiler::new(),
            backend_name,
        }
    }

    /// Run a comprehensive analysis: diagnostics, workload characterization,
    /// per-operation performance breakdown, recommendations, and an overall grade.
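    ///
    /// A sketch of inspecting the result (hypothetical `analyzer` built via
    /// [`StorageAnalyzer::new`]):
    ///
    /// ```ignore
    /// let analysis = analyzer.analyze().await?;
    /// println!("grade = {}", analysis.grade);
    /// for rec in &analysis.recommendations {
    ///     println!("[{:?}] {:?}: {}", rec.priority, rec.category, rec.description);
    /// }
    /// ```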
    pub async fn analyze(&mut self) -> Result<StorageAnalysis> {
        // Run diagnostics
        let diag_report = self.diagnostics.run().await?;

        // Analyze workload characteristics
        let workload = self.characterize_workload(&diag_report);

        // Extract performance breakdown
        let performance_breakdown = self.extract_performance_breakdown(&diag_report);

        // Generate recommendations
        let recommendations = self.generate_recommendations(&diag_report, &workload);

        // Calculate grade
        let grade = self.calculate_grade(&diag_report, &workload);

        Ok(StorageAnalysis {
            backend: self.backend_name.clone(),
            diagnostics: diag_report,
            performance_breakdown,
            workload,
            recommendations,
            grade,
        })
    }

    /// Characterize workload based on diagnostics
    fn characterize_workload(&self, diag: &DiagnosticsReport) -> WorkloadCharacterization {
        // Estimate the read/write ratio. The diagnostics report does not yet
        // break out read vs. write counts, so both sides fall back to
        // `total_blocks` and the ratio degenerates to 0.5 whenever any blocks
        // exist; refine this once per-operation counters are available.
        let total_reads = diag.total_blocks as f64;
        let total_writes = diag.total_blocks as f64;
        let read_write_ratio = if total_reads + total_writes > 0.0 {
            total_reads / (total_reads + total_writes)
        } else {
            0.5
        };

        // Classify workload type from the ratio (>70% reads = read-heavy,
        // <30% reads = write-heavy).
        let workload_type = if read_write_ratio > 0.7 {
            WorkloadType::ReadHeavy
        } else if read_write_ratio < 0.3 {
            WorkloadType::WriteHeavy
        } else {
            WorkloadType::Balanced
        };

        WorkloadCharacterization {
            read_write_ratio,
            // Placeholder values; a full implementation would derive the
            // average size and distribution from the actual block data.
            avg_block_size: 4096,
            size_distribution: SizeDistribution {
                small_pct: 60.0,
                medium_pct: 30.0,
                large_pct: 10.0,
            },
            workload_type,
        }
    }

    /// Extract per-operation performance breakdown
    ///
    /// The diagnostics report only tracks average latencies, so the
    /// percentile and peak figures below are heuristic estimates derived
    /// from the average (2x for p95, 3x for p99, 5x for peak).
    fn extract_performance_breakdown(
        &self,
        diag: &DiagnosticsReport,
    ) -> HashMap<String, OperationStats> {
        let mut breakdown = HashMap::new();

        // Stats for write operations
        breakdown.insert(
            "put".to_string(),
            OperationStats {
                operation: "put".to_string(),
                count: diag.total_blocks as u64,
                avg_latency_us: diag.performance.avg_write_latency.as_micros() as u64,
                p50_latency_us: diag.performance.avg_write_latency.as_micros() as u64,
                p95_latency_us: diag.performance.avg_write_latency.as_micros() as u64 * 2,
                p99_latency_us: diag.performance.avg_write_latency.as_micros() as u64 * 3,
                peak_latency_us: diag.performance.avg_write_latency.as_micros() as u64 * 5,
            },
        );

        // Stats for read operations
        breakdown.insert(
            "get".to_string(),
            OperationStats {
                operation: "get".to_string(),
                count: diag.total_blocks as u64,
                avg_latency_us: diag.performance.avg_read_latency.as_micros() as u64,
                p50_latency_us: diag.performance.avg_read_latency.as_micros() as u64,
                p95_latency_us: diag.performance.avg_read_latency.as_micros() as u64 * 2,
                p99_latency_us: diag.performance.avg_read_latency.as_micros() as u64 * 3,
                peak_latency_us: diag.performance.avg_read_latency.as_micros() as u64 * 5,
            },
        );

        breakdown
    }

    /// Generate optimization recommendations
    fn generate_recommendations(
        &self,
        diag: &DiagnosticsReport,
        workload: &WorkloadCharacterization,
    ) -> Vec<OptimizationRecommendation> {
        let mut recommendations = Vec::new();

        // Check write performance (throughput is measured in blocks/sec)
        if diag.performance.write_throughput < 100.0 {
            recommendations.push(OptimizationRecommendation {
                priority: Priority::High,
                category: Category::Performance,
                description: "Write throughput is below optimal levels. Consider enabling write coalescing or switching to ParityDB backend.".to_string(),
                expected_impact: "2-4x improvement in write throughput".to_string(),
                difficulty: Difficulty::Moderate,
            });
        }

        // Check read performance
        if diag.performance.read_throughput < 200.0 {
            recommendations.push(OptimizationRecommendation {
                priority: Priority::High,
                category: Category::Performance,
                description: "Read throughput is below optimal levels. Consider increasing cache size or enabling bloom filters.".to_string(),
                expected_impact: "2-3x improvement in read latency".to_string(),
                difficulty: Difficulty::Easy,
            });
        }

        // Workload-specific recommendations
        match workload.workload_type {
            WorkloadType::ReadHeavy => {
                recommendations.push(OptimizationRecommendation {
                    priority: Priority::Medium,
                    category: Category::Configuration,
                    description: "Workload is read-heavy. Use read_optimized_stack() with larger cache (1GB+) and bloom filters.".to_string(),
                    expected_impact: "50-80% reduction in read latency".to_string(),
                    difficulty: Difficulty::Easy,
                });
            }
            WorkloadType::WriteHeavy => {
                recommendations.push(OptimizationRecommendation {
                    priority: Priority::Medium,
                    category: Category::Configuration,
                    description: "Workload is write-heavy. Use write_optimized_stack() with deduplication and smaller cache.".to_string(),
                    expected_impact: "30-50% improvement in write throughput".to_string(),
                    difficulty: Difficulty::Easy,
                });
            }
            _ => {}
        }

        // Health-based recommendations
        if diag.health_score < 70 {
            recommendations.push(OptimizationRecommendation {
                priority: Priority::High,
                category: Category::Reliability,
                description:
                    "Storage health score is low. Run diagnostics to identify specific issues."
                        .to_string(),
                expected_impact: "Improved reliability and data integrity".to_string(),
                difficulty: Difficulty::Moderate,
            });
        }

        recommendations
    }

    /// Calculate overall grade
    fn calculate_grade(
        &self,
        diag: &DiagnosticsReport,
        _workload: &WorkloadCharacterization,
    ) -> String {
        let score = diag.health_score;

        if score >= 90 {
            "A".to_string()
        } else if score >= 80 {
            "B".to_string()
        } else if score >= 70 {
            "C".to_string()
        } else if score >= 60 {
            "D".to_string()
        } else {
            "F".to_string()
        }
    }

    /// Generate a human-readable analysis report
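    ///
    /// Output is plain text with `##` section headers; the head of a report
    /// looks like this (values illustrative):
    ///
    /// ```text
    /// === Storage Analysis Report: Memory ===
    ///
    /// Overall Grade: A
    /// Health Score: 95/100
    ///
    /// ## Workload Characterization
    /// Type: Balanced
    /// Read/Write Ratio: 50.00% reads
    /// ```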
    pub fn format_analysis(&self, analysis: &StorageAnalysis) -> String {
        let mut report = String::new();

        report.push_str(&format!(
            "=== Storage Analysis Report: {} ===\n\n",
            analysis.backend
        ));
        report.push_str(&format!("Overall Grade: {}\n", analysis.grade));
        report.push_str(&format!(
            "Health Score: {}/100\n\n",
            analysis.diagnostics.health_score
        ));

        report.push_str("## Workload Characterization\n");
        report.push_str(&format!("Type: {:?}\n", analysis.workload.workload_type));
        report.push_str(&format!(
            "Read/Write Ratio: {:.2}% reads\n",
            analysis.workload.read_write_ratio * 100.0
        ));
        report.push_str(&format!(
            "Average Block Size: {} bytes\n\n",
            analysis.workload.avg_block_size
        ));

        report.push_str("## Performance Metrics\n");
        report.push_str(&format!(
            "Write Throughput: {:.2} blocks/sec\n",
            analysis.diagnostics.performance.write_throughput
        ));
        report.push_str(&format!(
            "Read Throughput: {:.2} blocks/sec\n",
            analysis.diagnostics.performance.read_throughput
        ));
        report.push_str(&format!(
            "Avg Write Latency: {:?}\n",
            analysis.diagnostics.performance.avg_write_latency
        ));
        report.push_str(&format!(
            "Avg Read Latency: {:?}\n\n",
            analysis.diagnostics.performance.avg_read_latency
        ));

        report.push_str("## Recommendations\n");
        if analysis.recommendations.is_empty() {
            report.push_str("No recommendations - storage is performing optimally!\n");
        } else {
            for (i, rec) in analysis.recommendations.iter().enumerate() {
                report.push_str(&format!(
                    "\n{}. [{:?}] {:?} - {}\n",
                    i + 1,
                    rec.priority,
                    rec.category,
                    rec.description
                ));
                report.push_str(&format!("   Expected Impact: {}\n", rec.expected_impact));
                report.push_str(&format!("   Difficulty: {:?}\n", rec.difficulty));
            }
        }

        report
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::MemoryBlockStore;

    #[tokio::test]
    async fn test_storage_analyzer() {
        let store = MemoryBlockStore::new();
        let mut analyzer = StorageAnalyzer::new(store, "Memory".to_string());

        let analysis = analyzer.analyze().await.unwrap();

        assert_eq!(analysis.backend, "Memory");
        assert!(!analysis.grade.is_empty());
        assert!(!analysis.performance_breakdown.is_empty());
    }

    #[tokio::test]
    async fn test_workload_characterization() {
        let store = MemoryBlockStore::new();
        let analyzer = StorageAnalyzer::new(store, "Memory".to_string());

        let diag = DiagnosticsReport {
            backend: "Memory".to_string(),
            total_blocks: 100,
            performance: crate::diagnostics::PerformanceMetrics {
                avg_write_latency: std::time::Duration::from_micros(100),
                avg_read_latency: std::time::Duration::from_micros(50),
                avg_batch_write_latency: std::time::Duration::from_millis(10),
                avg_batch_read_latency: std::time::Duration::from_millis(5),
                write_throughput: 1000.0,
                read_throughput: 2000.0,
                peak_memory_usage: 0,
            },
            health: crate::diagnostics::HealthMetrics {
                successful_ops: 100,
                failed_ops: 0,
                success_rate: 1.0,
                integrity_ok: true,
                responsive: true,
            },
            recommendations: vec![],
            health_score: 95,
        };

        let workload = analyzer.characterize_workload(&diag);
        assert!(matches!(
            workload.workload_type,
            WorkloadType::ReadHeavy | WorkloadType::Balanced
        ));
    }

    #[tokio::test]
    async fn test_recommendation_generation() {
        let store = MemoryBlockStore::new();
        let analyzer = StorageAnalyzer::new(store, "Memory".to_string());

        let diag = DiagnosticsReport {
            backend: "Memory".to_string(),
            total_blocks: 100,
            performance: crate::diagnostics::PerformanceMetrics {
                avg_write_latency: std::time::Duration::from_micros(100),
                avg_read_latency: std::time::Duration::from_micros(50),
                avg_batch_write_latency: std::time::Duration::from_millis(10),
                avg_batch_read_latency: std::time::Duration::from_millis(5),
                write_throughput: 50.0, // Low throughput
                read_throughput: 50.0,  // Low throughput
                peak_memory_usage: 0,
            },
            health: crate::diagnostics::HealthMetrics {
                successful_ops: 100,
                failed_ops: 0,
                success_rate: 1.0,
                integrity_ok: true,
                responsive: true,
            },
            recommendations: vec![],
            health_score: 85,
        };

        let workload = WorkloadCharacterization {
            read_write_ratio: 0.5,
            avg_block_size: 4096,
            size_distribution: SizeDistribution {
                small_pct: 60.0,
                medium_pct: 30.0,
                large_pct: 10.0,
            },
            workload_type: WorkloadType::Balanced,
        };

        let recommendations = analyzer.generate_recommendations(&diag, &workload);

        // Should recommend performance improvements due to low throughput
        assert!(!recommendations.is_empty());
        assert!(recommendations
            .iter()
            .any(|r| r.category == Category::Performance));
    }
}