scirs2-ndimage 0.4.2

N-dimensional image processing module for SciRS2 (scirs2-ndimage)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
//! Advanced performance profiling and optimization tools for ndimage operations
//!
//! This module provides comprehensive performance analysis, monitoring, and optimization
//! recommendations for ndimage operations. It includes real-time profiling, memory tracking,
//! and intelligent performance optimization suggestions.

use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use std::time::{Duration, Instant};

use scirs2_core::ndarray::{Array, ArrayView, Dimension, IxDyn};
use scirs2_core::numeric::{Float, FromPrimitive};

use crate::error::NdimageResult;

/// Comprehensive performance profiler for ndimage operations.
///
/// All shared state is wrapped in `Arc<Mutex<_>>`/`Arc<RwLock<_>>` because it
/// is read and updated concurrently by the background threads spawned in
/// `start_monitoring`.
#[derive(Debug)]
pub struct PerformanceProfiler {
    /// Operation timing records, keyed by operation name
    timing_records: Arc<RwLock<HashMap<String, Vec<OperationTiming>>>>,
    /// Memory usage tracking (current/peak usage plus sampled history)
    memory_tracker: Arc<Mutex<MemoryTracker>>,
    /// Performance metrics aggregator (per-operation and system-wide)
    metrics_aggregator: Arc<Mutex<MetricsAggregator>>,
    /// Optimization recommendations engine
    optimizer: Arc<Mutex<OptimizationEngine>>,
    /// Real-time monitoring state; background threads exit when this is `false`
    monitoring_active: Arc<Mutex<bool>>,
    /// Configuration (record limits, sampling and reporting intervals)
    config: ProfilerConfig,
}

/// Configuration options for [`PerformanceProfiler`].
#[derive(Debug, Clone)]
pub struct ProfilerConfig {
    /// Maximum number of timing records to keep per operation; once the
    /// limit is reached, the oldest record is discarded on each new insert
    pub max_records_per_operation: usize,
    /// Sampling interval for the background memory monitoring thread
    pub memory_sampling_interval: Duration,
    /// Enable detailed SIMD profiling
    pub enable_simd_profiling: bool,
    /// Enable cache analysis
    pub enable_cache_analysis: bool,
    /// Interval at which aggregated metrics are recomputed from the records
    pub reporting_interval: Duration,
}

impl Default for ProfilerConfig {
    fn default() -> Self {
        Self {
            max_records_per_operation: 1000,
            memory_sampling_interval: Duration::from_millis(100),
            enable_simd_profiling: true,
            enable_cache_analysis: true,
            reporting_interval: Duration::from_secs(30),
        }
    }
}

/// A single timing record captured for one executed operation.
#[derive(Debug, Clone)]
pub struct OperationTiming {
    /// Operation name
    pub name: String,
    /// Input array dimensions (shape)
    pub input_dimensions: Vec<usize>,
    /// Element type name, as reported by `std::any::type_name`
    pub data_type: String,
    /// Wall-clock execution time
    pub execution_time: Duration,
    /// Memory allocated during the operation, in bytes
    pub memory_allocated: usize,
    /// Peak memory usage observed by the tracker, in bytes
    pub memory_peak: usize,
    /// Estimated SIMD utilization (0.0 - 1.0); heuristic, not measured
    pub simd_utilization: f64,
    /// Estimated cache hit ratio (0.0 - 1.0); heuristic, not measured
    pub cache_hit_ratio: f64,
    /// When this record was created
    pub timestamp: Instant,
    /// Additional caller-provided metadata
    pub metadata: HashMap<String, String>,
}

/// Tracks process memory usage over time via periodic samples.
#[derive(Debug)]
pub struct MemoryTracker {
    /// Most recently sampled memory usage, in bytes
    current_usage: usize,
    /// Highest usage ever observed, in bytes
    peak_usage: usize,
    /// Memory usage history as (timestamp, usage-in-bytes) pairs;
    /// entries older than one hour are pruned on update
    usagehistory: VecDeque<(Instant, usize)>,
    /// Memory allocation totals by category
    allocations: HashMap<String, usize>,
}

impl Default for MemoryTracker {
    fn default() -> Self {
        Self {
            current_usage: 0,
            peak_usage: 0,
            usagehistory: VecDeque::new(),
            allocations: HashMap::new(),
        }
    }
}

/// Folds raw timing records into aggregated performance metrics.
#[derive(Debug)]
pub struct MetricsAggregator {
    /// Aggregated performance metrics, keyed by operation name
    operationmetrics: HashMap<String, AggregatedMetrics>,
    /// System-wide performance indicators (totals across all operations)
    systemmetrics: SystemMetrics,
    /// Performance trends over time
    trends: PerformanceTrends,
}

/// Summary statistics aggregated over all recorded timings of one operation.
#[derive(Debug, Clone)]
pub struct AggregatedMetrics {
    /// Number of operations recorded
    pub operation_count: usize,
    /// Average execution time
    pub avg_execution_time: Duration,
    /// Minimum execution time
    pub min_execution_time: Duration,
    /// Maximum execution time
    pub max_execution_time: Duration,
    /// Standard deviation of execution time (population std dev)
    pub std_dev_execution_time: Duration,
    /// Average memory usage, in bytes
    pub avg_memory_usage: usize,
    /// Average estimated SIMD utilization (0.0 - 1.0)
    pub avg_simd_utilization: f64,
    /// Average estimated cache hit ratio (0.0 - 1.0)
    pub avg_cache_hit_ratio: f64,
    /// Performance efficiency score (0.0 - 1.0): mean of SIMD utilization
    /// and cache hit ratio
    pub efficiency_score: f64,
}

/// System-wide totals across all profiled operations.
#[derive(Debug, Clone)]
pub struct SystemMetrics {
    /// Total operations performed
    pub total_operations: usize,
    /// Total execution time across all operations
    pub total_execution_time: Duration,
    /// Total memory allocated, in bytes
    pub total_memory_allocated: usize,
    /// Average system load
    /// NOTE(review): initialized to 0.0 and never updated by the aggregator
    pub avg_system_load: f64,
    /// SIMD capability utilization
    /// NOTE(review): initialized to 0.0 and never updated by the aggregator
    pub simd_capability_utilization: f64,
}

/// Directional trends in performance over time.
///
/// NOTE(review): `update_trends` currently writes placeholder values only;
/// these fields do not yet reflect real time-series analysis.
#[derive(Debug, Clone)]
pub struct PerformanceTrends {
    /// Execution time trend (positive = getting slower)
    pub execution_time_trend: f64,
    /// Memory usage trend (positive = using more memory)
    pub memory_usage_trend: f64,
    /// Efficiency trend (positive = getting more efficient)
    pub efficiency_trend: f64,
    /// Trend confidence (0.0 - 1.0)
    pub trend_confidence: f64,
}

/// Collects bottleneck analyses and optimization recommendations.
///
/// NOTE(review): no analysis pass in this module populates these vectors
/// yet; they are created empty and only read by report generation.
#[derive(Debug)]
pub struct OptimizationEngine {
    /// Performance bottleneck analysis results
    bottlenecks: Vec<PerformanceBottleneck>,
    /// Optimization recommendations
    recommendations: Vec<OptimizationRecommendation>,
    /// Historical impact of previously applied optimizations
    optimizationhistory: Vec<OptimizationImpact>,
}

/// One identified performance bottleneck affecting a specific operation.
#[derive(Debug, Clone)]
pub struct PerformanceBottleneck {
    /// Bottleneck category
    pub bottleneck_type: BottleneckType,
    /// Name of the operation affected
    pub operation: String,
    /// Severity (0.0 - 1.0, higher = more severe)
    pub severity: f64,
    /// Human-readable description
    pub description: String,
    /// Potential performance impact estimate
    pub impact_estimate: f64,
}

/// Categories of performance bottleneck the engine can report.
#[derive(Debug, Clone)]
pub enum BottleneckType {
    /// Limited by memory bandwidth
    MemoryBandwidth,
    /// Limited by cache misses
    CacheMisses,
    /// SIMD units underutilized
    UnoptimizedSIMD,
    /// A better-suited algorithm exists
    SuboptimalAlgorithm,
    /// Fragmented memory allocations
    MemoryFragmentation,
    /// Contention between threads
    ThreadContention,
    /// Dominated by input/output
    IOBottleneck,
}

/// A suggested optimization for a specific operation.
#[derive(Debug, Clone)]
pub struct OptimizationRecommendation {
    /// Kind of optimization being recommended
    pub recommendation_type: RecommendationType,
    /// Name of the operation to optimize
    pub operation: String,
    /// Priority (0.0 - 1.0, higher = more important)
    pub priority: f64,
    /// Estimated performance improvement (fraction, e.g. 0.2 = 20%)
    pub estimated_improvement: f64,
    /// Implementation difficulty (0.0 - 1.0, higher = more difficult)
    pub implementation_difficulty: f64,
    /// Detailed description
    pub description: String,
    /// Code examples or implementation hints
    pub implementation_hints: Vec<String>,
}

/// Kinds of optimization that can be recommended.
#[derive(Debug, Clone)]
pub enum RecommendationType {
    /// Enable SIMD vectorization
    EnableSIMD,
    /// Optimize the in-memory data layout
    OptimizeMemoryLayout,
    /// Switch to an alternative algorithm
    UseAlternativeAlgorithm,
    /// Increase cache efficiency
    IncreaseCacheEfficiency,
    /// Reduce the number of memory allocations
    ReduceMemoryAllocations,
    /// Enable parallel execution
    EnableParallelization,
    /// Optimize GPU usage
    OptimizeGPUUsage,
}

/// Before/after record of an optimization that was actually applied.
#[derive(Debug, Clone)]
pub struct OptimizationImpact {
    /// Description of the optimization applied
    pub optimization: String,
    /// Aggregated metrics before the optimization
    pub beforemetrics: AggregatedMetrics,
    /// Aggregated metrics after the optimization
    pub aftermetrics: AggregatedMetrics,
    /// Actual improvement achieved (fraction)
    pub improvement_achieved: f64,
    /// When the optimization was applied
    pub timestamp: Instant,
}

impl PerformanceProfiler {
    /// Create a new performance profiler with the given configuration.
    ///
    /// Monitoring is initially inactive; call [`Self::start_monitoring`] to
    /// begin background sampling.
    pub fn new(config: ProfilerConfig) -> Self {
        Self {
            timing_records: Arc::new(RwLock::new(HashMap::new())),
            memory_tracker: Arc::new(Mutex::new(MemoryTracker::default())),
            metrics_aggregator: Arc::new(Mutex::new(MetricsAggregator::new())),
            optimizer: Arc::new(Mutex::new(OptimizationEngine::new())),
            monitoring_active: Arc::new(Mutex::new(false)),
            config,
        }
    }

    /// Start real-time performance monitoring.
    ///
    /// Spawns two background threads: one samples memory usage every
    /// `memory_sampling_interval`, the other re-aggregates metrics every
    /// `reporting_interval`. Both exit after [`Self::stop_monitoring`]
    /// clears the shared flag (at the end of their current sleep).
    /// Calling this while monitoring is already active is a no-op.
    pub fn start_monitoring(&self) -> NdimageResult<()> {
        let mut active = self.monitoring_active.lock().expect("Operation failed");
        if *active {
            return Ok(()); // Already monitoring
        }
        *active = true;

        // Memory monitoring thread: periodically sample and record usage.
        let memory_tracker = Arc::clone(&self.memory_tracker);
        let sampling_interval = self.config.memory_sampling_interval;
        let monitoring_active = Arc::clone(&self.monitoring_active);

        thread::spawn(move || {
            while *monitoring_active.lock().expect("Operation failed") {
                let current_memory = get_current_memory_usage();
                let mut tracker = memory_tracker.lock().expect("Operation failed");
                tracker.update_memory_usage(current_memory);
                // Release the tracker lock before sleeping so recorders
                // are not blocked for the whole sampling interval.
                drop(tracker);

                thread::sleep(sampling_interval);
            }
        });

        // Metrics aggregation thread: periodically fold timing records into
        // aggregated per-operation and system-wide metrics.
        let metrics_aggregator = Arc::clone(&self.metrics_aggregator);
        let timing_records = Arc::clone(&self.timing_records);
        let reporting_interval = self.config.reporting_interval;
        let monitoring_active = Arc::clone(&self.monitoring_active);

        thread::spawn(move || {
            while *monitoring_active.lock().expect("Operation failed") {
                {
                    // Inner scope releases both locks before sleeping.
                    let records = timing_records.read().expect("Operation failed");
                    let mut aggregator = metrics_aggregator.lock().expect("Operation failed");
                    aggregator.updatemetrics(&records);
                }

                thread::sleep(reporting_interval);
            }
        });

        Ok(())
    }

    /// Stop performance monitoring.
    ///
    /// Background threads observe the cleared flag and exit after their
    /// current sleep interval elapses.
    pub fn stop_monitoring(&self) {
        let mut active = self.monitoring_active.lock().expect("Operation failed");
        *active = false;
    }

    /// Record timing and performance data for a completed operation.
    ///
    /// At most `max_records_per_operation` records are kept per operation
    /// name; the oldest record is discarded once the limit is exceeded.
    pub fn record_operation<T, D>(
        &self,
        operation_name: &str,
        input: &ArrayView<T, D>,
        execution_time: Duration,
        memory_allocated: usize,
        metadata: HashMap<String, String>,
    ) -> NdimageResult<()>
    where
        T: Float + FromPrimitive,
        D: Dimension,
    {
        let timing = OperationTiming {
            name: operation_name.to_string(),
            input_dimensions: input.shape().to_vec(),
            data_type: std::any::type_name::<T>().to_string(),
            execution_time,
            memory_allocated,
            memory_peak: self
                .memory_tracker
                .lock()
                .expect("Operation failed")
                .peak_usage,
            simd_utilization: self.estimate_simd_utilization(operation_name, input.len()),
            cache_hit_ratio: self.estimate_cache_hit_ratio(input.len()),
            timestamp: Instant::now(),
            metadata,
        };

        let mut records = self.timing_records.write().expect("Operation failed");
        // `or_default` is the idiomatic form of `or_insert_with(Vec::new)`.
        let operation_records = records.entry(operation_name.to_string()).or_default();
        operation_records.push(timing);

        // Limit number of records to prevent unbounded memory growth.
        if operation_records.len() > self.config.max_records_per_operation {
            operation_records.remove(0);
        }

        Ok(())
    }

    /// Generate a comprehensive performance report from the current state of
    /// the aggregator, optimizer, and memory tracker.
    pub fn generate_performance_report(&self) -> PerformanceReport {
        let aggregator = self.metrics_aggregator.lock().expect("Operation failed");
        let optimizer = self.optimizer.lock().expect("Operation failed");
        let memory_tracker = self.memory_tracker.lock().expect("Operation failed");

        PerformanceReport {
            operationmetrics: aggregator.operationmetrics.clone(),
            systemmetrics: aggregator.systemmetrics.clone(),
            trends: aggregator.trends.clone(),
            bottlenecks: optimizer.bottlenecks.clone(),
            recommendations: optimizer.recommendations.clone(),
            memory_statistics: memory_tracker.get_statistics(),
            timestamp: Instant::now(),
        }
    }

    /// Get all optimization recommendations targeting a specific operation.
    pub fn get_optimization_recommendations(
        &self,
        operation_name: &str,
    ) -> Vec<OptimizationRecommendation> {
        let optimizer = self.optimizer.lock().expect("Operation failed");
        optimizer
            .recommendations
            .iter()
            .filter(|rec| rec.operation == operation_name)
            .cloned()
            .collect()
    }

    /// Benchmark an operation across several array sizes.
    ///
    /// For each size in `test_sizes`, runs `operation` `iterations` times on
    /// a default-initialized array and records timing and memory statistics.
    /// `iterations` is clamped to at least 1 so that averaging never divides
    /// by zero. Returns the first error produced by `operation`, if any.
    pub fn benchmark_operation<F, T>(
        &self,
        operation_name: &str,
        operation: F,
        test_sizes: &[Vec<usize>],
        iterations: usize,
    ) -> NdimageResult<BenchmarkResults>
    where
        F: Fn(&ArrayView<T, IxDyn>) -> NdimageResult<Array<T, IxDyn>>,
        T: Float + FromPrimitive + Clone + Default,
    {
        // Clamp to avoid division by zero when averaging below.
        let iterations = iterations.max(1);
        let mut results = Vec::with_capacity(test_sizes.len());

        for size in test_sizes {
            let input = Array::default(size.as_slice());
            let input_view = input.view();

            let mut timings = Vec::with_capacity(iterations);
            let mut memory_usages = Vec::with_capacity(iterations);

            for _ in 0..iterations {
                let start_memory = get_current_memory_usage();
                let start_time = Instant::now();

                let _result = operation(&input_view)?;

                let execution_time = start_time.elapsed();
                let end_memory = get_current_memory_usage();
                // saturating_sub: reported usage may shrink during the run.
                let memory_used = end_memory.saturating_sub(start_memory);

                timings.push(execution_time);
                memory_usages.push(memory_used);
            }

            let avg_time = timings.iter().sum::<Duration>() / timings.len() as u32;
            // Duration is Copy; `copied` replaces the redundant `.clone()`.
            // The vectors are non-empty here (iterations >= 1).
            let min_time = timings.iter().min().copied().expect("Operation failed");
            let max_time = timings.iter().max().copied().expect("Operation failed");
            let avg_memory = memory_usages.iter().sum::<usize>() / memory_usages.len();

            results.push(BenchmarkResult {
                array_size: size.clone(),
                average_time: avg_time,
                min_time,
                max_time,
                average_memory: avg_memory,
                throughput: calculate_throughput(size, avg_time),
            });
        }

        Ok(BenchmarkResults {
            operation_name: operation_name.to_string(),
            results,
            timestamp: Instant::now(),
        })
    }

    // Helper methods

    /// Heuristic SIMD-utilization estimate based on the operation name.
    ///
    /// A real implementation would integrate with hardware performance
    /// counters; these values are fixed estimates only.
    fn estimate_simd_utilization(&self, operation_name: &str, _arraysize: usize) -> f64 {
        match operation_name {
            name if name.contains("simd") => 0.85,
            name if name.contains("convolution") => 0.70,
            name if name.contains("filter") => 0.60,
            _ => 0.30,
        }
    }

    /// Heuristic cache-hit-ratio estimate: smaller working sets are assumed
    /// to fit in cache and hit more often.
    fn estimate_cache_hit_ratio(&self, arraysize: usize) -> f64 {
        if arraysize < 1024 * 1024 {
            // < 1MB for f64
            0.95
        } else if arraysize < 16 * 1024 * 1024 {
            // < 16MB
            0.80
        } else {
            0.60
        }
    }
}

impl MetricsAggregator {
    /// Create an aggregator with zeroed system metrics and neutral trends.
    fn new() -> Self {
        Self {
            operationmetrics: HashMap::new(),
            systemmetrics: SystemMetrics {
                total_operations: 0,
                total_execution_time: Duration::ZERO,
                total_memory_allocated: 0,
                avg_system_load: 0.0,
                simd_capability_utilization: 0.0,
            },
            trends: PerformanceTrends {
                execution_time_trend: 0.0,
                memory_usage_trend: 0.0,
                efficiency_trend: 0.0,
                trend_confidence: 0.0,
            },
        }
    }

    /// Recompute per-operation, system-wide, and trend metrics from the
    /// current set of timing records.
    fn updatemetrics(&mut self, records: &HashMap<String, Vec<OperationTiming>>) {
        for (operation_name, timings) in records {
            let metrics = self.calculate_aggregatedmetrics(timings);
            self.operationmetrics
                .insert(operation_name.clone(), metrics);
        }

        self.update_systemmetrics(records);
        self.update_trends(records);
    }

    /// Aggregate a slice of timing records into summary statistics.
    ///
    /// Returns an all-zero metrics value for an empty slice.
    fn calculate_aggregatedmetrics(&self, timings: &[OperationTiming]) -> AggregatedMetrics {
        if timings.is_empty() {
            return AggregatedMetrics {
                operation_count: 0,
                avg_execution_time: Duration::ZERO,
                min_execution_time: Duration::ZERO,
                max_execution_time: Duration::ZERO,
                std_dev_execution_time: Duration::ZERO,
                avg_memory_usage: 0,
                avg_simd_utilization: 0.0,
                avg_cache_hit_ratio: 0.0,
                efficiency_score: 0.0,
            };
        }

        let execution_times: Vec<Duration> = timings.iter().map(|t| t.execution_time).collect();
        let avg_execution_time =
            execution_times.iter().sum::<Duration>() / execution_times.len() as u32;
        // Duration is Copy; `copied` replaces the redundant `.clone()`.
        // The slice is non-empty here, so min/max always exist.
        let min_execution_time = execution_times
            .iter()
            .min()
            .copied()
            .expect("Operation failed");
        let max_execution_time = execution_times
            .iter()
            .max()
            .copied()
            .expect("Operation failed");

        let avg_memory_usage =
            timings.iter().map(|t| t.memory_allocated).sum::<usize>() / timings.len();
        let avg_simd_utilization =
            timings.iter().map(|t| t.simd_utilization).sum::<f64>() / timings.len() as f64;
        let avg_cache_hit_ratio =
            timings.iter().map(|t| t.cache_hit_ratio).sum::<f64>() / timings.len() as f64;

        // Population variance of execution time, computed in nanoseconds.
        let variance = execution_times
            .iter()
            .map(|t| {
                let diff = t.as_nanos() as f64 - avg_execution_time.as_nanos() as f64;
                diff * diff
            })
            .sum::<f64>()
            / execution_times.len() as f64;
        let std_dev_nanos = variance.sqrt() as u64;
        let std_dev_execution_time = Duration::from_nanos(std_dev_nanos);

        // Efficiency score: simple mean of SIMD utilization and cache hit ratio.
        let efficiency_score = (avg_simd_utilization + avg_cache_hit_ratio) / 2.0;

        AggregatedMetrics {
            operation_count: timings.len(),
            avg_execution_time,
            min_execution_time,
            max_execution_time,
            std_dev_execution_time,
            avg_memory_usage,
            avg_simd_utilization,
            avg_cache_hit_ratio,
            efficiency_score,
        }
    }

    /// Recompute system-wide totals across all recorded operations.
    fn update_systemmetrics(&mut self, records: &HashMap<String, Vec<OperationTiming>>) {
        let total_operations: usize = records.values().map(|v| v.len()).sum();
        let total_execution_time: Duration =
            records.values().flatten().map(|t| t.execution_time).sum();
        let total_memory_allocated: usize =
            records.values().flatten().map(|t| t.memory_allocated).sum();

        self.systemmetrics.total_operations = total_operations;
        self.systemmetrics.total_execution_time = total_execution_time;
        self.systemmetrics.total_memory_allocated = total_memory_allocated;
    }

    /// Update performance trends.
    ///
    /// Placeholder: a full implementation would compare recent vs. older
    /// measurements using proper time-series analysis.
    fn update_trends(&mut self, _records: &HashMap<String, Vec<OperationTiming>>) {
        self.trends.execution_time_trend = 0.0; // Placeholder
        self.trends.memory_usage_trend = 0.0; // Placeholder
        self.trends.efficiency_trend = 0.0; // Placeholder
        self.trends.trend_confidence = 0.5; // Placeholder
    }
}

impl OptimizationEngine {
    fn new() -> Self {
        Self {
            bottlenecks: Vec::new(),
            recommendations: Vec::new(),
            optimizationhistory: Vec::new(),
        }
    }
}

impl MemoryTracker {
    /// Record a new memory usage sample, updating the peak and pruning
    /// history entries older than one hour.
    fn update_memory_usage(&mut self, usage: usize) {
        self.current_usage = usage;
        self.peak_usage = self.peak_usage.max(usage);

        let now = Instant::now();
        self.usagehistory.push_back((now, usage));

        // Keep only recent history (last hour). `checked_sub` avoids the
        // panic that bare `now - Duration` can raise when the cutoff would
        // precede the earliest representable Instant (e.g. a process
        // started less than an hour after boot); in that case nothing can
        // be older than an hour, so no pruning is needed.
        if let Some(cutoff) = now.checked_sub(Duration::from_secs(3600)) {
            while self
                .usagehistory
                .front()
                .map_or(false, |&(time, _)| time < cutoff)
            {
                self.usagehistory.pop_front();
            }
        }
    }

    /// Snapshot current, peak, and average memory usage (all in bytes)
    /// along with the per-category allocation map.
    fn get_statistics(&self) -> MemoryStatistics {
        // Average directly over the history deque; no need to collect the
        // samples into an intermediate Vec first.
        let sample_count = self.usagehistory.len();
        let avg_usage = if sample_count == 0 {
            0
        } else {
            self.usagehistory.iter().map(|&(_, usage)| usage).sum::<usize>() / sample_count
        };

        MemoryStatistics {
            current_usage: self.current_usage,
            peak_usage: self.peak_usage,
            average_usage: avg_usage,
            allocations: self.allocations.clone(),
        }
    }
}

// Supporting types for performance reporting

/// Snapshot of all profiling state at one point in time.
#[derive(Debug, Clone)]
pub struct PerformanceReport {
    /// Per-operation performance metrics, keyed by operation name
    pub operationmetrics: HashMap<String, AggregatedMetrics>,
    /// System-wide performance metrics
    pub systemmetrics: SystemMetrics,
    /// Performance trends
    pub trends: PerformanceTrends,
    /// Identified performance bottlenecks
    pub bottlenecks: Vec<PerformanceBottleneck>,
    /// Optimization recommendations
    pub recommendations: Vec<OptimizationRecommendation>,
    /// Memory usage statistics
    pub memory_statistics: MemoryStatistics,
    /// When this report was generated
    pub timestamp: Instant,
}

/// Summary of tracked memory usage.
#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    /// Current memory usage in bytes
    pub current_usage: usize,
    /// Peak memory usage in bytes
    pub peak_usage: usize,
    /// Average memory usage in bytes (over the retained history window)
    pub average_usage: usize,
    /// Memory allocations by category
    pub allocations: HashMap<String, usize>,
}

/// Full benchmark run for one operation across multiple array sizes.
#[derive(Debug, Clone)]
pub struct BenchmarkResults {
    /// Name of the benchmarked operation
    pub operation_name: String,
    /// One result per tested array size
    pub results: Vec<BenchmarkResult>,
    /// When the benchmark completed
    pub timestamp: Instant,
}

/// Benchmark statistics for a single array size.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Array dimensions tested
    pub array_size: Vec<usize>,
    /// Average execution time over all iterations
    pub average_time: Duration,
    /// Minimum execution time
    pub min_time: Duration,
    /// Maximum execution time
    pub max_time: Duration,
    /// Average memory usage, in bytes
    pub average_memory: usize,
    /// Throughput (elements/second), computed from the average time
    pub throughput: f64,
}

impl PerformanceReport {
    /// Print a human-readable summary of the report to stdout.
    ///
    /// Sections: system metrics, memory statistics, the five slowest
    /// operations by average time, then (only when non-empty) the top three
    /// recommendations and all identified bottlenecks.
    pub fn display(&self) {
        println!("\n=== Performance Analysis Report ===");
        // NOTE(review): `Instant` has no wall-clock meaning; `{:?}` prints
        // an opaque monotonic value, not a calendar timestamp.
        println!("Generated at: {:?}", self.timestamp);

        println!("\n--- System Metrics ---");
        println!("Total Operations: {}", self.systemmetrics.total_operations);
        println!(
            "Total Execution Time: {:.3}s",
            self.systemmetrics.total_execution_time.as_secs_f64()
        );
        println!(
            "Total Memory Allocated: {:.2} MB",
            self.systemmetrics.total_memory_allocated as f64 / (1024.0 * 1024.0)
        );

        println!("\n--- Memory Statistics ---");
        println!(
            "Current Usage: {:.2} MB",
            self.memory_statistics.current_usage as f64 / (1024.0 * 1024.0)
        );
        println!(
            "Peak Usage: {:.2} MB",
            self.memory_statistics.peak_usage as f64 / (1024.0 * 1024.0)
        );
        println!(
            "Average Usage: {:.2} MB",
            self.memory_statistics.average_usage as f64 / (1024.0 * 1024.0)
        );

        println!("\n--- Top Operations by Time ---");
        // Sort descending by average execution time (Reverse inverts the key).
        let mut operations: Vec<_> = self.operationmetrics.iter().collect();
        operations.sort_by_key(|item| std::cmp::Reverse(item.1.avg_execution_time));

        for (name, metrics) in operations.iter().take(5) {
            println!(
                "{}: {:.3}ms avg, {:.1}% SIMD, {:.1}% cache hits",
                name,
                metrics.avg_execution_time.as_secs_f64() * 1000.0,
                metrics.avg_simd_utilization * 100.0,
                metrics.avg_cache_hit_ratio * 100.0
            );
        }

        if !self.recommendations.is_empty() {
            println!("\n--- Optimization Recommendations ---");
            for (i, rec) in self.recommendations.iter().take(3).enumerate() {
                println!(
                    "{}. {} for '{}' (Priority: {:.1}, Est. improvement: {:.1}%)",
                    i + 1,
                    format!("{:?}", rec.recommendation_type),
                    rec.operation,
                    rec.priority * 100.0,
                    rec.estimated_improvement * 100.0
                );
            }
        }

        if !self.bottlenecks.is_empty() {
            println!("\n--- Performance Bottlenecks ---");
            for bottleneck in &self.bottlenecks {
                println!(
                    "- {:?} in '{}': {} (Severity: {:.1}%)",
                    bottleneck.bottleneck_type,
                    bottleneck.operation,
                    bottleneck.description,
                    bottleneck.severity * 100.0
                );
            }
        }
    }

    /// Export report to JSON format.
    ///
    /// Placeholder: emits only a hand-formatted stub containing the
    /// timestamp's Debug output. A real implementation would derive
    /// `serde::Serialize` on the report types and use `serde_json`.
    pub fn to_json(&self) -> serde_json::Result<String> {
        // This would require serde serialization in a real implementation
        Ok(format!(
            "{{\"timestamp\": \"{:?}\", \"summary\": \"Performance report generated\"}}",
            self.timestamp
        ))
    }
}

// Helper functions

#[allow(dead_code)]
fn get_current_memory_usage() -> usize {
    // Placeholder: a real implementation would query platform-specific APIs
    // for actual process memory usage (/proc/self/status on Linux,
    // GetProcessMemoryInfo on Windows, etc.).
    const PLACEHOLDER_BYTES: usize = 100 * 1024 * 1024; // 100 MB
    PLACEHOLDER_BYTES
}

#[allow(dead_code)]
fn calculate_throughput(array_size: &[usize], executiontime: Duration) -> f64 {
    // Throughput in elements processed per second; a zero-length duration
    // yields 0.0 rather than dividing by zero.
    let elements = array_size.iter().product::<usize>() as f64;
    let seconds = executiontime.as_secs_f64();

    if seconds <= 0.0 {
        0.0
    } else {
        elements / seconds
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// A freshly created profiler must start with monitoring inactive.
    #[test]
    fn test_profiler_creation() {
        let config = ProfilerConfig::default();
        let profiler = PerformanceProfiler::new(config);

        // Test that profiler can be created without errors
        assert!(!(*profiler.monitoring_active.lock().expect("Operation failed")));
    }

    /// Recording an operation stores exactly one timing record under its name.
    #[test]
    fn test_operation_recording() {
        let profiler = PerformanceProfiler::new(ProfilerConfig::default());
        let input = Array2::<f64>::zeros((100, 100));
        let metadata = HashMap::new();

        let result = profiler.record_operation(
            "test_operation",
            &input.view(),
            Duration::from_millis(10),
            1024,
            metadata,
        );

        assert!(result.is_ok());

        let records = profiler.timing_records.read().expect("Operation failed");
        assert!(records.contains_key("test_operation"));
        assert_eq!(records["test_operation"].len(), 1);
    }

    /// A report generated before any recording reflects an empty profiler.
    #[test]
    fn test_performance_report_generation() {
        let profiler = PerformanceProfiler::new(ProfilerConfig::default());
        let report = profiler.generate_performance_report();

        assert!(report.operationmetrics.is_empty()); // No operations recorded yet
        assert_eq!(report.systemmetrics.total_operations, 0);
    }
}
}