1use crate::error::Result;
43use serde::{Deserialize, Serialize};
44use std::collections::HashMap;
45use std::time::{Duration, Instant};
46
/// Collects timing, memory, and cache statistics for profiled code.
///
/// Owns one collector per concern; the `profile*` methods reset the
/// collectors, run the supplied closure, and assemble the results.
#[derive(Debug)]
pub struct PerformanceProfiler {
    /// Tunables controlling which collectors run and how they sample.
    config: ProfilerConfig,
    /// Timing/instruction counters (currently wall-clock based stand-ins).
    counters: PerformanceCounters,
    /// Tracks current/peak memory usage and recorded allocations.
    memory_tracker: MemoryTracker,
    /// Aggregates per-level cache hit/miss statistics.
    cache_analyzer: CacheAnalyzer,
    /// Ordered record of profiling events for the current run.
    timeline: ExecutionTimeline,
}
65
66impl PerformanceProfiler {
67 pub fn new() -> Self {
69 Self {
70 config: ProfilerConfig::default(),
71 counters: PerformanceCounters::new(),
72 memory_tracker: MemoryTracker::new(),
73 cache_analyzer: CacheAnalyzer::new(),
74 timeline: ExecutionTimeline::new(),
75 }
76 }
77
78 pub fn with_config(config: ProfilerConfig) -> Self {
80 Self {
81 config,
82 counters: PerformanceCounters::new(),
83 memory_tracker: MemoryTracker::new(),
84 cache_analyzer: CacheAnalyzer::new(),
85 timeline: ExecutionTimeline::new(),
86 }
87 }
88
89 pub fn profile<F, R>(&mut self, f: F) -> Result<ProfileResult<R>>
91 where
92 F: FnOnce() -> R,
93 {
94 self.reset();
96
97 let start_time = Instant::now();
99 self.counters.start();
100 self.memory_tracker.start();
101
102 let result = f();
104
105 let elapsed = start_time.elapsed();
107 self.counters.stop();
108 self.memory_tracker.stop();
109
110 let metrics = ProfileMetrics {
112 total_time: elapsed,
113 cpu_time: self.counters.cpu_time(),
114 wall_time: elapsed,
115 memory_usage: self.memory_tracker.get_usage(),
116 cache_stats: self.cache_analyzer.get_stats(),
117 instruction_count: self.counters.instruction_count(),
118 branch_mispredictions: self.counters.branch_mispredictions(),
119 cache_misses: self.counters.cache_misses(),
120 };
121
122 let optimization_hints = self.generate_optimization_hints(&metrics)?;
123
124 Ok(ProfileResult {
125 result,
126 metrics,
127 timeline: self.timeline.clone(),
128 hotspots: self.identify_hotspots()?,
129 optimization_hints,
130 })
131 }
132
133 pub fn profile_detailed<F, R>(&mut self, f: F) -> Result<DetailedProfileResult<R>>
135 where
136 F: FnOnce(&mut ProfilerContext) -> R,
137 {
138 let mut context = ProfilerContext::new(self);
139 let start = Instant::now();
140
141 let result = f(&mut context);
142
143 let elapsed = start.elapsed();
144
145 let recommendations = Vec::new();
147
148 Ok(DetailedProfileResult {
149 result,
150 total_time: elapsed,
151 phase_timings: context.phase_timings,
152 function_timings: context.function_timings,
153 memory_snapshots: context.memory_snapshots,
154 recommendations,
155 })
156 }
157
158 pub fn profile_memory<F, R>(&mut self, f: F) -> Result<MemoryProfile<R>>
160 where
161 F: FnOnce() -> R,
162 {
163 self.memory_tracker.start_detailed();
164 let start_memory = self.memory_tracker.current_usage();
165
166 let result = f();
167
168 let end_memory = self.memory_tracker.current_usage();
169 let allocations = self.memory_tracker.get_allocations();
170
171 Ok(MemoryProfile {
172 result,
173 initial_memory: start_memory,
174 final_memory: end_memory,
175 peak_memory: self.memory_tracker.peak_usage(),
176 allocations,
177 allocation_hotspots: self.memory_tracker.get_hotspots()?,
178 })
179 }
180
181 pub fn identify_bottlenecks(&self) -> Result<BottleneckAnalysis> {
183 let hotspots = self.identify_hotspots()?;
184 let slow_functions = self.find_slow_functions()?;
185 let memory_bottlenecks = self.memory_tracker.find_bottlenecks()?;
186 let cache_inefficiencies = self.cache_analyzer.find_inefficiencies()?;
187
188 let severity_score = self.calculate_severity_score(&hotspots, &slow_functions)?;
189
190 Ok(BottleneckAnalysis {
191 hotspots,
192 slow_functions,
193 memory_bottlenecks,
194 cache_inefficiencies,
195 severity_score,
196 })
197 }
198
199 pub fn generate_optimization_hints(
201 &self,
202 metrics: &ProfileMetrics,
203 ) -> Result<Vec<OptimizationHint>> {
204 let mut hints = Vec::new();
205
206 if metrics.memory_usage.peak > metrics.memory_usage.current * 2 {
208 hints.push(OptimizationHint {
209 category: OptimizationCategory::Memory,
210 priority: Priority::High,
211 description: "High memory fragmentation detected".to_string(),
212 suggestion: "Consider using memory pools or arena allocators".to_string(),
213 expected_improvement: ImprovementEstimate::Percentage(20.0),
214 });
215 }
216
217 if metrics.cache_misses > 1000000 {
219 hints.push(OptimizationHint {
220 category: OptimizationCategory::CacheEfficiency,
221 priority: Priority::High,
222 description: "High cache miss rate detected".to_string(),
223 suggestion: "Improve data locality, consider tiling or blocking".to_string(),
224 expected_improvement: ImprovementEstimate::Percentage(30.0),
225 });
226 }
227
228 if metrics.branch_mispredictions > metrics.instruction_count / 100 {
230 hints.push(OptimizationHint {
231 category: OptimizationCategory::BranchPrediction,
232 priority: Priority::Medium,
233 description: "High branch misprediction rate".to_string(),
234 suggestion: "Reduce conditional branches, consider branchless algorithms"
235 .to_string(),
236 expected_improvement: ImprovementEstimate::Percentage(10.0),
237 });
238 }
239
240 Ok(hints)
241 }
242
243 fn reset(&mut self) {
245 self.counters.reset();
246 self.memory_tracker.reset();
247 self.cache_analyzer.reset();
248 self.timeline.clear();
249 }
250
251 fn identify_hotspots(&self) -> Result<Vec<Hotspot>> {
252 Ok(vec![
253 Hotspot {
254 location: "matrix_multiply".to_string(),
255 time_percentage: 45.0,
256 call_count: 1000,
257 average_time: Duration::from_micros(100),
258 },
259 Hotspot {
260 location: "gradient_computation".to_string(),
261 time_percentage: 30.0,
262 call_count: 500,
263 average_time: Duration::from_micros(150),
264 },
265 ])
266 }
267
268 fn find_slow_functions(&self) -> Result<Vec<SlowFunction>> {
269 Ok(vec![SlowFunction {
270 name: "backpropagation".to_string(),
271 time: Duration::from_millis(500),
272 call_count: 100,
273 reason: "Large matrix operations".to_string(),
274 }])
275 }
276
277 fn calculate_severity_score(
278 &self,
279 hotspots: &[Hotspot],
280 slow_functions: &[SlowFunction],
281 ) -> Result<f64> {
282 let hotspot_score: f64 = hotspots.iter().map(|h| h.time_percentage).sum();
283 let slow_func_score = slow_functions.len() as f64 * 10.0;
284 Ok((hotspot_score + slow_func_score) / 100.0)
285 }
286}
287
288impl Default for PerformanceProfiler {
289 fn default() -> Self {
290 Self::new()
291 }
292}
293
/// Mutable recording surface handed to the closure passed to
/// [`PerformanceProfiler::profile_detailed`].
pub struct ProfilerContext<'a> {
    /// Borrowed profiler; used to read memory-tracker state for snapshots.
    profiler: &'a mut PerformanceProfiler,
    /// Duration recorded per named phase (last `exit_phase` wins per name).
    phase_timings: HashMap<String, Duration>,
    /// Every recorded duration per named function (one entry per call).
    function_timings: HashMap<String, Vec<Duration>>,
    /// Labelled memory snapshots taken during the run.
    memory_snapshots: Vec<MemorySnapshot>,
    /// Phase named by the most recent `enter_phase`, if not yet exited.
    current_phase: Option<String>,
}
306
307impl<'a> ProfilerContext<'a> {
308 fn new(profiler: &'a mut PerformanceProfiler) -> Self {
309 Self {
310 profiler,
311 phase_timings: HashMap::new(),
312 function_timings: HashMap::new(),
313 memory_snapshots: Vec::new(),
314 current_phase: None,
315 }
316 }
317
318 pub fn enter_phase(&mut self, name: impl Into<String>) {
320 let phase_name = name.into();
321 self.current_phase = Some(phase_name);
322 }
323
324 pub fn exit_phase(&mut self, duration: Duration) {
326 if let Some(phase_name) = self.current_phase.take() {
327 self.phase_timings.insert(phase_name, duration);
328 }
329 }
330
331 pub fn record_function<F, R>(&mut self, name: impl Into<String>, f: F) -> R
333 where
334 F: FnOnce() -> R,
335 {
336 let function_name = name.into();
337 let start = Instant::now();
338 let result = f();
339 let elapsed = start.elapsed();
340
341 self.function_timings
342 .entry(function_name)
343 .or_default()
344 .push(elapsed);
345
346 result
347 }
348
349 pub fn snapshot_memory(&mut self, label: impl Into<String>) {
351 let snapshot = MemorySnapshot {
352 label: label.into(),
353 timestamp: Instant::now(),
354 bytes_used: self.profiler.memory_tracker.current_usage(),
355 allocation_count: self.profiler.memory_tracker.allocation_count(),
356 };
357 self.memory_snapshots.push(snapshot);
358 }
359}
360
/// Output of [`PerformanceProfiler::profile`]: the closure's result plus
/// everything collected while it ran.
#[derive(Debug)]
pub struct ProfileResult<R> {
    /// Value returned by the profiled closure.
    pub result: R,
    /// Aggregate timing/memory/cache metrics for the run.
    pub metrics: ProfileMetrics,
    /// Copy of the event timeline recorded during the run.
    pub timeline: ExecutionTimeline,
    /// Locations where the most time was reported spent.
    pub hotspots: Vec<Hotspot>,
    /// Heuristic suggestions derived from `metrics`.
    pub optimization_hints: Vec<OptimizationHint>,
}
379
/// Output of [`PerformanceProfiler::profile_detailed`], including everything
/// the closure recorded through its [`ProfilerContext`].
#[derive(Debug)]
pub struct DetailedProfileResult<R> {
    /// Value returned by the profiled closure.
    pub result: R,
    /// Wall-clock time for the whole run.
    pub total_time: Duration,
    /// Duration recorded per named phase.
    pub phase_timings: HashMap<String, Duration>,
    /// Every recorded duration per named function.
    pub function_timings: HashMap<String, Vec<Duration>>,
    /// Labelled memory snapshots taken during the run.
    pub memory_snapshots: Vec<MemorySnapshot>,
    /// Currently always empty — recommendation generation is not implemented.
    pub recommendations: Vec<OptimizationHint>,
}
396
/// Output of [`PerformanceProfiler::profile_memory`].
#[derive(Debug)]
pub struct MemoryProfile<R> {
    /// Value returned by the profiled closure.
    pub result: R,
    /// Bytes in use before the closure ran.
    pub initial_memory: usize,
    /// Bytes in use after the closure returned.
    pub final_memory: usize,
    /// Highest bytes-in-use value observed by the tracker.
    pub peak_memory: usize,
    /// Individual allocations recorded during the run.
    pub allocations: Vec<Allocation>,
    /// Locations responsible for the most allocation traffic.
    pub allocation_hotspots: Vec<AllocationHotspot>,
}
413
/// Aggregate measurements for one profiled run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfileMetrics {
    /// Total elapsed wall-clock time for the run.
    pub total_time: Duration,
    /// CPU time attributed to the run (currently measured via `Instant`,
    /// i.e. wall-clock based — not true scheduler CPU time).
    pub cpu_time: Duration,
    /// Wall-clock time; set to the same value as `total_time` by `profile`.
    pub wall_time: Duration,
    /// Memory usage summary from the memory tracker.
    pub memory_usage: MemoryUsage,
    /// Cache hit/miss statistics from the cache analyzer.
    pub cache_stats: CacheStats,
    /// Instructions retired (no collector populates this yet).
    pub instruction_count: u64,
    /// Branch mispredictions (no collector populates this yet).
    pub branch_mispredictions: u64,
    /// Cache misses (no collector populates this yet).
    pub cache_misses: u64,
}
434
435impl Default for ProfileMetrics {
436 fn default() -> Self {
437 Self {
438 total_time: Duration::from_secs(0),
439 cpu_time: Duration::from_secs(0),
440 wall_time: Duration::from_secs(0),
441 memory_usage: MemoryUsage::default(),
442 cache_stats: CacheStats::default(),
443 instruction_count: 0,
444 branch_mispredictions: 0,
445 cache_misses: 0,
446 }
447 }
448}
449
/// Snapshot of allocator activity for one run.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct MemoryUsage {
    /// Bytes currently allocated.
    pub current: usize,
    /// Highest bytes-allocated value observed.
    pub peak: usize,
    /// Number of allocations performed.
    pub allocations: usize,
    /// Number of deallocations performed (always 0 from the current tracker).
    pub deallocations: usize,
}
458
/// Per-level CPU cache hit/miss counts (L1 through L3).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CacheStats {
    pub l1_hits: u64,
    pub l1_misses: u64,
    pub l2_hits: u64,
    pub l2_misses: u64,
    pub l3_hits: u64,
    pub l3_misses: u64,
}
469
/// A code location that consumed a notable share of run time.
#[derive(Debug, Clone)]
pub struct Hotspot {
    /// Human-readable location (e.g. a function name).
    pub location: String,
    /// Share of total run time spent here, in percent.
    pub time_percentage: f64,
    /// How many times this location was entered.
    pub call_count: usize,
    /// Mean time per call.
    pub average_time: Duration,
}
478
/// A function flagged as slower than expected, with a diagnosis.
#[derive(Debug, Clone)]
pub struct SlowFunction {
    /// Function name.
    pub name: String,
    /// Total time attributed to the function.
    pub time: Duration,
    /// Number of recorded calls.
    pub call_count: usize,
    /// Human-readable explanation of why it is slow.
    pub reason: String,
}
487
/// One recorded heap allocation.
#[derive(Debug, Clone)]
pub struct Allocation {
    /// Allocation size in bytes.
    pub size: usize,
    /// Source location that performed the allocation.
    pub location: String,
    /// When the allocation was recorded.
    pub timestamp: Instant,
}
495
/// A source location responsible for a large share of allocation traffic.
#[derive(Debug, Clone)]
pub struct AllocationHotspot {
    /// Source location that performed the allocations.
    pub location: String,
    /// Total bytes allocated at this location.
    pub total_bytes: usize,
    /// Number of allocations at this location.
    pub allocation_count: usize,
}
503
/// Point-in-time view of memory state, taken via
/// [`ProfilerContext::snapshot_memory`].
#[derive(Debug, Clone)]
pub struct MemorySnapshot {
    /// Caller-supplied label identifying the snapshot.
    pub label: String,
    /// When the snapshot was taken.
    pub timestamp: Instant,
    /// Bytes in use at snapshot time.
    pub bytes_used: usize,
    /// Allocations recorded up to snapshot time.
    pub allocation_count: usize,
}
512
/// Combined report from [`PerformanceProfiler::identify_bottlenecks`].
#[derive(Debug)]
pub struct BottleneckAnalysis {
    /// Locations where the most time was reported spent.
    pub hotspots: Vec<Hotspot>,
    /// Functions flagged as slower than expected.
    pub slow_functions: Vec<SlowFunction>,
    /// Memory-related issues found by the tracker.
    pub memory_bottlenecks: Vec<MemoryBottleneck>,
    /// Cache-related issues found by the analyzer.
    pub cache_inefficiencies: Vec<CacheInefficiency>,
    /// Overall score: (sum of hotspot percentages + 10 per slow fn) / 100.
    pub severity_score: f64,
}
522
/// A memory-related problem tied to a specific location.
#[derive(Debug, Clone)]
pub struct MemoryBottleneck {
    /// Source location exhibiting the problem.
    pub location: String,
    /// Human-readable description of the problem.
    pub issue: String,
    /// How serious the problem is.
    pub severity: Severity,
}
530
/// A cache-usage problem tied to a specific location.
#[derive(Debug, Clone)]
pub struct CacheInefficiency {
    /// Source location exhibiting the problem.
    pub location: String,
    /// Observed cache miss rate (fraction or percent — set by the analyzer).
    pub miss_rate: f64,
    /// Suggested remedy.
    pub recommendation: String,
}
538
/// A single actionable suggestion produced from profiling metrics.
#[derive(Debug, Clone)]
pub struct OptimizationHint {
    /// Which aspect of performance the hint addresses.
    pub category: OptimizationCategory,
    /// How urgently the hint should be acted on.
    pub priority: Priority,
    /// What was observed.
    pub description: String,
    /// What to do about it.
    pub suggestion: String,
    /// Rough estimate of the benefit of acting on the hint.
    pub expected_improvement: ImprovementEstimate,
}
548
/// Aspect of performance an [`OptimizationHint`] targets.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OptimizationCategory {
    Memory,
    CacheEfficiency,
    BranchPrediction,
    SIMD,
    Parallelization,
    AlgorithmChoice,
    DataStructure,
}
560
/// Urgency of an [`OptimizationHint`].
///
/// Variant order matters: derived `Ord` makes `Low < Medium < High < Critical`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Priority {
    Low,
    Medium,
    High,
    Critical,
}
569
/// Seriousness of a detected bottleneck.
///
/// Variant order matters: derived `Ord` makes `Low < Medium < High < Critical`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    Low,
    Medium,
    High,
    Critical,
}
578
/// Rough estimate of the benefit of applying a hint, in one of three units.
#[derive(Debug, Clone)]
pub enum ImprovementEstimate {
    /// Expected speedup as a percentage of current run time.
    Percentage(f64),
    /// Expected absolute reduction in run time.
    TimeReduction(Duration),
    /// Expected reduction in memory usage, in bytes.
    MemoryReduction(usize),
}
586
/// Tunables for [`PerformanceProfiler`].
#[derive(Debug, Clone)]
pub struct ProfilerConfig {
    /// Whether the memory tracker is active.
    pub enable_memory_tracking: bool,
    /// Whether the cache analyzer is active.
    pub enable_cache_analysis: bool,
    /// Whether the execution timeline is recorded.
    pub enable_timeline: bool,
    /// Interval between samples.
    pub sampling_interval: Duration,
    /// Maximum number of hotspots to report.
    pub max_hotspots: usize,
}
596
597impl Default for ProfilerConfig {
598 fn default() -> Self {
599 Self {
600 enable_memory_tracking: true,
601 enable_cache_analysis: true,
602 enable_timeline: true,
603 sampling_interval: Duration::from_millis(1),
604 max_hotspots: 10,
605 }
606 }
607}
608
/// In-process stand-in for hardware performance counters.
///
/// Timing is wall-clock (`Instant`); the event tallies are plain fields
/// that no code path currently increments.
#[derive(Debug)]
struct PerformanceCounters {
    /// Set while a measurement interval is running; `None` when stopped.
    start_time: Option<Instant>,
    /// Instructions retired (never incremented yet).
    instructions: u64,
    /// Branch mispredictions (never incremented yet).
    branch_mispredicts: u64,
    /// Cache misses (never incremented yet).
    cache_misses: u64,
}
621
622impl PerformanceCounters {
623 fn new() -> Self {
624 Self {
625 start_time: None,
626 instructions: 0,
627 branch_mispredicts: 0,
628 cache_misses: 0,
629 }
630 }
631
632 fn start(&mut self) {
633 self.start_time = Some(Instant::now());
634 }
635
636 fn stop(&mut self) {
637 self.start_time = None;
638 }
639
640 fn reset(&mut self) {
641 self.instructions = 0;
642 self.branch_mispredicts = 0;
643 self.cache_misses = 0;
644 }
645
646 fn cpu_time(&self) -> Duration {
647 self.start_time
648 .map(|start| start.elapsed())
649 .unwrap_or_default()
650 }
651
652 fn instruction_count(&self) -> u64 {
653 self.instructions
654 }
655
656 fn branch_mispredictions(&self) -> u64 {
657 self.branch_mispredicts
658 }
659
660 fn cache_misses(&self) -> u64 {
661 self.cache_misses
662 }
663}
664
/// Tracks current/peak byte usage and individual allocations.
///
/// The `start*`/`stop` hooks are placeholders; nothing in this file updates
/// `current`/`peak` outside of `reset`.
#[derive(Debug)]
struct MemoryTracker {
    /// Bytes currently in use.
    current: usize,
    /// Highest bytes-in-use value observed.
    peak: usize,
    /// Individual recorded allocations.
    allocations: Vec<Allocation>,
    /// Total number of recorded allocations.
    allocation_count: usize,
}
673
674impl MemoryTracker {
675 fn new() -> Self {
676 Self {
677 current: 0,
678 peak: 0,
679 allocations: Vec::new(),
680 allocation_count: 0,
681 }
682 }
683
684 fn start(&mut self) {
685 }
687
688 fn start_detailed(&mut self) {
689 }
691
692 fn stop(&mut self) {
693 }
695
696 fn reset(&mut self) {
697 self.current = 0;
698 self.peak = 0;
699 self.allocations.clear();
700 self.allocation_count = 0;
701 }
702
703 fn current_usage(&self) -> usize {
704 self.current
705 }
706
707 fn peak_usage(&self) -> usize {
708 self.peak
709 }
710
711 fn allocation_count(&self) -> usize {
712 self.allocation_count
713 }
714
715 fn get_usage(&self) -> MemoryUsage {
716 MemoryUsage {
717 current: self.current,
718 peak: self.peak,
719 allocations: self.allocation_count,
720 deallocations: 0,
721 }
722 }
723
724 fn get_allocations(&self) -> Vec<Allocation> {
725 self.allocations.clone()
726 }
727
728 fn get_hotspots(&self) -> Result<Vec<AllocationHotspot>> {
729 Ok(vec![])
730 }
731
732 fn find_bottlenecks(&self) -> Result<Vec<MemoryBottleneck>> {
733 Ok(vec![])
734 }
735}
736
/// Holds aggregated cache statistics for a run.
///
/// Nothing in this file updates `stats` outside of `reset`, so the values
/// are currently always the zeroed defaults.
#[derive(Debug)]
struct CacheAnalyzer {
    /// Per-level hit/miss counts.
    stats: CacheStats,
}
742
743impl CacheAnalyzer {
744 fn new() -> Self {
745 Self {
746 stats: CacheStats::default(),
747 }
748 }
749
750 fn reset(&mut self) {
751 self.stats = CacheStats::default();
752 }
753
754 fn get_stats(&self) -> CacheStats {
755 self.stats.clone()
756 }
757
758 fn find_inefficiencies(&self) -> Result<Vec<CacheInefficiency>> {
759 Ok(vec![])
760 }
761}
762
/// Ordered record of profiling events for one run.
///
/// No code in this file appends events yet; the timeline is only created,
/// cloned into results, and cleared.
#[derive(Debug, Clone)]
pub struct ExecutionTimeline {
    /// Events in the order they were recorded.
    events: Vec<TimelineEvent>,
}
768
769impl ExecutionTimeline {
770 fn new() -> Self {
771 Self { events: Vec::new() }
772 }
773
774 fn clear(&mut self) {
775 self.events.clear();
776 }
777}
778
/// One entry in an [`ExecutionTimeline`].
#[derive(Debug, Clone)]
struct TimelineEvent {
    /// When the event occurred.
    timestamp: Instant,
    /// Free-form event label.
    event_type: String,
    /// How long the event lasted, when known.
    duration: Option<Duration>,
}
786
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh profiler picks up the default configuration.
    #[test]
    fn test_profiler_creation() {
        let profiler = PerformanceProfiler::new();
        assert!(profiler.config.enable_memory_tracking);
        assert!(profiler.config.enable_cache_analysis);
    }

    /// `profile` passes the closure's return value through unchanged.
    #[test]
    fn test_profile_execution() {
        let mut profiler = PerformanceProfiler::new();
        let outcome = profiler.profile(|| (0..1000).sum::<i32>());

        let profile = outcome.expect("profiling a pure closure must succeed");
        assert_eq!(profile.result, 499500);
    }

    /// Default config enables memory tracking and caps hotspots at ten.
    #[test]
    fn test_profiler_config() {
        let config = ProfilerConfig::default();
        assert!(config.enable_memory_tracking);
        assert_eq!(config.max_hotspots, 10);
    }

    /// Distinct optimization categories compare unequal.
    #[test]
    fn test_optimization_category() {
        assert_ne!(
            OptimizationCategory::Memory,
            OptimizationCategory::CacheEfficiency
        );
    }

    /// Priorities are strictly ordered Low < Medium < High < Critical.
    #[test]
    fn test_priority_ordering() {
        let ascending = [
            Priority::Low,
            Priority::Medium,
            Priority::High,
            Priority::Critical,
        ];
        assert!(ascending.windows(2).all(|pair| pair[0] < pair[1]));
    }

    /// Severities are strictly ordered Low < Medium < High < Critical.
    #[test]
    fn test_severity_ordering() {
        let ascending = [
            Severity::Low,
            Severity::Medium,
            Severity::High,
            Severity::Critical,
        ];
        assert!(ascending.windows(2).all(|pair| pair[0] < pair[1]));
    }

    /// `MemoryUsage::default()` starts fully zeroed.
    #[test]
    fn test_memory_usage_default() {
        let usage = MemoryUsage::default();
        assert_eq!(usage.current, 0);
        assert_eq!(usage.peak, 0);
    }

    /// `CacheStats::default()` starts fully zeroed.
    #[test]
    fn test_cache_stats_default() {
        let stats = CacheStats::default();
        assert_eq!(stats.l1_hits, 0);
        assert_eq!(stats.l1_misses, 0);
    }
}
856}