trustformers_debug/profiler/
report.rs1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::time::Duration;
6
7use super::events::{CpuBottleneckAnalysis, PerformanceBottleneck, ProfileStats};
8use super::gpu::GpuKernelSummary;
9use super::io_monitor::IoDeviceType;
10use super::memory::{MemoryEfficiencyAnalysis, MemoryStats};
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct ProfilerReport {
15 pub total_events: usize,
16 pub total_runtime: Duration,
17 pub statistics: HashMap<String, ProfileStats>,
18 pub bottlenecks: Vec<PerformanceBottleneck>,
19 pub slowest_layers: Vec<(String, Duration)>,
20 pub memory_efficiency: MemoryEfficiencyAnalysis,
21 pub recommendations: Vec<String>,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct LayerLatencyAnalysis {
27 pub layer_name: String,
28 pub layer_type: String,
29 pub total_time: Duration,
30 pub cpu_percentage: f64,
31 pub gpu_percentage: f64,
32 pub memory_copy_percentage: f64,
33 pub flops_per_second: f64,
34 pub memory_bandwidth_utilization: f64,
35 pub bottleneck_type: String,
36}
37
38#[derive(Debug, Serialize, Deserialize)]
40pub struct PerformanceAnalysis {
41 pub memory_stats: Option<MemoryStats>,
42 pub io_bandwidth_stats: HashMap<IoDeviceType, f64>,
43 pub layer_analysis: Vec<LayerLatencyAnalysis>,
44 pub gpu_utilization: Option<f64>,
45 pub cpu_bottlenecks: Vec<CpuBottleneckAnalysis>,
46 pub total_gpu_kernels: usize,
47 pub total_io_operations: usize,
48 pub performance_score: f64,
49 pub recommendations: Vec<String>,
50}
51
52#[derive(Debug, Serialize, Deserialize)]
54pub struct EnhancedProfilerReport {
55 pub basic_report: ProfilerReport,
56 pub performance_analysis: PerformanceAnalysis,
57 pub gpu_kernel_summary: GpuKernelSummary,
58 pub memory_allocation_summary: MemoryAllocationSummary,
59 pub io_performance_summary: super::io_monitor::IoPerformanceSummary,
60}
61
62#[derive(Debug, Serialize, Deserialize)]
63pub struct MemoryAllocationSummary {
64 pub total_allocations: usize,
65 pub peak_memory_usage: usize,
66 pub memory_efficiency: f64,
67 pub largest_allocations: Vec<String>,
68 pub memory_leaks: usize,
69}