// trustformers_debug/profiler/events.rs
use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::time::Duration;
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
9pub enum ProfileEvent {
10 FunctionCall {
11 function_name: String,
12 duration: Duration,
13 memory_delta: i64,
14 },
15 LayerExecution {
16 layer_name: String,
17 layer_type: String,
18 forward_time: Duration,
19 backward_time: Option<Duration>,
20 memory_usage: usize,
21 parameter_count: usize,
22 },
23 TensorOperation {
24 operation: String,
25 tensor_shape: Vec<usize>,
26 duration: Duration,
27 memory_allocated: usize,
28 },
29 ModelInference {
30 batch_size: usize,
31 sequence_length: usize,
32 duration: Duration,
33 tokens_per_second: f64,
34 },
35 GradientComputation {
36 layer_name: String,
37 gradient_norm: f64,
38 duration: Duration,
39 },
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct ProfileStats {
45 pub event_type: String,
46 pub count: usize,
47 pub total_duration: Duration,
48 pub avg_duration: Duration,
49 pub min_duration: Duration,
50 pub max_duration: Duration,
51 pub total_memory: i64,
52 pub avg_memory: f64,
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
57pub struct MemorySnapshot {
58 pub timestamp: chrono::DateTime<chrono::Utc>,
59 pub heap_allocated: usize,
60 pub heap_used: usize,
61 pub stack_size: usize,
62 pub gpu_allocated: Option<usize>,
63 pub gpu_used: Option<usize>,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct PerformanceBottleneck {
69 pub bottleneck_type: BottleneckType,
70 pub location: String,
71 pub severity: BottleneckSeverity,
72 pub description: String,
73 pub suggestion: String,
74 pub metrics: HashMap<String, f64>,
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub enum BottleneckType {
79 CpuBound,
80 MemoryBound,
81 IoBound,
82 GpuBound,
83 NetworkBound,
84 DataLoading,
85 ModelComputation,
86 GradientComputation,
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub enum BottleneckSeverity {
91 Low,
92 Medium,
93 High,
94 Critical,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct CpuProfile {
100 pub function_name: String,
101 pub self_time: Duration,
102 pub total_time: Duration,
103 pub call_count: usize,
104 pub children: Vec<CpuProfile>,
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize)]
109pub struct CpuBottleneckAnalysis {
110 pub thread_id: u64,
111 pub cpu_usage: f64,
112 pub context_switches: u64,
113 pub cache_misses: u64,
114 pub instructions_per_cycle: f64,
115 pub branch_mispredictions: u64,
116 pub hot_functions: Vec<HotFunction>,
117 pub bottleneck_score: f64,
118}
119
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct HotFunction {
122 pub function_name: String,
123 pub self_time_percentage: f64,
124 pub call_count: usize,
125 pub avg_time_per_call: Duration,
126}