trustformers_models/comprehensive_testing/
performance.rs1use anyhow::Result;
4use std::collections::HashMap;
5use std::time::{Duration, Instant};
6use trustformers_core::tensor::Tensor;
7use trustformers_core::traits::Model;
8
9use super::config::{TestDataType, TestInputConfig, ValidationConfig};
10use super::types::{
11 LayerPerformance, MemoryAnalysis, OverallPerformance, PerformanceResults,
12 ThroughputMeasurements,
13};
14
15pub struct PerformanceProfiler {
17 config: ValidationConfig,
18}
19
20impl PerformanceProfiler {
21 pub fn new() -> Self {
23 Self {
24 config: ValidationConfig::default(),
25 }
26 }
27
28 pub fn with_config(config: ValidationConfig) -> Self {
30 Self { config }
31 }
32
33 pub fn profile_model<M: Model<Input = Tensor, Output = Tensor>>(
35 &self,
36 model: &M,
37 ) -> Result<PerformanceResults> {
38 let mut layer_performance = Vec::new();
39 let mut total_time = Duration::ZERO;
40 let total_memory = 0.0;
41
42 for test_input in &self.config.test_inputs {
44 let input = self.create_test_input(test_input)?;
45
46 let start_time = Instant::now();
47 let _output = model.forward(input)?;
48 let inference_time = start_time.elapsed();
49
50 total_time += inference_time;
51
52 layer_performance.push(LayerPerformance {
55 layer_name: format!("layer_{}", layer_performance.len()),
56 layer_type: "transformer".to_string(),
57 forward_time: inference_time / 10, memory_usage_mb: 100.0, flops: None,
60 utilization_percent: None,
61 });
62 }
63
64 let overall_performance = OverallPerformance {
65 total_inference_time: total_time,
66 tokens_per_second: 1000.0, total_flops: None,
68 peak_memory_mb: total_memory,
69 average_memory_mb: total_memory / self.config.test_inputs.len() as f64,
70 };
71
72 let memory_analysis = MemoryAnalysis {
73 by_layer_type: HashMap::new(),
74 by_tensor_type: HashMap::new(),
75 efficiency_score: 75.0, fragmentation_percent: 5.0, };
78
79 let throughput = ThroughputMeasurements {
80 batch_size: 1,
81 sequence_length: 128,
82 tokens_per_second: 1000.0,
83 samples_per_second: 10.0,
84 latency_per_token_ms: 1.0,
85 };
86
87 Ok(PerformanceResults {
88 layer_performance,
89 overall_performance,
90 memory_analysis,
91 throughput,
92 })
93 }
94
95 fn create_test_input(&self, config: &TestInputConfig) -> Result<Tensor> {
97 match config.data_type {
98 TestDataType::I32 => {
99 let mut input_ids = Vec::new();
101 for i in 0..config.dimensions.iter().product::<usize>() {
102 input_ids.push(((i % 1000 + 1) as i32) as f32); }
104 Ok(Tensor::from_vec(input_ids, &config.dimensions)?)
105 },
106 TestDataType::F32 => {
107 Ok(Tensor::randn(&config.dimensions)?)
109 },
110 TestDataType::F16 => {
111 Ok(Tensor::randn(&config.dimensions)?)
113 },
114 TestDataType::I64 => {
115 let mut input_ids = Vec::new();
117 for i in 0..config.dimensions.iter().product::<usize>() {
118 input_ids.push(((i % 1000 + 1) as i64) as f32);
119 }
120 Ok(Tensor::from_vec(input_ids, &config.dimensions)?)
121 },
122 }
123 }
124
125 pub fn get_model_name(&self) -> &str {
127 "Unknown"
128 }
129}
130
131impl Default for PerformanceProfiler {
132 fn default() -> Self {
133 Self::new()
134 }
135}