torsh_quantization/analysis/
speed.rs1use crate::QScheme;
4use std::time::{Duration, Instant};
5
6pub struct SpeedAnalyzer;
8
9impl SpeedAnalyzer {
10 pub fn estimate_speed_improvement(scheme: QScheme) -> f32 {
12 match scheme {
13 QScheme::Binary => 8.0, QScheme::Ternary => 6.0, QScheme::Int4PerTensor | QScheme::Int4PerChannel => 4.0, QScheme::PerTensorAffine
17 | QScheme::PerChannelAffine
18 | QScheme::PerTensorSymmetric
19 | QScheme::PerChannelSymmetric => 2.0, QScheme::MixedPrecision => 1.5, QScheme::GroupWise => 2.5, }
23 }
24
25 pub fn benchmark_operation<F>(operation: F, iterations: usize) -> Duration
27 where
28 F: Fn(),
29 {
30 let start = Instant::now();
31 for _ in 0..iterations {
32 operation();
33 }
34 start.elapsed()
35 }
36
37 pub fn calculate_throughput(duration: Duration, operations: usize) -> f64 {
39 operations as f64 / duration.as_secs_f64()
40 }
41
42 pub fn compare_schemes(
44 _num_operations: usize,
45 _baseline_duration: Duration,
46 ) -> std::collections::HashMap<QScheme, f32> {
47 let mut comparison = std::collections::HashMap::new();
48
49 let schemes = vec![
50 QScheme::Binary,
51 QScheme::Ternary,
52 QScheme::Int4PerTensor,
53 QScheme::PerTensorAffine,
54 QScheme::PerChannelAffine,
55 QScheme::MixedPrecision,
56 QScheme::GroupWise,
57 ];
58
59 for scheme in schemes {
60 let estimated_improvement = Self::estimate_speed_improvement(scheme);
61 comparison.insert(scheme, estimated_improvement);
62 }
63
64 comparison
65 }
66
67 pub fn generate_speed_report(
69 baseline_duration: Duration,
70 quantized_duration: Duration,
71 operations: usize,
72 ) -> String {
73 let speedup = baseline_duration.as_secs_f64() / quantized_duration.as_secs_f64();
74 let baseline_throughput = Self::calculate_throughput(baseline_duration, operations);
75 let quantized_throughput = Self::calculate_throughput(quantized_duration, operations);
76
77 format!(
78 "Speed Analysis Report:\n\
79 - Baseline Duration: {:.3}ms\n\
80 - Quantized Duration: {:.3}ms\n\
81 - Speed Improvement: {:.2}x\n\
82 - Baseline Throughput: {:.0} ops/sec\n\
83 - Quantized Throughput: {:.0} ops/sec\n\
84 - Efficiency Gain: {:.2}%",
85 baseline_duration.as_millis(),
86 quantized_duration.as_millis(),
87 speedup,
88 baseline_throughput,
89 quantized_throughput,
90 (speedup - 1.0) * 100.0
91 )
92 }
93}