// torsh_quantization/analysis/speed.rs

//! Speed analysis for quantized operations

use crate::QScheme;
use std::time::{Duration, Instant};

6/// Speed analysis for quantized operations
7pub struct SpeedAnalyzer;
8
9impl SpeedAnalyzer {
10    /// Estimate speed improvement for different quantization schemes
11    pub fn estimate_speed_improvement(scheme: QScheme) -> f32 {
12        match scheme {
13            QScheme::Binary => 8.0,  // Binary operations are very fast
14            QScheme::Ternary => 6.0, // Ternary operations are fast
15            QScheme::Int4PerTensor | QScheme::Int4PerChannel => 4.0, // 4-bit operations
16            QScheme::PerTensorAffine
17            | QScheme::PerChannelAffine
18            | QScheme::PerTensorSymmetric
19            | QScheme::PerChannelSymmetric => 2.0, // 8-bit operations
20            QScheme::MixedPrecision => 1.5, // Mixed precision
21            QScheme::GroupWise => 2.5, // Group-wise quantization
22        }
23    }
24
25    /// Benchmark operation speed
26    pub fn benchmark_operation<F>(operation: F, iterations: usize) -> Duration
27    where
28        F: Fn(),
29    {
30        let start = Instant::now();
31        for _ in 0..iterations {
32            operation();
33        }
34        start.elapsed()
35    }
36
37    /// Calculate throughput (operations per second)
38    pub fn calculate_throughput(duration: Duration, operations: usize) -> f64 {
39        operations as f64 / duration.as_secs_f64()
40    }
41
42    /// Compare speed between different quantization schemes
43    pub fn compare_schemes(
44        _num_operations: usize,
45        _baseline_duration: Duration,
46    ) -> std::collections::HashMap<QScheme, f32> {
47        let mut comparison = std::collections::HashMap::new();
48
49        let schemes = vec![
50            QScheme::Binary,
51            QScheme::Ternary,
52            QScheme::Int4PerTensor,
53            QScheme::PerTensorAffine,
54            QScheme::PerChannelAffine,
55            QScheme::MixedPrecision,
56            QScheme::GroupWise,
57        ];
58
59        for scheme in schemes {
60            let estimated_improvement = Self::estimate_speed_improvement(scheme);
61            comparison.insert(scheme, estimated_improvement);
62        }
63
64        comparison
65    }
66
67    /// Generate speed analysis report
68    pub fn generate_speed_report(
69        baseline_duration: Duration,
70        quantized_duration: Duration,
71        operations: usize,
72    ) -> String {
73        let speedup = baseline_duration.as_secs_f64() / quantized_duration.as_secs_f64();
74        let baseline_throughput = Self::calculate_throughput(baseline_duration, operations);
75        let quantized_throughput = Self::calculate_throughput(quantized_duration, operations);
76
77        format!(
78            "Speed Analysis Report:\n\
79             - Baseline Duration: {:.3}ms\n\
80             - Quantized Duration: {:.3}ms\n\
81             - Speed Improvement: {:.2}x\n\
82             - Baseline Throughput: {:.0} ops/sec\n\
83             - Quantized Throughput: {:.0} ops/sec\n\
84             - Efficiency Gain: {:.2}%",
85            baseline_duration.as_millis(),
86            quantized_duration.as_millis(),
87            speedup,
88            baseline_throughput,
89            quantized_throughput,
90            (speedup - 1.0) * 100.0
91        )
92    }
93}