use crate::QScheme;
use std::time::{Duration, Instant};
/// Stateless namespace for estimating and measuring the speed impact of
/// quantization schemes.
///
/// Every function is an associated function; no instance is required.
pub struct SpeedAnalyzer;

impl SpeedAnalyzer {
    /// Returns the estimated speed-up factor for `scheme`.
    ///
    /// The factors are fixed constants chosen per scheme; nothing is measured
    /// at runtime. The `match` is deliberately exhaustive (no wildcard arm) so
    /// that adding a `QScheme` variant forces this table to be updated.
    pub fn estimate_speed_improvement(scheme: QScheme) -> f32 {
        match scheme {
            QScheme::Binary => 8.0,
            QScheme::Ternary => 6.0,
            QScheme::Int4PerTensor | QScheme::Int4PerChannel => 4.0,
            QScheme::PerTensorAffine
            | QScheme::PerChannelAffine
            | QScheme::PerTensorSymmetric
            | QScheme::PerChannelSymmetric => 2.0,
            QScheme::MixedPrecision => 1.5,
            QScheme::GroupWise => 2.5,
        }
    }

    /// Calls `operation` `iterations` times back to back and returns the
    /// total elapsed time as measured by [`Instant`].
    ///
    /// The closure may mutate captured state (`FnMut`), so callers can
    /// accumulate results or count invocations. No warm-up run is performed;
    /// with `iterations == 0` the closure is never called.
    pub fn benchmark_operation<F>(mut operation: F, iterations: usize) -> Duration
    where
        F: FnMut(),
    {
        let start = Instant::now();
        for _ in 0..iterations {
            operation();
        }
        start.elapsed()
    }

    /// Returns the throughput in operations per second.
    ///
    /// This is a plain floating-point division: a zero `duration` yields
    /// `f64::INFINITY` (or `NaN` when `operations` is also zero) rather than
    /// panicking.
    pub fn calculate_throughput(duration: Duration, operations: usize) -> f64 {
        operations as f64 / duration.as_secs_f64()
    }

    /// Builds a map from a fixed set of quantization schemes to their
    /// estimated speed-up factor (see [`Self::estimate_speed_improvement`]).
    ///
    /// `num_operations` and `baseline_duration` are accepted for interface
    /// compatibility but do not influence the result.
    ///
    /// NOTE(review): `Int4PerChannel`, `PerTensorSymmetric` and
    /// `PerChannelSymmetric` are not part of the compared set even though
    /// `estimate_speed_improvement` handles them -- confirm whether that
    /// omission is intentional.
    pub fn compare_schemes(
        num_operations: usize,
        baseline_duration: Duration,
    ) -> std::collections::HashMap<QScheme, f32> {
        // Explicitly discard the inputs the estimate does not use.
        let _ = (num_operations, baseline_duration);

        let schemes = [
            QScheme::Binary,
            QScheme::Ternary,
            QScheme::Int4PerTensor,
            QScheme::PerTensorAffine,
            QScheme::PerChannelAffine,
            QScheme::MixedPrecision,
            QScheme::GroupWise,
        ];
        schemes
            .iter()
            .map(|&scheme| (scheme, Self::estimate_speed_improvement(scheme)))
            .collect()
    }

    /// Renders a multi-line, human-readable comparison of a baseline run and
    /// a quantized run that each performed `operations` operations.
    ///
    /// Durations are printed in milliseconds with three decimal places, so
    /// sub-millisecond runs are not truncated to `0ms`. The speed-up is
    /// `baseline / quantized`; a zero `quantized_duration` therefore prints
    /// `inf` (or `NaN` when both durations are zero).
    pub fn generate_speed_report(
        baseline_duration: Duration,
        quantized_duration: Duration,
        operations: usize,
    ) -> String {
        let baseline_secs = baseline_duration.as_secs_f64();
        let quantized_secs = quantized_duration.as_secs_f64();
        let speedup = baseline_secs / quantized_secs;
        let baseline_throughput = Self::calculate_throughput(baseline_duration, operations);
        let quantized_throughput = Self::calculate_throughput(quantized_duration, operations);
        format!(
            "Speed Analysis Report:\n\
             - Baseline Duration: {:.3}ms\n\
             - Quantized Duration: {:.3}ms\n\
             - Speed Improvement: {:.2}x\n\
             - Baseline Throughput: {:.0} ops/sec\n\
             - Quantized Throughput: {:.0} ops/sec\n\
             - Efficiency Gain: {:.2}%",
            // Format fractional milliseconds: `as_millis()` returns an integer,
            // for which the `.3` precision is silently ignored.
            baseline_secs * 1000.0,
            quantized_secs * 1000.0,
            speedup,
            baseline_throughput,
            quantized_throughput,
            (speedup - 1.0) * 100.0
        )
    }
}