#![allow(dead_code)]
/// Outcome of trying to vectorize a single loop.
///
/// `Eq` is deliberately not derived: `PartiallyVectorized` stores an `f32`,
/// and `f32` only implements `PartialEq` (a NaN value is never equal to
/// itself), so deriving `Eq` on this enum does not compile.
#[derive(Debug, Clone, PartialEq)]
pub enum VectorizationStatus {
    /// The whole loop runs with SIMD.
    FullyVectorized {
        /// SIMD width used for the loop.
        width: u32,
    },
    /// Only part of the loop runs with SIMD.
    PartiallyVectorized {
        /// Share of the loop that is vectorized; expected to lie in
        /// `0.0..=1.0` (not enforced by this type).
        fraction: f32,
        /// SIMD width used for the vectorized share.
        width: u32,
    },
    /// The loop runs without SIMD.
    Scalar,
    /// Vectorization was blocked by a dependency; handled like `Scalar`.
    BlockedByDependency,
}
impl VectorizationStatus {
    /// Reports whether the loop uses SIMD at all.
    ///
    /// Returns `true` for the fully and partially vectorized variants and
    /// `false` for `Scalar` and `BlockedByDependency`.
    #[must_use]
    pub fn is_vectorized(&self) -> bool {
        matches!(
            self,
            Self::FullyVectorized { .. } | Self::PartiallyVectorized { .. }
        )
    }

    /// Returns the SIMD width recorded for this status.
    ///
    /// Vectorized variants report their stored `width` unchanged; `Scalar`
    /// and `BlockedByDependency` report `1`.
    #[must_use]
    pub fn simd_width(&self) -> u32 {
        match *self {
            Self::FullyVectorized { width } | Self::PartiallyVectorized { width, .. } => width,
            Self::Scalar | Self::BlockedByDependency => 1,
        }
    }
}
/// A named loop together with its iteration count and vectorization outcome.
#[derive(Debug, Clone)]
pub struct VectorizableLoop {
    /// Name used to identify the loop.
    pub name: String,
    /// Iteration count of the loop; `VectorizationReport::speedup_ratio`
    /// uses it as the loop's weight.
    pub iteration_count: u64,
    /// Vectorization outcome recorded for the loop.
    pub status: VectorizationStatus,
}
impl VectorizableLoop {
    /// Creates a loop record from a name, an iteration count and a status.
    #[must_use]
    pub fn new(name: impl Into<String>, iteration_count: u64, status: VectorizationStatus) -> Self {
        Self {
            name: name.into(),
            iteration_count,
            status,
        }
    }

    /// Estimates the speedup of this loop relative to scalar execution.
    ///
    /// * `FullyVectorized` — the SIMD width.
    /// * `PartiallyVectorized` — Amdahl's law,
    ///   `1 / (fraction / width + (1 - fraction))`.
    /// * `Scalar` / `BlockedByDependency` — `1.0`.
    ///
    /// Degenerate inputs are normalised so the result stays a meaningful
    /// ratio: a `width` of `0` is treated as a single lane (it would otherwise
    /// yield `0.0` or NaN), and `fraction` is clamped to `0.0..=1.0` (values
    /// above `1.0` would otherwise produce a negative result). A NaN
    /// `fraction` still yields NaN.
    // `u32 -> f32` is exact below 2^24; SIMD widths are expected to be far
    // smaller than that, so the precision-loss lint is not a concern here.
    #[allow(clippy::cast_precision_loss)]
    #[must_use]
    pub fn estimated_speedup(&self) -> f32 {
        match self.status {
            VectorizationStatus::FullyVectorized { width } => width.max(1) as f32,
            VectorizationStatus::PartiallyVectorized { fraction, width } => {
                let lanes = width.max(1) as f32;
                let vectorized = fraction.clamp(0.0, 1.0);
                // Vectorized share runs `lanes` times faster; the rest is unchanged.
                1.0 / (vectorized / lanes + (1.0 - vectorized))
            }
            VectorizationStatus::Scalar | VectorizationStatus::BlockedByDependency => 1.0,
        }
    }
}
/// Collection of loops gathered for a vectorization summary.
///
/// The default value is an empty report.
#[derive(Debug, Default)]
pub struct VectorizationReport {
    /// Loops in the order they were added.
    loops: Vec<VectorizableLoop>,
}
impl VectorizationReport {
    /// Creates an empty report.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends a loop to the report.
    pub fn add_loop(&mut self, lp: VectorizableLoop) {
        self.loops.push(lp);
    }

    /// Returns every loop in the report, in insertion order.
    ///
    /// Despite the name, no filtering is applied: scalar and blocked loops
    /// are part of the returned slice as well.
    #[must_use]
    pub fn vectorized_loops(&self) -> &[VectorizableLoop] {
        &self.loops
    }

    /// Counts the loops whose status is `FullyVectorized`.
    #[must_use]
    pub fn fully_vectorized_count(&self) -> usize {
        self.loops
            .iter()
            .filter(|l| matches!(l.status, VectorizationStatus::FullyVectorized { .. }))
            .count()
    }

    /// Returns the iteration-weighted arithmetic mean of the per-loop
    /// estimated speedups.
    ///
    /// Returns `1.0` when the report is empty or when all iteration counts
    /// are zero, since there is nothing to weight by.
    // Iteration counts are converted to `f64` for the average; the rounding
    // this introduces for very large counts is acceptable for an estimate.
    #[allow(clippy::cast_precision_loss)]
    #[must_use]
    pub fn speedup_ratio(&self) -> f32 {
        // Accumulate in `u128`: summing `u64` counts directly can overflow,
        // which panics in debug builds and silently wraps in release builds.
        let total_iters: u128 = self
            .loops
            .iter()
            .map(|l| u128::from(l.iteration_count))
            .sum();
        if total_iters == 0 {
            return 1.0;
        }
        let weighted_sum: f64 = self
            .loops
            .iter()
            .map(|l| f64::from(l.estimated_speedup()) * l.iteration_count as f64)
            .sum();
        (weighted_sum / total_iters as f64) as f32
    }

    /// Returns the number of loops in the report.
    #[must_use]
    pub fn loop_count(&self) -> usize {
        self.loops.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when `actual` is within `tolerance` of `expected`.
    fn is_close(actual: f32, expected: f32, tolerance: f32) -> bool {
        (actual - expected).abs() < tolerance
    }

    /// Shorthand for a fully vectorized status of the given width.
    fn full(width: u32) -> VectorizationStatus {
        VectorizationStatus::FullyVectorized { width }
    }

    // --- VectorizationStatus::is_vectorized ---

    #[test]
    fn test_status_fully_vectorized_is_vectorized() {
        assert!(full(8).is_vectorized());
    }

    #[test]
    fn test_status_scalar_not_vectorized() {
        let status = VectorizationStatus::Scalar;
        assert!(!status.is_vectorized());
    }

    #[test]
    fn test_status_blocked_not_vectorized() {
        let status = VectorizationStatus::BlockedByDependency;
        assert!(!status.is_vectorized());
    }

    #[test]
    fn test_status_partial_is_vectorized() {
        let status = VectorizationStatus::PartiallyVectorized {
            fraction: 0.5,
            width: 4,
        };
        assert!(status.is_vectorized());
    }

    // --- VectorizationStatus::simd_width ---

    #[test]
    fn test_simd_width_full() {
        assert_eq!(full(16).simd_width(), 16);
    }

    #[test]
    fn test_simd_width_scalar() {
        let status = VectorizationStatus::Scalar;
        assert_eq!(status.simd_width(), 1);
    }

    #[test]
    fn test_simd_width_blocked() {
        let status = VectorizationStatus::BlockedByDependency;
        assert_eq!(status.simd_width(), 1);
    }

    // --- VectorizableLoop::estimated_speedup ---

    #[test]
    fn test_loop_speedup_scalar() {
        let scalar = VectorizableLoop::new("scalar_loop", 1000, VectorizationStatus::Scalar);
        assert!(is_close(scalar.estimated_speedup(), 1.0, 1e-5));
    }

    #[test]
    fn test_loop_speedup_fully_vectorized() {
        let vectorized = VectorizableLoop::new("vec_loop", 1000, full(8));
        assert!(is_close(vectorized.estimated_speedup(), 8.0, 1e-5));
    }

    #[test]
    fn test_loop_speedup_partial_greater_than_1() {
        let status = VectorizationStatus::PartiallyVectorized {
            fraction: 0.8,
            width: 4,
        };
        let partial = VectorizableLoop::new("partial", 500, status);
        assert!(partial.estimated_speedup() > 1.0);
    }

    // --- VectorizationReport ---

    #[test]
    fn test_report_empty_speedup_ratio() {
        let empty = VectorizationReport::new();
        assert!(is_close(empty.speedup_ratio(), 1.0, 1e-5));
    }

    #[test]
    fn test_report_add_and_count() {
        let mut report = VectorizationReport::new();
        report.add_loop(VectorizableLoop::new("lp1", 100, full(4)));
        report.add_loop(VectorizableLoop::new("lp2", 50, VectorizationStatus::Scalar));
        assert_eq!(report.loop_count(), 2);
    }

    #[test]
    fn test_report_fully_vectorized_count() {
        let mut report = VectorizationReport::new();
        let entries = vec![
            VectorizableLoop::new("a", 100, full(8)),
            VectorizableLoop::new("b", 100, VectorizationStatus::Scalar),
            VectorizableLoop::new("c", 100, full(4)),
        ];
        for entry in entries {
            report.add_loop(entry);
        }
        assert_eq!(report.fully_vectorized_count(), 2);
    }

    #[test]
    fn test_report_speedup_ratio_all_full() {
        let mut report = VectorizationReport::new();
        report.add_loop(VectorizableLoop::new("lp", 1000, full(8)));
        assert!(is_close(report.speedup_ratio(), 8.0, 0.01));
    }

    #[test]
    fn test_vectorized_loops_slice() {
        let mut report = VectorizationReport::new();
        report.add_loop(VectorizableLoop::new("x", 10, VectorizationStatus::Scalar));
        let loops = report.vectorized_loops();
        assert_eq!(loops.len(), 1);
        assert_eq!(loops[0].name, "x");
    }
}