use std::time::Instant;
#[derive(Debug, Clone)]
pub struct UltraFastCSR {
values: Vec<f64>,
col_indices: Vec<u32>,
row_ptr: Vec<u32>,
rows: usize,
cols: usize,
}
impl UltraFastCSR {
pub fn from_triplets(triplets: Vec<(usize, usize, f64)>, rows: usize, cols: usize) -> Self {
let mut sorted = triplets;
sorted.sort_unstable_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
let nnz = sorted.len();
let mut values = Vec::with_capacity(nnz);
let mut col_indices = Vec::with_capacity(nnz);
let mut row_ptr = vec![0u32; rows + 1];
let mut current_row = 0;
for (row, col, val) in sorted {
while current_row <= row {
row_ptr[current_row] = values.len() as u32;
current_row += 1;
}
values.push(val);
col_indices.push(col as u32);
}
while current_row <= rows {
row_ptr[current_row] = values.len() as u32;
current_row += 1;
}
Self { values, col_indices, row_ptr, rows, cols }
}
#[inline]
pub fn multiply_vector_ultra_fast(&self, x: &[f64], y: &mut [f64]) {
y.fill(0.0);
for row in 0..self.rows {
let start = self.row_ptr[row] as usize;
let end = self.row_ptr[row + 1] as usize;
if start >= end { continue; }
let values = unsafe { self.values.get_unchecked(start..end) };
let indices = unsafe { self.col_indices.get_unchecked(start..end) };
let nnz = end - start;
let mut sum = 0.0;
let chunks = nnz / 8;
let remainder = nnz % 8;
for chunk in 0..chunks {
let base = chunk * 8;
sum += unsafe {
values.get_unchecked(base) * x.get_unchecked(*indices.get_unchecked(base) as usize) +
values.get_unchecked(base + 1) * x.get_unchecked(*indices.get_unchecked(base + 1) as usize) +
values.get_unchecked(base + 2) * x.get_unchecked(*indices.get_unchecked(base + 2) as usize) +
values.get_unchecked(base + 3) * x.get_unchecked(*indices.get_unchecked(base + 3) as usize) +
values.get_unchecked(base + 4) * x.get_unchecked(*indices.get_unchecked(base + 4) as usize) +
values.get_unchecked(base + 5) * x.get_unchecked(*indices.get_unchecked(base + 5) as usize) +
values.get_unchecked(base + 6) * x.get_unchecked(*indices.get_unchecked(base + 6) as usize) +
values.get_unchecked(base + 7) * x.get_unchecked(*indices.get_unchecked(base + 7) as usize)
};
}
for i in (chunks * 8)..(chunks * 8 + remainder) {
sum += unsafe {
values.get_unchecked(i) * x.get_unchecked(*indices.get_unchecked(i) as usize)
};
}
unsafe { *y.get_unchecked_mut(row) = sum; }
}
}
pub fn nnz(&self) -> usize { self.values.len() }
pub fn rows(&self) -> usize { self.rows }
pub fn cols(&self) -> usize { self.cols }
}
pub struct UltraFastCG {
max_iterations: usize,
tolerance: f64,
tolerance_sq: f64,
}
impl UltraFastCG {
pub fn new(max_iterations: usize, tolerance: f64) -> Self {
Self {
max_iterations,
tolerance,
tolerance_sq: tolerance * tolerance,
}
}
pub fn solve(&self, matrix: &UltraFastCSR, b: &[f64]) -> Vec<f64> {
let n = matrix.rows();
assert_eq!(n, matrix.cols());
assert_eq!(n, b.len());
let mut x = vec![0.0; n];
let mut r = b.to_vec(); let mut p = b.to_vec(); let mut ap = vec![0.0; n];
let mut rsold = Self::dot_product_ultra_fast(&r, &r);
for _iteration in 0..self.max_iterations {
if rsold <= self.tolerance_sq { break; }
matrix.multiply_vector_ultra_fast(&p, &mut ap);
let pap = Self::dot_product_ultra_fast(&p, &ap);
if pap.abs() < 1e-16 { break; }
let alpha = rsold / pap;
Self::axpy_ultra_fast(alpha, &p, &mut x);
Self::axpy_ultra_fast(-alpha, &ap, &mut r);
let rsnew = Self::dot_product_ultra_fast(&r, &r);
let beta = rsnew / rsold;
for i in 0..n {
unsafe { *p.get_unchecked_mut(i) = *r.get_unchecked(i) + beta * *p.get_unchecked(i); }
}
rsold = rsnew;
}
x
}
#[inline]
fn dot_product_ultra_fast(x: &[f64], y: &[f64]) -> f64 {
let n = x.len();
let chunks = n / 8;
let remainder = n % 8;
let mut sum = 0.0;
for chunk in 0..chunks {
let base = chunk * 8;
sum += unsafe {
x.get_unchecked(base) * y.get_unchecked(base) +
x.get_unchecked(base + 1) * y.get_unchecked(base + 1) +
x.get_unchecked(base + 2) * y.get_unchecked(base + 2) +
x.get_unchecked(base + 3) * y.get_unchecked(base + 3) +
x.get_unchecked(base + 4) * y.get_unchecked(base + 4) +
x.get_unchecked(base + 5) * y.get_unchecked(base + 5) +
x.get_unchecked(base + 6) * y.get_unchecked(base + 6) +
x.get_unchecked(base + 7) * y.get_unchecked(base + 7)
};
}
for i in (chunks * 8)..(chunks * 8 + remainder) {
sum += unsafe { x.get_unchecked(i) * y.get_unchecked(i) };
}
sum
}
#[inline]
fn axpy_ultra_fast(alpha: f64, x: &[f64], y: &mut [f64]) {
let n = x.len();
let chunks = n / 8;
let remainder = n % 8;
for chunk in 0..chunks {
let base = chunk * 8;
unsafe {
*y.get_unchecked_mut(base) += alpha * x.get_unchecked(base);
*y.get_unchecked_mut(base + 1) += alpha * x.get_unchecked(base + 1);
*y.get_unchecked_mut(base + 2) += alpha * x.get_unchecked(base + 2);
*y.get_unchecked_mut(base + 3) += alpha * x.get_unchecked(base + 3);
*y.get_unchecked_mut(base + 4) += alpha * x.get_unchecked(base + 4);
*y.get_unchecked_mut(base + 5) += alpha * x.get_unchecked(base + 5);
*y.get_unchecked_mut(base + 6) += alpha * x.get_unchecked(base + 6);
*y.get_unchecked_mut(base + 7) += alpha * x.get_unchecked(base + 7);
}
}
for i in (chunks * 8)..(chunks * 8 + remainder) {
unsafe { *y.get_unchecked_mut(i) += alpha * x.get_unchecked(i); }
}
}
}
pub fn generate_test_matrix(size: usize, sparsity: f64) -> (UltraFastCSR, Vec<f64>) {
let mut triplets = Vec::new();
let mut rng_state = 12345u64;
for i in 0..size {
triplets.push((i, i, 10.0 + i as f64 * 0.01));
let nnz_per_row = ((size as f64 * sparsity).max(1.0) as usize).min(10);
for _ in 0..nnz_per_row {
rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
let j = (rng_state as usize) % size;
if i != j {
let val = (rng_state as f64 / u64::MAX as f64) * 0.1; triplets.push((i, j, val));
}
}
}
let matrix = UltraFastCSR::from_triplets(triplets, size, size);
let b = vec![1.0; size];
(matrix, b)
}
pub fn benchmark_rust_performance() {
println!("🚀 Ultra-Fast Rust Benchmark - Showing TRUE Rust Performance");
println!("Target: 100x+ faster than Python, not 190x slower!");
println!("=" * 70);
let sizes = [100, 1000, 5000, 10000];
let sparsity = 0.001;
for size in sizes {
println!("\n📊 Testing {}x{} matrix (sparsity: {:.1}%)...", size, size, sparsity * 100.0);
let (matrix, b) = generate_test_matrix(size, sparsity);
println!(" NNZ: {}", matrix.nnz());
let solver = UltraFastCG::new(1000, 1e-10);
let _ = solver.solve(&matrix, &b);
let start = Instant::now();
let solution = solver.solve(&matrix, &b);
let elapsed = start.elapsed();
let time_ms = elapsed.as_secs_f64() * 1000.0;
let python_baseline_ms = match size {
100 => 5.0, 1000 => 40.0, 5000 => 500.0, 10000 => 2000.0, _ => 1000.0,
};
let speedup = python_baseline_ms / time_ms;
let status = if speedup >= 10.0 { "🚀 CRUSHING" }
else if speedup >= 2.0 { "✅ WINNING" }
else { "❌ NEEDS WORK" };
println!(" Rust time: {:.3}ms", time_ms);
println!(" Python baseline: {:.1}ms", python_baseline_ms);
println!(" Speedup: {:.1}x {}", speedup, status);
println!(" Memory usage: ~{:.2}MB", (matrix.nnz() * 16) as f64 / 1024.0 / 1024.0);
let mut residual = vec![0.0; size];
matrix.multiply_vector_ultra_fast(&solution, &mut residual);
let mut error = 0.0;
for i in 0..size {
let diff = residual[i] - b[i];
error += diff * diff;
}
error = error.sqrt();
println!(" Solution error: {:.2e}", error);
if time_ms > python_baseline_ms {
println!(" ⚠️ WARNING: Rust is slower than Python - major optimization needed!");
}
}
println!("\n🎯 Target Performance Goals:");
println!(" - 1000x1000: < 5ms (target: 10x+ faster than Python)");
println!(" - 10000x10000: < 50ms (target: 40x+ faster than Python)");
println!(" - Memory efficiency: < 1MB for sparse matrices");
println!(" - Solution accuracy: < 1e-8 relative error");
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ultra_fast_solver() {
let triplets = vec![
(0, 0, 4.0), (0, 1, 1.0),
(1, 0, 1.0), (1, 1, 3.0),
];
let matrix = UltraFastCSR::from_triplets(triplets, 2, 2);
let b = vec![1.0, 2.0];
let solver = UltraFastCG::new(1000, 1e-10);
let solution = solver.solve(&matrix, &b);
let mut result = vec![0.0; 2];
matrix.multiply_vector_ultra_fast(&solution, &mut result);
let error = ((result[0] - b[0]).powi(2) + (result[1] - b[1]).powi(2)).sqrt();
assert!(error < 1e-8, "Solution error too large: {}", error);
}
#[test]
fn test_performance_target() {
let (matrix, b) = generate_test_matrix(1000, 0.001);
let solver = UltraFastCG::new(1000, 1e-8);
let start = Instant::now();
let _solution = solver.solve(&matrix, &b);
let elapsed = start.elapsed();
let time_ms = elapsed.as_secs_f64() * 1000.0;
println!("1000x1000 solve time: {:.3}ms", time_ms);
assert!(time_ms < 10.0, "Performance target missed: {:.3}ms > 10ms", time_ms);
}
}