#![cfg(target_arch = "riscv64")]
use crate::traits::SimdError;
/// Result alias returned by the fallible operations in this module; the error type is [`SimdError`].
pub type RiscVResult<T> = Result<T, SimdError>;
/// Description of the RISC-V Vector ("V") extension support assumed by this build.
///
/// The values produced by [`RiscVVectorCaps::detect`] come from compile-time
/// `cfg` checks; nothing is probed from the hardware at run time.
#[derive(Debug, Clone, Copy)]
pub struct RiscVVectorCaps {
    /// Vector register length in bits (the width helpers divide it by 32 / 64).
    pub vlen: usize,
    /// Element-width figure in bits; `detect` always reports 64.
    pub elen: usize,
    /// Set by `detect` to `true` only when compiled with `target_feature = "v"`.
    pub available: bool,
}

impl RiscVVectorCaps {
    /// Builds the capability record for the current compilation target.
    ///
    /// With `target_feature = "v"` the record is marked available and `vlen`
    /// comes from `detect_vlen`; otherwise it is marked unavailable with a
    /// nominal `vlen` of 128.
    pub fn detect() -> Self {
        #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
        {
            Self {
                vlen: Self::detect_vlen(),
                elen: 64,
                available: true,
            }
        }
        #[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
        {
            Self {
                vlen: 128,
                elen: 64,
                available: false,
            }
        }
    }

    /// Vector length in bits.
    ///
    /// Placeholder: always returns 128 rather than querying the hardware.
    #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
    fn detect_vlen() -> usize {
        128
    }

    /// Number of `f32` lanes per vector register; always at least 1.
    pub fn f32_width(&self) -> usize {
        self.lanes(32)
    }

    /// Number of `f64` lanes per vector register; always at least 1.
    pub fn f64_width(&self) -> usize {
        self.lanes(64)
    }

    /// Lanes of `element_bits`-wide elements that fit in `vlen` bits.
    ///
    /// Returns 1 when the extension is unavailable. The result is also clamped
    /// to 1 when `vlen` is narrower than the element, because the fields are
    /// public and a zero lane count would break callers that use it as a
    /// chunk size.
    fn lanes(&self, element_bits: usize) -> usize {
        if self.available {
            (self.vlen / element_bits).max(1)
        } else {
            1
        }
    }
}
/// Element-wise and reduction kernels over `f32` slices.
///
/// Invalid input is reported through [`SimdError`]: `EmptyInput` for empty
/// slices and `DimensionMismatch` for slices of different lengths.
///
/// When compiled with `target_feature = "v"`, inputs of at least four elements
/// are routed to private `*_rvv` helpers. Those helpers are chunked scalar
/// loops, not hand-written vector intrinsics.
pub struct RiscVVectorOps;

impl RiscVVectorOps {
    /// Shortest input that is dispatched to the `*_rvv` helpers.
    #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
    const RVV_MIN_LEN: usize = 4;

    /// Fails with `DimensionMismatch { expected, actual }` when the lengths differ.
    fn ensure_same_len(expected: usize, actual: usize) -> RiscVResult<()> {
        if expected == actual {
            Ok(())
        } else {
            Err(SimdError::DimensionMismatch { expected, actual })
        }
    }

    /// Fails with `EmptyInput` when `values` has no elements.
    fn ensure_non_empty(values: &[f32]) -> RiscVResult<()> {
        if values.is_empty() {
            Err(SimdError::EmptyInput)
        } else {
            Ok(())
        }
    }

    /// Dot product of `x` and `y`.
    ///
    /// # Errors
    /// `DimensionMismatch` if the lengths differ, `EmptyInput` if both are empty.
    pub fn dot_product(x: &[f32], y: &[f32]) -> RiscVResult<f32> {
        Self::ensure_same_len(x.len(), y.len())?;
        Self::ensure_non_empty(x)?;
        #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
        {
            if x.len() >= Self::RVV_MIN_LEN {
                return Self::dot_product_rvv(x, y);
            }
        }
        Ok(x.iter().zip(y).map(|(a, b)| a * b).sum())
    }

    /// Dot product accumulated as one partial sum per 64-element chunk,
    /// followed by the leftover elements one at a time.
    ///
    /// Callers must pass slices of equal length.
    #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
    fn dot_product_rvv(x: &[f32], y: &[f32]) -> RiscVResult<f32> {
        const CHUNK_SIZE: usize = 64;
        let x_chunks = x.chunks_exact(CHUNK_SIZE);
        let y_chunks = y.chunks_exact(CHUNK_SIZE);
        // Grab the tails before the chunk iterators are consumed below.
        let x_tail = x_chunks.remainder();
        let y_tail = y_chunks.remainder();

        let mut sum = 0.0f32;
        for (x_chunk, y_chunk) in x_chunks.zip(y_chunks) {
            sum += x_chunk
                .iter()
                .zip(y_chunk)
                .map(|(a, b)| a * b)
                .sum::<f32>();
        }
        for (a, b) in x_tail.iter().zip(y_tail) {
            sum += a * b;
        }
        Ok(sum)
    }

    /// Element-wise sum `x[i] + y[i]`.
    ///
    /// # Errors
    /// `DimensionMismatch` if the lengths differ, `EmptyInput` if both are empty.
    pub fn add(x: &[f32], y: &[f32]) -> RiscVResult<Vec<f32>> {
        Self::ensure_same_len(x.len(), y.len())?;
        Self::ensure_non_empty(x)?;
        #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
        {
            if x.len() >= Self::RVV_MIN_LEN {
                return Self::add_rvv(x, y);
            }
        }
        Ok(x.iter().zip(y).map(|(a, b)| a + b).collect())
    }

    /// Element-wise sum processed in chunks of up to 32 elements.
    ///
    /// Callers must pass slices of equal length.
    #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
    fn add_rvv(x: &[f32], y: &[f32]) -> RiscVResult<Vec<f32>> {
        const CHUNK_SIZE: usize = 32;
        let mut result = Vec::with_capacity(x.len());
        // `chunks` yields the short tail as its final chunk, so no separate
        // remainder loop is needed.
        for (x_chunk, y_chunk) in x.chunks(CHUNK_SIZE).zip(y.chunks(CHUNK_SIZE)) {
            result.extend(x_chunk.iter().zip(y_chunk).map(|(a, b)| a + b));
        }
        Ok(result)
    }

    /// Multiplies every element of `vector` by `scalar`.
    ///
    /// # Errors
    /// `EmptyInput` if `vector` is empty.
    pub fn scale(vector: &[f32], scalar: f32) -> RiscVResult<Vec<f32>> {
        Self::ensure_non_empty(vector)?;
        #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
        {
            if vector.len() >= Self::RVV_MIN_LEN {
                return Self::scale_rvv(vector, scalar);
            }
        }
        Ok(vector.iter().map(|x| x * scalar).collect())
    }

    /// Scaling processed in chunks of up to 32 elements.
    #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
    fn scale_rvv(vector: &[f32], scalar: f32) -> RiscVResult<Vec<f32>> {
        const CHUNK_SIZE: usize = 32;
        let mut result = Vec::with_capacity(vector.len());
        for chunk in vector.chunks(CHUNK_SIZE) {
            result.extend(chunk.iter().map(|x| x * scalar));
        }
        Ok(result)
    }

    /// Sum of all elements.
    ///
    /// # Errors
    /// `EmptyInput` if `vector` is empty.
    pub fn sum(vector: &[f32]) -> RiscVResult<f32> {
        Self::ensure_non_empty(vector)?;
        Ok(vector.iter().sum())
    }

    /// Largest element.
    ///
    /// The fold starts at negative infinity and uses `f32::max`, which returns
    /// the non-NaN operand; NaN elements are therefore skipped and an all-NaN
    /// input yields negative infinity.
    ///
    /// # Errors
    /// `EmptyInput` if `vector` is empty.
    pub fn max(vector: &[f32]) -> RiscVResult<f32> {
        Self::ensure_non_empty(vector)?;
        Ok(vector.iter().copied().fold(f32::NEG_INFINITY, f32::max))
    }

    /// Smallest element.
    ///
    /// The fold starts at positive infinity and uses `f32::min`, which returns
    /// the non-NaN operand; NaN elements are therefore skipped and an all-NaN
    /// input yields positive infinity.
    ///
    /// # Errors
    /// `EmptyInput` if `vector` is empty.
    pub fn min(vector: &[f32]) -> RiscVResult<f32> {
        Self::ensure_non_empty(vector)?;
        Ok(vector.iter().copied().fold(f32::INFINITY, f32::min))
    }

    /// Euclidean (L2) norm: square root of the sum of squares.
    ///
    /// # Errors
    /// `EmptyInput` if `vector` is empty.
    pub fn norm(vector: &[f32]) -> RiscVResult<f32> {
        Self::ensure_non_empty(vector)?;
        let sum_of_squares: f32 = vector.iter().map(|x| x * x).sum();
        Ok(sum_of_squares.sqrt())
    }

    /// Element-wise `a[i] * b[i] + c[i]` (a separate multiply and add, not a
    /// fused `mul_add`).
    ///
    /// # Errors
    /// `DimensionMismatch` if `b` or `c` differs in length from `a`; `actual`
    /// is the length of the first operand that differs (`b` is checked before
    /// `c`). `EmptyInput` if all three are empty.
    pub fn fma(a: &[f32], b: &[f32], c: &[f32]) -> RiscVResult<Vec<f32>> {
        // Check each operand separately so the error names the length that is
        // actually wrong instead of the smaller of the two.
        Self::ensure_same_len(a.len(), b.len())?;
        Self::ensure_same_len(a.len(), c.len())?;
        Self::ensure_non_empty(a)?;
        #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
        {
            if a.len() >= Self::RVV_MIN_LEN {
                return Self::fma_rvv(a, b, c);
            }
        }
        Ok(a.iter()
            .zip(b)
            .zip(c)
            .map(|((a_val, b_val), c_val)| a_val * b_val + c_val)
            .collect())
    }

    /// Multiply-add processed in chunks of up to 32 elements.
    ///
    /// Callers must pass slices of equal length.
    #[cfg(all(target_arch = "riscv64", target_feature = "v"))]
    fn fma_rvv(a: &[f32], b: &[f32], c: &[f32]) -> RiscVResult<Vec<f32>> {
        const CHUNK_SIZE: usize = 32;
        let mut result = Vec::with_capacity(a.len());
        let chunks = a
            .chunks(CHUNK_SIZE)
            .zip(b.chunks(CHUNK_SIZE))
            .zip(c.chunks(CHUNK_SIZE));
        for ((a_chunk, b_chunk), c_chunk) in chunks {
            result.extend(
                a_chunk
                    .iter()
                    .zip(b_chunk)
                    .zip(c_chunk)
                    .map(|((a_val, b_val), c_val)| a_val * b_val + c_val),
            );
        }
        Ok(result)
    }

    /// Matrix-vector product: one [`Self::dot_product`] per row of `matrix`.
    ///
    /// # Errors
    /// `EmptyInput` if `matrix` or `vector` is empty. `DimensionMismatch` if
    /// the first row's length differs from `vector.len()`; a later row of a
    /// different length is rejected by `dot_product` with that row's length as
    /// `expected`.
    pub fn matvec_multiply(matrix: &[Vec<f32>], vector: &[f32]) -> RiscVResult<Vec<f32>> {
        let Some(first_row) = matrix.first() else {
            return Err(SimdError::EmptyInput);
        };
        Self::ensure_non_empty(vector)?;
        Self::ensure_same_len(first_row.len(), vector.len())?;
        // Collecting into `Result` stops at the first failing row.
        matrix
            .iter()
            .map(|row| Self::dot_product(row, vector))
            .collect()
    }

    /// Scales `vector` to unit L2 norm.
    ///
    /// A vector whose norm is exactly zero is returned as all zeros rather
    /// than dividing by zero.
    ///
    /// # Errors
    /// `EmptyInput` if `vector` is empty.
    pub fn normalize(vector: &[f32]) -> RiscVResult<Vec<f32>> {
        let norm = Self::norm(vector)?;
        if norm == 0.0 {
            return Ok(vec![0.0; vector.len()]);
        }
        Self::scale(vector, 1.0 / norm)
    }
}
/// Element-wise activation functions over `f32` slices.
pub struct RiscVActivations;

impl RiscVActivations {
    /// Applies `activation` to every element of `input`, rejecting an empty
    /// slice with `EmptyInput`.
    fn map_non_empty(input: &[f32], activation: impl Fn(f32) -> f32) -> RiscVResult<Vec<f32>> {
        if input.is_empty() {
            return Err(SimdError::EmptyInput);
        }
        Ok(input.iter().copied().map(activation).collect())
    }

    /// Rectified linear unit: each element is replaced by `max(x, 0.0)`.
    ///
    /// # Errors
    /// `EmptyInput` if `input` is empty.
    pub fn relu(input: &[f32]) -> RiscVResult<Vec<f32>> {
        Self::map_non_empty(input, |value| value.max(0.0))
    }

    /// Logistic sigmoid `1 / (1 + e^-x)`, evaluated after clamping `x` to
    /// `[-10, 10]`; inputs outside that range give the same result as the
    /// nearest bound.
    ///
    /// # Errors
    /// `EmptyInput` if `input` is empty.
    pub fn sigmoid(input: &[f32]) -> RiscVResult<Vec<f32>> {
        Self::map_non_empty(input, |value| {
            let bounded = value.clamp(-10.0, 10.0);
            1.0 / (1.0 + (-bounded).exp())
        })
    }

    /// Hyperbolic tangent of each element.
    ///
    /// # Errors
    /// `EmptyInput` if `input` is empty.
    pub fn tanh(input: &[f32]) -> RiscVResult<Vec<f32>> {
        Self::map_non_empty(input, f32::tanh)
    }
}
/// Distance metrics between two `f32` slices of equal length.
pub struct RiscVDistanceOps;

impl RiscVDistanceOps {
    /// Shared input validation: the lengths must match (checked first) and the
    /// slices must not be empty.
    fn validate(x: &[f32], y: &[f32]) -> RiscVResult<()> {
        if x.len() != y.len() {
            return Err(SimdError::DimensionMismatch {
                expected: x.len(),
                actual: y.len(),
            });
        }
        if x.is_empty() {
            return Err(SimdError::EmptyInput);
        }
        Ok(())
    }

    /// Euclidean (L2) distance: square root of the summed squared differences.
    ///
    /// # Errors
    /// `DimensionMismatch` if the lengths differ, `EmptyInput` if both are empty.
    pub fn euclidean_distance(x: &[f32], y: &[f32]) -> RiscVResult<f32> {
        Self::validate(x, y)?;
        let sum_of_squares: f32 = x
            .iter()
            .zip(y)
            .map(|(a, b)| {
                let diff = a - b;
                diff * diff
            })
            .sum();
        Ok(sum_of_squares.sqrt())
    }

    /// Manhattan (L1) distance: sum of absolute differences.
    ///
    /// # Errors
    /// `DimensionMismatch` if the lengths differ, `EmptyInput` if both are empty.
    pub fn manhattan_distance(x: &[f32], y: &[f32]) -> RiscVResult<f32> {
        Self::validate(x, y)?;
        Ok(x.iter().zip(y).map(|(a, b)| (a - b).abs()).sum())
    }

    /// Cosine distance, `1 - cos(x, y)`.
    ///
    /// Returns `1.0` when either vector has a norm of exactly zero. For finite
    /// inputs the result lies in `[0, 2]`.
    ///
    /// # Errors
    /// `DimensionMismatch` if the lengths differ, `EmptyInput` if both are empty.
    pub fn cosine_distance(x: &[f32], y: &[f32]) -> RiscVResult<f32> {
        Self::validate(x, y)?;
        let dot_product: f32 = x.iter().zip(y).map(|(a, b)| a * b).sum();
        let norm_x = x.iter().map(|a| a * a).sum::<f32>().sqrt();
        let norm_y = y.iter().map(|b| b * b).sum::<f32>().sqrt();
        if norm_x == 0.0 || norm_y == 0.0 {
            return Ok(1.0);
        }
        // Rounding can push the ratio a few ULPs outside [-1, 1] for
        // (anti-)parallel vectors; clamp so the distance never goes negative.
        let cosine_similarity = (dot_product / (norm_x * norm_y)).clamp(-1.0, 1.0);
        Ok(1.0 - cosine_similarity)
    }
}
/// Holds the vector capabilities captured when the value was created.
pub struct RiscVConfig {
    /// Snapshot taken from `RiscVVectorCaps::detect` in [`RiscVConfig::new`].
    caps: RiscVVectorCaps,
}

impl RiscVConfig {
    /// Creates a configuration from the detected capabilities.
    pub fn new() -> Self {
        let caps = RiscVVectorCaps::detect();
        Self { caps }
    }

    /// Whether the stored capabilities are flagged as available.
    pub fn is_available(&self) -> bool {
        let Self { caps } = self;
        caps.available
    }

    /// Returns a copy of the stored capabilities.
    pub fn capabilities(&self) -> RiscVVectorCaps {
        let Self { caps } = self;
        *caps
    }

    /// `f32` lane count reported by the stored capabilities.
    pub fn optimal_f32_width(&self) -> usize {
        RiscVVectorCaps::f32_width(&self.caps)
    }

    /// `f64` lane count reported by the stored capabilities.
    pub fn optimal_f64_width(&self) -> usize {
        RiscVVectorCaps::f64_width(&self.caps)
    }
}

impl Default for RiscVConfig {
    /// Equivalent to [`RiscVConfig::new`].
    fn default() -> Self {
        RiscVConfig::new()
    }
}
/// Shared configuration, constructed by `RiscVConfig::new` on first access.
static RISCV_CONFIG: std::sync::LazyLock<RiscVConfig> = std::sync::LazyLock::new(RiscVConfig::new);

/// Returns the shared [`RiscVConfig`], initializing it on the first call.
pub fn riscv_config() -> &'static RiscVConfig {
    std::sync::LazyLock::force(&RISCV_CONFIG)
}
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Message attached to every `expect` on a call that must return `Ok`.
    const MUST_SUCCEED: &str = "operation should succeed";

    /// Asserts that two floats differ by less than `1e-6`.
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < 1e-6);
    }

    #[test]
    fn test_riscv_caps_detection() {
        let detected = RiscVVectorCaps::detect();
        assert!(detected.vlen > 0);
        assert!(detected.elen > 0);
        assert!(detected.f32_width() >= 1);
        assert!(detected.f64_width() >= 1);
    }

    #[test]
    fn test_riscv_dot_product() {
        let lhs = [1.0_f32, 2.0, 3.0, 4.0];
        let rhs = [2.0_f32, 3.0, 4.0, 5.0];
        let dot = RiscVVectorOps::dot_product(&lhs, &rhs).expect(MUST_SUCCEED);
        // 1*2 + 2*3 + 3*4 + 4*5
        assert_eq!(dot, 40.0);
    }

    #[test]
    fn test_riscv_vector_add() {
        let lhs = [1.0_f32, 2.0, 3.0];
        let rhs = [4.0_f32, 5.0, 6.0];
        let total = RiscVVectorOps::add(&lhs, &rhs).expect(MUST_SUCCEED);
        assert_eq!(total, vec![5.0_f32, 7.0, 9.0]);
    }

    #[test]
    fn test_riscv_scale() {
        let input = [1.0_f32, 2.0, 3.0];
        let doubled = RiscVVectorOps::scale(&input, 2.0).expect(MUST_SUCCEED);
        assert_eq!(doubled, vec![2.0_f32, 4.0, 6.0]);
    }

    #[test]
    fn test_riscv_sum() {
        let input = [1.0_f32, 2.0, 3.0, 4.0];
        let total = RiscVVectorOps::sum(&input).expect(MUST_SUCCEED);
        assert_eq!(total, 10.0);
    }

    #[test]
    fn test_riscv_max_min() {
        let input = [3.0_f32, 1.0, 4.0, 1.0, 5.0];
        let largest = RiscVVectorOps::max(&input).expect(MUST_SUCCEED);
        let smallest = RiscVVectorOps::min(&input).expect(MUST_SUCCEED);
        assert_eq!(largest, 5.0);
        assert_eq!(smallest, 1.0);
    }

    #[test]
    fn test_riscv_norm() {
        let input = [3.0_f32, 4.0, 0.0];
        let length = RiscVVectorOps::norm(&input).expect(MUST_SUCCEED);
        assert_close(length, 5.0);
    }

    #[test]
    fn test_riscv_relu() {
        let input = [-1.0_f32, 0.0, 1.0, 2.0];
        let activated = RiscVActivations::relu(&input).expect(MUST_SUCCEED);
        assert_eq!(activated, vec![0.0_f32, 0.0, 1.0, 2.0]);
    }

    #[test]
    fn test_riscv_sigmoid() {
        let input = [0.0_f32, 1.0, -1.0];
        let activated = RiscVActivations::sigmoid(&input).expect(MUST_SUCCEED);
        // sigmoid(0) is one half; positive inputs land above it, negative below.
        assert!((activated[0] - 0.5).abs() < 0.01);
        assert!(activated[1] > 0.5);
        assert!(activated[2] < 0.5);
    }

    #[test]
    fn test_riscv_euclidean_distance() {
        let origin = [0.0_f32, 0.0];
        let point = [3.0_f32, 4.0];
        let distance =
            RiscVDistanceOps::euclidean_distance(&origin, &point).expect(MUST_SUCCEED);
        assert_close(distance, 5.0);
    }

    #[test]
    fn test_riscv_manhattan_distance() {
        let from = [1.0_f32, 2.0];
        let to = [4.0_f32, 6.0];
        let distance = RiscVDistanceOps::manhattan_distance(&from, &to).expect(MUST_SUCCEED);
        // |1 - 4| + |2 - 6|
        assert_eq!(distance, 7.0);
    }

    #[test]
    fn test_dimension_mismatch() {
        let short = [1.0_f32, 2.0];
        let long = [1.0_f32, 2.0, 3.0];
        assert!(RiscVVectorOps::dot_product(&short, &long).is_err());
        assert!(RiscVDistanceOps::euclidean_distance(&short, &long).is_err());
    }

    #[test]
    fn test_empty_input() {
        let nothing: Vec<f32> = Vec::new();
        assert!(RiscVVectorOps::sum(&nothing).is_err());
        assert!(RiscVActivations::relu(&nothing).is_err());
    }

    #[test]
    fn test_riscv_config() {
        let local = RiscVConfig::new();
        assert!(local.optimal_f32_width() >= 1);
        assert!(local.optimal_f64_width() >= 1);
        let snapshot = local.capabilities();
        assert!(snapshot.vlen > 0);
        assert!(snapshot.elen > 0);
    }

    #[test]
    fn test_global_riscv_config() {
        let shared = riscv_config();
        assert!(shared.optimal_f32_width() >= 1);
        assert!(shared.optimal_f64_width() >= 1);
    }

    #[test]
    fn test_riscv_fma() {
        let multiplicand = [1.0_f32, 2.0, 3.0, 4.0];
        let multiplier = [2.0_f32, 3.0, 4.0, 5.0];
        let addend = [1.0_f32; 4];
        let fused =
            RiscVVectorOps::fma(&multiplicand, &multiplier, &addend).expect(MUST_SUCCEED);
        assert_eq!(fused, vec![3.0_f32, 7.0, 13.0, 21.0]);
    }

    #[test]
    fn test_riscv_fma_dimension_mismatch() {
        let a = [1.0_f32, 2.0];
        let too_long = [2.0_f32, 3.0, 4.0];
        let c = [1.0_f32, 1.0];
        assert!(RiscVVectorOps::fma(&a, &too_long, &c).is_err());
    }

    #[test]
    fn test_riscv_matvec_multiply() {
        let rows = vec![vec![1.0_f32, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
        let column = [2.0_f32, 3.0];
        let product = RiscVVectorOps::matvec_multiply(&rows, &column).expect(MUST_SUCCEED);
        assert_eq!(product, vec![8.0_f32, 18.0, 28.0]);
    }

    #[test]
    fn test_riscv_matvec_dimension_mismatch() {
        let rows = vec![vec![1.0_f32, 2.0], vec![3.0, 4.0]];
        let too_long = [2.0_f32, 3.0, 4.0];
        assert!(RiscVVectorOps::matvec_multiply(&rows, &too_long).is_err());
    }

    #[test]
    fn test_riscv_normalize() {
        let input = [3.0_f32, 4.0, 0.0];
        let unit = RiscVVectorOps::normalize(&input).expect(MUST_SUCCEED);
        let length = (3.0_f32 * 3.0 + 4.0 * 4.0).sqrt();
        let wanted = [3.0 / length, 4.0 / length, 0.0];
        for (got, want) in unit.iter().zip(wanted.iter()) {
            assert_close(*got, *want);
        }
    }

    #[test]
    fn test_riscv_normalize_zero_vector() {
        let zeros = [0.0_f32; 3];
        let unit = RiscVVectorOps::normalize(&zeros).expect(MUST_SUCCEED);
        assert_eq!(unit, vec![0.0_f32, 0.0, 0.0]);
    }

    #[test]
    fn test_riscv_empty_fma() {
        let nothing: Vec<f32> = Vec::new();
        assert!(RiscVVectorOps::fma(&nothing, &nothing, &nothing).is_err());
    }

    #[test]
    fn test_riscv_capabilities_detection() {
        let detected = RiscVVectorCaps::detect();
        assert!(detected.vlen > 0);
        assert!(detected.elen > 0);
        assert!(detected.f32_width() >= 1);
        assert!(detected.f64_width() >= 1);
        assert!((64..=2048).contains(&detected.vlen));
        assert!(detected.elen >= 32);
    }

    #[test]
    fn test_riscv_large_vector_operations() {
        let size = 1000;
        let ascending: Vec<f32> = (0..size).map(|i| i as f32).collect();
        let shifted: Vec<f32> = (0..size).map(|i| (i + 1) as f32).collect();

        let dot = RiscVVectorOps::dot_product(&ascending, &shifted).expect(MUST_SUCCEED);
        let added = RiscVVectorOps::add(&ascending, &shifted).expect(MUST_SUCCEED);
        let doubled = RiscVVectorOps::scale(&ascending, 2.0).expect(MUST_SUCCEED);

        assert!(dot > 0.0);
        assert_eq!(added.len(), size);
        assert_eq!(doubled.len(), size);
        // Spot-check a couple of positions: i + (i + 1) and 2 * i.
        assert_eq!(added[0], 1.0);
        assert_eq!(added[10], 21.0);
        assert_eq!(doubled[0], 0.0);
        assert_eq!(doubled[10], 20.0);
    }
}