use crate::{Result, Tensor, TensorError};
use scirs2_core::ndarray::Array2;
use scirs2_core::numeric::{Float, One, Zero};
#[cfg(feature = "blas")]
#[deprecated(
since = "0.1.0",
note = "blas-* features are deprecated. Default build now uses scirs2-linalg (Pure Rust via OxiBLAS). Remove blas-* features from your build."
)]
use ndarray_linalg as _legacy_linalg;
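/// Matrix multiplication for 2D tensors.
///
/// With the deprecated `blas` feature enabled, both operands are converted to
/// `Array2` and multiplied with `ndarray`'s `dot`; otherwise this delegates to
/// `crate::ops::matmul`. Illustrative sketch (mirrors the tests below):
///
/// ```ignore
/// let a = Tensor::from_vec(vec![1.0f32, 2.0, 3.0, 4.0], &[2, 2])?;
/// let b = Tensor::from_vec(vec![5.0f32, 6.0, 7.0, 8.0], &[2, 2])?;
/// let c = matmul_blas(&a, &b)?; // [[19, 22], [43, 50]]
/// ```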
pub fn matmul_blas<T>(a: &Tensor<T>, b: &Tensor<T>) -> Result<Tensor<T>>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static + bytemuck::Pod,
{
#[cfg(feature = "blas")]
{
let a_data = a.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Matrix multiplication requires contiguous tensor data".to_string(),
)
})?;
let b_data = b.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Matrix multiplication requires contiguous tensor data".to_string(),
)
})?;
let a_shape = a.shape().dims();
let b_shape = b.shape().dims();
if a_shape.len() != 2 || b_shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"Matrix multiplication requires 2D tensors".to_string(),
));
}
// ndarray's `dot` panics on dimension mismatch, so check it up front.
if a_shape[1] != b_shape[0] {
return Err(TensorError::invalid_shape_simple(format!(
"Matrix multiplication dimension mismatch: ({} x {}) vs ({} x {})",
a_shape[0], a_shape[1], b_shape[0], b_shape[1]
)));
}
let a_array =
Array2::from_shape_vec((a_shape[0], a_shape[1]), a_data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
let b_array =
Array2::from_shape_vec((b_shape[0], b_shape[1]), b_data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
let result = a_array.dot(&b_array);
Ok(Tensor::from_array(result.into_dyn()))
}
#[cfg(not(feature = "blas"))]
{
crate::ops::matmul(a, b)
}
}
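/// LU decomposition, returning `(L, U, P)`.
///
/// The `blas` path currently only validates the input and returns a
/// `BlasError`; the default path delegates to `crate::ops::lu`.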
pub fn lu_decompose_lapack<T>(input: &Tensor<T>) -> Result<(Tensor<T>, Tensor<T>, Tensor<T>)>
where
T: Clone
+ Default
+ Zero
+ One
+ Float
+ Send
+ Sync
+ 'static
+ bytemuck::Pod
+ bytemuck::Zeroable,
{
#[cfg(feature = "blas")]
{
let data = input.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"LU decomposition requires contiguous tensor data".to_string(),
)
})?;
let shape = input.shape().dims();
if shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"LU decomposition requires 2D tensor".to_string(),
));
}
// Validate shape/layout; the actual LU factorization is not yet wired up.
let _matrix = Array2::from_shape_vec((shape[0], shape[1]), data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::BlasError {
operation: "lu".to_string(),
details: "LU decomposition not yet implemented with current ndarray_linalg API"
.to_string(),
context: None,
})
}
#[cfg(not(feature = "blas"))]
{
crate::ops::lu(input)
}
}
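/// Matrix inverse.
///
/// With the `blas` feature this dispatches on `TypeId` to the typed f32/f64
/// LAPACK kernels; other element types yield a `BlasError`. The default path
/// delegates to `crate::ops::inv`.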
pub fn inverse_lapack<T>(input: &Tensor<T>) -> Result<Tensor<T>>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static,
{
#[cfg(feature = "blas")]
{
use std::any::TypeId;
if TypeId::of::<T>() == TypeId::of::<f32>() {
// SAFETY: the TypeId check guarantees T == f32, so the pointer cast is sound.
let input_f32 = unsafe { &*(input as *const Tensor<T> as *const Tensor<f32>) };
let result = super::lapack_f32::inverse_f32(input_f32)?;
// transmute_copy duplicates the value bit-for-bit; forget the original so
// its buffer is not freed twice.
let out = unsafe { std::mem::transmute_copy(&result) };
std::mem::forget(result);
Ok(out)
} else if TypeId::of::<T>() == TypeId::of::<f64>() {
// SAFETY: the TypeId check guarantees T == f64.
let input_f64 = unsafe { &*(input as *const Tensor<T> as *const Tensor<f64>) };
let result = super::lapack_f64::inverse_f64(input_f64)?;
let out = unsafe { std::mem::transmute_copy(&result) };
std::mem::forget(result);
Ok(out)
} else {
Err(TensorError::BlasError {
operation: "inv".to_string(),
details: "LAPACK operations only supported for f32 and f64".to_string(),
context: None,
})
}
}
#[cfg(not(feature = "blas"))]
{
crate::ops::inv(input)
}
}
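/// Determinant of a 2D tensor, returned as a scalar.
///
/// The `blas` path is a validated stub (`not_implemented`); the default path
/// delegates to `crate::ops::det` and extracts the scalar result.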
pub fn determinant_lapack<T>(input: &Tensor<T>) -> Result<T>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static,
{
#[cfg(feature = "blas")]
{
// TODO: use ndarray_linalg::Determinant once a typed (f32/f64) path exists.
let data = input.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Determinant requires contiguous tensor data".to_string(),
)
})?;
let shape = input.shape().dims();
if shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"Determinant requires 2D tensor".to_string(),
));
}
let _matrix = Array2::from_shape_vec((shape[0], shape[1]), data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::not_implemented_simple(
"Determinant requires specific type (f32/f64) implementation".to_string(),
))
}
#[cfg(not(feature = "blas"))]
{
crate::ops::det(input).and_then(|det_tensor| {
det_tensor.as_slice().map(|s| s[0]).ok_or_else(|| {
TensorError::invalid_shape_simple(
"Determinant tensor is not contiguous".to_string(),
)
})
})
}
}
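/// Eigenvalue decomposition.
///
/// The `blas` path is a validated stub; the default path delegates to
/// `crate::ops::linalg::eig`.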
pub fn eigenvalues_lapack<T>(input: &Tensor<T>) -> Result<(Tensor<T>, Tensor<T>)>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static,
{
#[cfg(feature = "blas")]
{
// TODO: use ndarray_linalg::Eig once a typed (f32/f64) path exists.
let data = input.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Eigenvalue decomposition requires contiguous tensor data".to_string(),
)
})?;
let shape = input.shape().dims();
if shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"Eigenvalue decomposition requires 2D tensor".to_string(),
));
}
let _matrix = Array2::from_shape_vec((shape[0], shape[1]), data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::not_implemented_simple(
"Eigenvalues requires specific type (f32/f64) implementation".to_string(),
))
}
#[cfg(not(feature = "blas"))]
{
crate::ops::linalg::eig(input)
}
}
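/// Singular value decomposition.
///
/// The `blas` path is a validated stub; the default path delegates to
/// `crate::ops::linalg::svd`.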
pub fn svd_lapack<T>(input: &Tensor<T>) -> Result<(Tensor<T>, Tensor<T>, Tensor<T>)>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static,
{
#[cfg(feature = "blas")]
{
// TODO: use ndarray_linalg::SVD once a typed (f32/f64) path exists.
let data = input.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple("SVD requires contiguous tensor data".to_string())
})?;
let shape = input.shape().dims();
if shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"SVD requires 2D tensor".to_string(),
));
}
let _matrix = Array2::from_shape_vec((shape[0], shape[1]), data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::not_implemented_simple(
"SVD requires specific type (f32/f64) implementation".to_string(),
))
}
#[cfg(not(feature = "blas"))]
{
crate::ops::linalg::svd(input)
}
}
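/// Cholesky decomposition.
///
/// The `blas` path is a validated stub; the default path delegates to
/// `crate::ops::linalg::cholesky`.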
pub fn cholesky_lapack<T>(input: &Tensor<T>) -> Result<Tensor<T>>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static,
{
#[cfg(feature = "blas")]
{
// TODO: use ndarray_linalg::Cholesky once a typed (f32/f64) path exists.
let data = input.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Cholesky decomposition requires contiguous tensor data".to_string(),
)
})?;
let shape = input.shape().dims();
if shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"Cholesky decomposition requires 2D tensor".to_string(),
));
}
let _matrix = Array2::from_shape_vec((shape[0], shape[1]), data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::not_implemented_simple(
"Cholesky requires specific type (f32/f64) implementation".to_string(),
))
}
#[cfg(not(feature = "blas"))]
{
crate::ops::linalg::cholesky(input)
}
}
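/// QR decomposition, returning `(Q, R)`.
///
/// The `blas` path is a validated stub; the pure-Rust fallback supports only
/// square matrices and delegates to `crate::ops::linalg::qr_decomposition`.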
pub fn qr_lapack<T>(input: &Tensor<T>) -> Result<(Tensor<T>, Tensor<T>)>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static,
{
#[cfg(feature = "blas")]
{
// TODO: use ndarray_linalg::QR once a typed (f32/f64) path exists.
let data = input.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"QR decomposition requires contiguous tensor data".to_string(),
)
})?;
let shape = input.shape().dims();
if shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"QR decomposition requires 2D tensor".to_string(),
));
}
let _matrix = Array2::from_shape_vec((shape[0], shape[1]), data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::not_implemented_simple(
"QR decomposition requires specific type (f32/f64) implementation".to_string(),
))
}
#[cfg(not(feature = "blas"))]
{
let dims = input.shape().dims();
if dims.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"QR decomposition requires 2D tensor".to_string(),
));
}
let (n, m) = (dims[0], dims[1]);
if n != m {
return Err(TensorError::invalid_shape_simple(
"QR decomposition requires square matrix for fallback".to_string(),
));
}
let input_data = input.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"QR decomposition requires contiguous tensor data".to_string(),
)
})?;
let (q, r) = crate::ops::linalg::qr_decomposition(input_data, n)?;
Ok((Tensor::from_vec(q, &[n, n])?, Tensor::from_vec(r, &[n, n])?))
}
}
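/// Solves the linear system `A x = b` for 2D `a` and `b`.
///
/// The `blas` path is a validated stub; the default path computes
/// `A^-1 * b`, which is simpler but less numerically stable than a
/// factorization-based solve.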
pub fn solve_lapack<T>(a: &Tensor<T>, b: &Tensor<T>) -> Result<Tensor<T>>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static + bytemuck::Pod,
{
#[cfg(feature = "blas")]
{
// TODO: use ndarray_linalg::Solve once a typed (f32/f64) path exists.
let a_data = a.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Linear system solve requires contiguous tensor data".to_string(),
)
})?;
let b_data = b.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Linear system solve requires contiguous tensor data".to_string(),
)
})?;
let a_shape = a.shape().dims();
let b_shape = b.shape().dims();
if a_shape.len() != 2 || b_shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"Linear system solve requires 2D matrices".to_string(),
));
}
// Validate shape/layout; the typed (f32/f64) path is not yet implemented.
let _a_matrix =
Array2::from_shape_vec((a_shape[0], a_shape[1]), a_data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
let _b_matrix =
Array2::from_shape_vec((b_shape[0], b_shape[1]), b_data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::not_implemented_simple(
"Solve requires specific type (f32/f64) implementation".to_string(),
))
}
#[cfg(not(feature = "blas"))]
{
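// Fallback: x = A^-1 * b. Simple, but less numerically stable than a
// factorization-based solve.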
let inv_a = crate::ops::inv(a)?;
crate::ops::matmul(&inv_a, b)
}
}
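/// Least squares solution of `A x = b`.
///
/// The `blas` path is a validated stub; the default path computes the
/// SVD-based pseudoinverse solution with a fixed `1e-10` singular value
/// cutoff.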
pub fn lstsq_lapack<T>(a: &Tensor<T>, b: &Tensor<T>) -> Result<Tensor<T>>
where
T: Clone
+ Default
+ Zero
+ One
+ Float
+ Send
+ Sync
+ 'static
+ bytemuck::Pod
+ bytemuck::Zeroable,
{
#[cfg(feature = "blas")]
{
// TODO: use ndarray_linalg::LeastSquaresSvd once a typed (f32/f64) path exists.
let a_data = a.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Least squares solve requires contiguous tensor data".to_string(),
)
})?;
let b_data = b.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Least squares solve requires contiguous tensor data".to_string(),
)
})?;
let a_shape = a.shape().dims();
let b_shape = b.shape().dims();
if a_shape.len() != 2 || b_shape.len() != 2 {
return Err(TensorError::invalid_shape_simple(
"Least squares solve requires 2D matrices".to_string(),
));
}
// Validate shape/layout; the typed (f32/f64) path is not yet implemented.
let _a_matrix =
Array2::from_shape_vec((a_shape[0], a_shape[1]), a_data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
let _b_matrix =
Array2::from_shape_vec((b_shape[0], b_shape[1]), b_data.to_vec()).map_err(|_| {
TensorError::invalid_shape_simple(
"Failed to create Array2 from tensor data".to_string(),
)
})?;
Err(TensorError::not_implemented_simple(
"Least squares requires specific type (f32/f64) implementation".to_string(),
))
}
#[cfg(not(feature = "blas"))]
{
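// Minimum-norm least squares via the SVD pseudoinverse: singular values
// above a fixed 1e-10 cutoff are inverted, the rest are zeroed.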
let (u, s, vt) = crate::ops::linalg::svd(a)?;
let ut_b = crate::ops::matmul(&crate::ops::transpose(&u)?, b)?;
let s_data = s.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Singular values tensor is not contiguous".to_string(),
)
})?;
let s_inv_data: Vec<T> = s_data
.iter()
.map(|val| {
if val.abs() > T::from(1e-10).unwrap_or(T::zero()) {
T::one() / *val
} else {
T::zero()
}
})
.collect();
let s_inv = Tensor::from_vec(s_inv_data, s.shape().dims())?;
let s_inv_ut_b = crate::ops::mul(&s_inv, &ut_b)?;
let result = crate::ops::matmul(&crate::ops::transpose(&vt)?, &s_inv_ut_b)?;
Ok(result)
}
}
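/// Moore-Penrose pseudoinverse via SVD.
///
/// Singular values below `max_singular_value * 1e-15` (a machine-epsilon
/// scale cutoff) are treated as zero.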
pub fn pinv<T>(input: &Tensor<T>) -> Result<Tensor<T>>
where
T: Clone + Default + Zero + One + Float + Send + Sync + 'static + bytemuck::Pod,
{
let (u, s_full, vt) = crate::ops::linalg::svd(input)?;
let input_shape = input.shape().dims();
let m = input_shape[0];
let n = input_shape[1];
let min_dim = m.min(n);
let s_data = s_full.as_slice().ok_or_else(|| {
TensorError::invalid_shape_simple(
"Singular values tensor is not contiguous".to_string(),
)
})?;
let mut singular_values = Vec::new();
for i in 0..min_dim {
singular_values.push(s_data[i * n + i]);
}
let threshold = if let Some(max_s) = singular_values.iter().max_by(|a, b| {
a.abs()
.partial_cmp(&b.abs())
.expect("partial_cmp should not return None for valid values")
}) {
max_s.abs()
* T::from(1e-15).unwrap_or(T::from(1e-10).expect("fallback epsilon should convert"))
} else {
T::from(1e-15).unwrap_or(T::from(1e-10).expect("fallback epsilon should convert"))
};
// Build Sigma^+ (n x m): invert singular values above the cutoff, zero the rest.
let mut s_pinv_data = vec![T::zero(); n * m];
for i in 0..min_dim {
if singular_values[i].abs() > threshold {
s_pinv_data[i * m + i] = T::one() / singular_values[i];
}
}
let s_pinv = Tensor::from_vec(s_pinv_data, &[n, m])?;
let ut = crate::ops::transpose(&u)?;
// Assuming `crate::ops::linalg::svd` returns the third factor as V^T, as the
// lstsq fallback above does, recover V before forming V * Sigma^+ * U^T.
let v = crate::ops::transpose(&vt)?;
let vs_pinv = crate::ops::matmul(&v, &s_pinv)?;
let result = crate::ops::matmul(&vs_pinv, &ut)?;
Ok(result)
}
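/// Returns `true` when the crate was built with the deprecated `blas` feature.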
pub fn is_lapack_available() -> bool {
#[cfg(feature = "blas")]
{
true
}
#[cfg(not(feature = "blas"))]
{
false
}
}
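/// Returns the name of the BLAS/LAPACK backend selected by feature flags, or
/// `"None (Pure Rust)"` when no `blas` feature is enabled.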
pub fn lapack_provider() -> &'static str {
#[cfg(feature = "blas-oxiblas")]
return "OxiBLAS";
#[cfg(all(feature = "blas-openblas", not(feature = "blas-oxiblas")))]
return "OpenBLAS";
#[cfg(all(
feature = "blas-mkl",
not(any(feature = "blas-oxiblas", feature = "blas-openblas"))
))]
return "Intel MKL";
#[cfg(all(
feature = "blas-accelerate",
not(any(
feature = "blas-oxiblas",
feature = "blas-openblas",
feature = "blas-mkl"
))
))]
return "Apple Accelerate";
#[cfg(all(
feature = "blas",
not(any(
feature = "blas-oxiblas",
feature = "blas-openblas",
feature = "blas-mkl",
feature = "blas-accelerate"
))
))]
{
return "Generic BLAS";
}
#[cfg(not(feature = "blas"))]
{
"None (Pure Rust)"
}
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
#[test]
fn test_lapack_availability() {
let available = is_lapack_available();
let provider = lapack_provider();
println!("LAPACK available: {}", available);
println!("Provider: {}", provider);
assert!(!provider.is_empty());
}
#[test]
fn test_matmul_consistency() {
let a = Tensor::from_vec(vec![1.0f32, 2.0, 3.0, 4.0], &[2, 2])
.expect("test: from_vec should succeed");
let b = Tensor::from_vec(vec![5.0f32, 6.0, 7.0, 8.0], &[2, 2])
.expect("test: from_vec should succeed");
let result_regular = crate::ops::matmul(&a, &b).expect("test: matmul should succeed");
let result_lapack = matmul_blas(&a, &b).expect("test: matmul_blas should succeed");
for (r, l) in result_regular
.as_slice()
.expect("test: tensor data should be accessible as slice")
.iter()
.zip(
result_lapack
.as_slice()
.expect("tensor should be contiguous")
.iter(),
)
{
assert_abs_diff_eq!(r, l, epsilon = 1e-6);
}
}
#[test]
#[ignore = "LAPACK determinant not yet fully implemented for generic types"]
fn test_determinant_consistency() {
let matrix = Tensor::from_vec(vec![1.0f32, 2.0, 3.0, 4.0], &[2, 2])
.expect("test: from_vec should succeed");
let result_regular = crate::ops::det(&matrix)
.expect("test: operation should succeed")
.as_slice()
.expect("tensor should be contiguous")[0];
match determinant_lapack(&matrix) {
Ok(result_lapack) => {
assert_abs_diff_eq!(result_regular, result_lapack, epsilon = 1e-6);
}
Err(_) => {
assert_abs_diff_eq!(result_regular, -2.0, epsilon = 1e-6);
}
}
}
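// A minimal sanity check for `pinv`, assuming `crate::ops::linalg::svd`
// handles the 2x2 identity: the pseudoinverse of I must be I itself.
#[test]
fn test_pinv_identity() {
let eye = Tensor::from_vec(vec![1.0f32, 0.0, 0.0, 1.0], &[2, 2])
.expect("test: from_vec should succeed");
let p = pinv(&eye).expect("test: pinv should succeed");
for (got, want) in p
.as_slice()
.expect("tensor should be contiguous")
.iter()
.zip([1.0f32, 0.0, 0.0, 1.0].iter())
{
assert_abs_diff_eq!(got, want, epsilon = 1e-5);
}
}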
}