use crate::dtype::DType;
use crate::error::{Error, Result};
use crate::runtime::Runtime;
use crate::sparse::CscData;
use crate::tensor::Tensor;
pub(crate) fn extract_values_f64<R: Runtime<DType = DType>>(a: &CscData<R>) -> Result<Vec<f64>> {
let dtype = a.values().dtype();
match dtype {
DType::F32 => Ok(a
.values()
.to_vec::<f32>()
.iter()
.map(|&x| x as f64)
.collect()),
DType::F64 => Ok(a.values().to_vec()),
_ => Err(Error::UnsupportedDType {
dtype,
op: "sparse_qr",
}),
}
}
pub(crate) fn extract_values_f64_tensor<R: Runtime<DType = DType>>(
t: &Tensor<R>,
) -> Result<Vec<f64>> {
let dtype = t.dtype();
match dtype {
DType::F32 => Ok(t.to_vec::<f32>().iter().map(|&x| x as f64).collect()),
DType::F64 => Ok(t.to_vec()),
_ => Err(Error::UnsupportedDType {
dtype,
op: "sparse_qr",
}),
}
}
pub(crate) fn create_r_tensor<R: Runtime<DType = DType>>(
m: usize,
n: usize,
r_col_ptrs: &[i64],
r_row_indices: &[i64],
r_values: &[f64],
dtype: DType,
device: &R::Device,
) -> Result<CscData<R>> {
match dtype {
DType::F32 => {
let vals_f32: Vec<f32> = r_values.iter().map(|&x| x as f32).collect();
CscData::<R>::from_slices(r_col_ptrs, r_row_indices, &vals_f32, [m, n], device)
}
DType::F64 => {
CscData::<R>::from_slices(r_col_ptrs, r_row_indices, r_values, [m, n], device)
}
_ => Err(Error::UnsupportedDType {
dtype,
op: "sparse_qr",
}),
}
}
pub(crate) fn create_vector_tensor<R: Runtime<DType = DType>>(
data: &[f64],
dtype: DType,
device: &R::Device,
) -> Result<Tensor<R>> {
let n = data.len();
match dtype {
DType::F32 => {
let data_f32: Vec<f32> = data.iter().map(|&x| x as f32).collect();
Ok(Tensor::<R>::from_slice(&data_f32, &[n], device))
}
DType::F64 => Ok(Tensor::<R>::from_slice(data, &[n], device)),
_ => Err(Error::UnsupportedDType {
dtype,
op: "sparse_qr",
}),
}
}
#[cfg(any(feature = "cuda", feature = "wgpu"))]
/// Returns `k * m - k * (k - 1) / 2`, which equals the sum of `m - j` for
/// `j` in `0..k`.
///
/// NOTE(review): presumably the start of the `k`-th Householder vector in a
/// packed buffer where vector `j` holds `m - j` entries — confirm against the
/// GPU callers.
pub(crate) fn h_offset(k: usize, m: usize) -> usize {
    // Treat k == 0 explicitly so that `k - 1` never underflows.
    let triangle = match k {
        0 => 0,
        _ => k * (k - 1) / 2,
    };
    k * m - triangle
}
#[cfg(any(feature = "cuda", feature = "wgpu"))]
/// Returns `k * (k - 1) / 2`, i.e. `0 + 1 + ... + (k - 1)`.
///
/// This is the position at which column `k` starts in a packed buffer where
/// column `j` contributes `j` entries; `build_r_csc` in this file indexes its
/// `r_offdiag` slice that way.
pub(crate) fn r_offdiag_offset(k: usize) -> usize {
    // Guard k == 0 so that `k - 1` never underflows.
    if k == 0 {
        return 0;
    }
    k * (k - 1) / 2
}
#[cfg(any(feature = "cuda", feature = "wgpu"))]
/// Converts a packed upper-triangular factor into CSC arrays.
///
/// For each column `k` in `0..min_mn` the `k` entries above the diagonal are
/// read from `r_offdiag` starting at `r_offdiag_offset(k)`. Entries whose
/// absolute value is not strictly greater than `1e-15` are dropped. The
/// diagonal entry `diag[k]` is always stored, even when it is zero. Columns
/// `min_mn..n` are left empty.
///
/// Returns `(col_ptrs, row_indices, values)`, where `col_ptrs` has `n + 1`
/// entries.
///
/// # Panics
///
/// Panics if `r_offdiag` or `diag` is too short for `min_mn` columns, or if
/// `min_mn` exceeds `n`.
pub(crate) fn build_r_csc(
    r_offdiag: &[f64],
    diag: &[f64],
    min_mn: usize,
    n: usize,
) -> (Vec<i64>, Vec<i64>, Vec<f64>) {
    let mut col_ptrs = vec![0i64; n + 1];
    let mut rows: Vec<i64> = Vec::new();
    let mut vals: Vec<f64> = Vec::new();

    for col in 0..min_mn {
        let base = r_offdiag_offset(col);

        // Strictly-upper part of this column, skipping numerically-zero entries.
        let kept = (0..col)
            .map(|row| (row, r_offdiag[base + row]))
            .filter(|&(_, v)| v.abs() > 1e-15);
        for (row, v) in kept {
            rows.push(row as i64);
            vals.push(v);
        }

        // The diagonal entry is stored unconditionally.
        rows.push(col as i64);
        vals.push(diag[col]);

        col_ptrs[col + 1] = rows.len() as i64;
    }

    // Columns past `min_mn` hold nothing: repeat the final pointer.
    let end = col_ptrs[min_mn];
    col_ptrs[min_mn + 1..].fill(end);

    (col_ptrs, rows, vals)
}
#[cfg(any(feature = "cuda", feature = "wgpu"))]
/// Returns the index of the first of the leading `min_mn` entries of `diag`
/// whose absolute value is strictly below `rank_tolerance`, or `min_mn` when
/// there is none.
///
/// A NaN entry never compares below the tolerance, so it does not stop the
/// scan.
///
/// # Panics
///
/// Panics if the scan reaches an index at or beyond `diag.len()` before
/// finding a small entry.
pub(crate) fn detect_rank(diag: &[f64], min_mn: usize, rank_tolerance: f64) -> usize {
    (0..min_mn)
        .find(|&k| diag[k].abs() < rank_tolerance)
        .unwrap_or(min_mn)
}