use super::{CooMatrix, CsrMatrix, LinSolParams, LinSolTrait, Matching, Ordering, Pivoting, StatsLinSol, Sym};
use crate::StrError;
use crate::constants::*;
use russell_lab::{Stopwatch, Vector};
#[repr(C)]
struct InterfaceCUDSS {
_data: [u8; 0],
_marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
unsafe impl Send for InterfaceCUDSS {}
unsafe impl Send for SolverCUDSS {}
unsafe extern "C" {
fn solver_cudss_new() -> *mut InterfaceCUDSS;
fn solver_cudss_drop(solver: *mut InterfaceCUDSS);
fn solver_cudss_initialize(
solver: *mut InterfaceCUDSS,
ordering: i32,
matching: i32,
pivoting: i32,
pivot_epsilon: f64,
refinement_nstep: i32,
hybrid_memory_factor: f64,
verbose: CcBool,
general_symmetric: CcBool,
positive_definite: CcBool,
ndim: i32,
row_pointers: *const i32,
col_indices: *const i32,
values: *const f64,
) -> i32;
fn solver_cudss_factorize(
solver: *mut InterfaceCUDSS,
effective_matching: *mut i32,
effective_pivoting: *mut i32,
verbose: CcBool,
values: *const f64,
) -> i32;
fn solver_cudss_solve(solver: *mut InterfaceCUDSS, x: *mut f64, rhs: *const f64, verbose: CcBool) -> i32;
}
pub struct SolverCUDSS {
solver: *mut InterfaceCUDSS,
csr: Option<CsrMatrix>,
initialized: bool,
factorized: bool,
initialized_sym: Sym,
initialized_ndim: usize,
initialized_nnz: usize,
effective_matching: i32,
effective_pivoting: i32,
stopwatch: Stopwatch,
time_initialize_ns: u128,
time_factorize_ns: u128,
time_solve_ns: u128,
}
impl Drop for SolverCUDSS {
fn drop(&mut self) {
unsafe {
solver_cudss_drop(self.solver);
}
}
}
impl SolverCUDSS {
pub fn new() -> Result<Self, StrError> {
unsafe {
let solver = solver_cudss_new();
if solver.is_null() {
return Err("c-code failed to allocate the cuDSS solver");
}
Ok(SolverCUDSS {
solver,
csr: None,
initialized: false,
factorized: false,
initialized_sym: Sym::No,
initialized_ndim: 0,
initialized_nnz: 0,
effective_matching: 0,
effective_pivoting: 0,
stopwatch: Stopwatch::new(),
time_initialize_ns: 0,
time_factorize_ns: 0,
time_solve_ns: 0,
})
}
}
}
impl LinSolTrait for SolverCUDSS {
fn factorize(&mut self, mat: &CooMatrix, params: Option<LinSolParams>) -> Result<(), StrError> {
if self.initialized {
if mat.symmetric != self.initialized_sym {
return Err("subsequent factorizations must use the same matrix (symmetric differs)");
}
if mat.nrow != self.initialized_ndim {
return Err("subsequent factorizations must use the same matrix (ndim differs)");
}
if mat.nnz != self.initialized_nnz {
return Err("subsequent factorizations must use the same matrix (nnz differs)");
}
if params.is_some() {
return Err("subsequent factorizations must not change LinSolParams");
}
self.csr.as_mut().unwrap().update_from_coo(mat)?;
} else {
if mat.nrow != mat.ncol {
return Err("the matrix must be square");
}
if mat.nnz < 1 {
return Err("the COO matrix must have at least one non-zero value");
}
if mat.symmetric == Sym::YesFull || mat.symmetric == Sym::YesUpper {
return Err("cuDSS requires Sym::YesLower for symmetric matrices");
}
self.initialized_sym = mat.symmetric;
self.initialized_ndim = mat.nrow;
self.initialized_nnz = mat.nnz;
self.csr = Some(CsrMatrix::from_coo(mat)?);
}
let csr = self.csr.as_ref().unwrap();
let par = if let Some(p) = params { p } else { LinSolParams::new() };
let ordering = cudss_ordering(par.ordering);
let matching = cudss_matching(par.matching);
let pivoting = cudss_pivoting(par.pivoting);
let pivot_epsilon = match par.pivot_epsilon {
Some(val) => val,
None => -1.0, };
let refinement_nstep = match par.refinement_nstep {
Some(val) => val,
None => -1, };
let hybrid_memory_factor = match par.hybrid_memory_factor {
Some(v) => {
if v < 0.01 || v > 0.99 {
return Err("the hybrid memory factor must be in [0.01, 0.99]");
}
v
}
None => -1.0,
};
let verbose = if par.verbose { 1 } else { 0 };
let general_symmetric = if mat.symmetric == Sym::YesLower { 1 } else { 0 };
let positive_definite = if par.positive_definite { 1 } else { 0 };
let ndim = to_i32(csr.nrow);
if !self.initialized {
self.stopwatch.reset();
unsafe {
let status = solver_cudss_initialize(
self.solver,
ordering,
matching,
pivoting,
pivot_epsilon,
refinement_nstep,
hybrid_memory_factor,
verbose,
general_symmetric,
positive_definite,
ndim,
csr.row_pointers.as_ptr(),
csr.col_indices.as_ptr(),
csr.values.as_ptr(),
);
if status != SUCCESSFUL_EXIT {
return Err(handle_cudss_error_code(status));
}
}
self.time_initialize_ns = self.stopwatch.stop();
self.initialized = true;
}
self.stopwatch.reset();
unsafe {
let status = solver_cudss_factorize(
self.solver,
&mut self.effective_matching,
&mut self.effective_pivoting,
verbose,
csr.values.as_ptr(),
);
if status != SUCCESSFUL_EXIT {
return Err(handle_cudss_error_code(status));
}
}
self.time_factorize_ns = self.stopwatch.stop();
self.factorized = true;
Ok(())
}
fn solve(&mut self, x: &mut Vector, rhs: &Vector, verbose: bool) -> Result<(), StrError> {
if !self.factorized {
return Err("the function factorize must be called before solve");
}
if x.dim() != self.initialized_ndim {
return Err("the dimension of the vector of unknown values x is incorrect");
}
if rhs.dim() != self.initialized_ndim {
return Err("the dimension of the right-hand side vector is incorrect");
}
let verb = if verbose { 1 } else { 0 };
self.stopwatch.reset();
unsafe {
let status = solver_cudss_solve(self.solver, x.as_mut_data().as_mut_ptr(), rhs.as_data().as_ptr(), verb);
if status != SUCCESSFUL_EXIT {
return Err(handle_cudss_error_code(status));
}
}
self.time_solve_ns = self.stopwatch.stop();
Ok(())
}
fn update_stats(&self, stats: &mut StatsLinSol) {
stats.main.solver = "cuDSS".to_string();
stats.time_nanoseconds.initialize_array.push(self.time_initialize_ns);
stats.time_nanoseconds.factorize_array.push(self.time_factorize_ns);
stats.time_nanoseconds.solve_array.push(self.time_solve_ns);
stats.output.effective_matching = match self.effective_matching {
CUDSS_MATCHING_ALG_NONE => "None".to_string(),
CUDSS_MATCHING_ALG_AUTO => "Auto".to_string(),
CUDSS_MATCHING_ALG_MAX_DIAG_COUNT => "MaxDiagCount".to_string(),
CUDSS_MATCHING_ALG_MAX_MIN_DIAG => "MaxMinDiag".to_string(),
CUDSS_MATCHING_ALG_MAX_MIN_DIAG_ALT => "MaxMinDiagAlt".to_string(),
CUDSS_MATCHING_ALG_MAX_DIAG_SUM => "MaxDiagSum".to_string(),
CUDSS_MATCHING_ALG_MAX_DIAG_PRODUCT => "MaxDiagProduct".to_string(),
_ => "Unknown".to_string(),
};
stats.output.effective_pivoting = match self.effective_pivoting {
CUDSS_PIVOT_AUTO => "Auto".to_string(),
CUDSS_PIVOT_NONE => "None".to_string(),
CUDSS_PIVOT_GLOBAL_COL => "GlobalCol".to_string(),
CUDSS_PIVOT_GLOBAL_ROW => "GlobalRow".to_string(),
CUDSS_PIVOT_DIAGONAL => "Diagonal".to_string(),
CUDSS_PIVOT_LOCAL_BLOCK => "LocalBlock".to_string(),
_ => "Unknown".to_string(),
};
}
fn get_ns_init(&self) -> u128 {
self.time_initialize_ns
}
fn get_ns_fact(&self) -> u128 {
self.time_factorize_ns
}
fn get_ns_solve(&self) -> u128 {
self.time_solve_ns
}
}
const CUDSS_REORDERING_ALG_DEFAULT: i32 = 0; const CUDSS_REORDERING_ALG_BTF_COLAMD: i32 = 1; const CUDSS_REORDERING_ALG_COLAMD: i32 = 2; const CUDSS_REORDERING_ALG_AMD: i32 = 3; const CUDSS_REORDERING_ALG_NESTED_DISSECTION: i32 = 4; const CUDSS_REORDERING_ALG_NONE: i32 = 5;
pub(crate) fn cudss_ordering(ordering: Ordering) -> i32 {
match ordering {
Ordering::Amd => CUDSS_REORDERING_ALG_AMD,
Ordering::Amf => CUDSS_REORDERING_ALG_DEFAULT,
Ordering::Auto => CUDSS_REORDERING_ALG_DEFAULT,
Ordering::Best => CUDSS_REORDERING_ALG_DEFAULT,
Ordering::BtfColamd => CUDSS_REORDERING_ALG_BTF_COLAMD,
Ordering::Cholmod => CUDSS_REORDERING_ALG_DEFAULT,
Ordering::Colamd => CUDSS_REORDERING_ALG_COLAMD,
Ordering::Metis => CUDSS_REORDERING_ALG_NESTED_DISSECTION,
Ordering::No => CUDSS_REORDERING_ALG_NONE,
Ordering::Pord => CUDSS_REORDERING_ALG_DEFAULT,
Ordering::Qamd => CUDSS_REORDERING_ALG_DEFAULT,
Ordering::Scotch => CUDSS_REORDERING_ALG_DEFAULT,
}
}
pub(crate) const CUDSS_MATCHING_ALG_NONE: i32 = 0;
pub(crate) const CUDSS_MATCHING_ALG_MAX_DIAG_COUNT: i32 = 1;
pub(crate) const CUDSS_MATCHING_ALG_MAX_MIN_DIAG: i32 = 2;
pub(crate) const CUDSS_MATCHING_ALG_MAX_MIN_DIAG_ALT: i32 = 3;
pub(crate) const CUDSS_MATCHING_ALG_MAX_DIAG_SUM: i32 = 4;
pub(crate) const CUDSS_MATCHING_ALG_MAX_DIAG_PRODUCT: i32 = 5;
pub(crate) const CUDSS_MATCHING_ALG_AUTO: i32 = 6;
pub(crate) fn cudss_matching(matching: Matching) -> i32 {
match matching {
Matching::None => CUDSS_MATCHING_ALG_NONE,
Matching::Auto => CUDSS_MATCHING_ALG_AUTO,
Matching::MaxDiagCount => CUDSS_MATCHING_ALG_MAX_DIAG_COUNT,
Matching::MaxMinDiag => CUDSS_MATCHING_ALG_MAX_MIN_DIAG,
Matching::MaxMinDiagAlt => CUDSS_MATCHING_ALG_MAX_MIN_DIAG_ALT,
Matching::MaxDiagSum => CUDSS_MATCHING_ALG_MAX_DIAG_SUM,
Matching::MaxDiagProduct => CUDSS_MATCHING_ALG_MAX_DIAG_PRODUCT,
}
}
pub(crate) const CUDSS_PIVOT_AUTO: i32 = 0;
pub(crate) const CUDSS_PIVOT_NONE: i32 = 1;
pub(crate) const CUDSS_PIVOT_GLOBAL_COL: i32 = 2;
pub(crate) const CUDSS_PIVOT_GLOBAL_ROW: i32 = 3;
pub(crate) const CUDSS_PIVOT_DIAGONAL: i32 = 4;
pub(crate) const CUDSS_PIVOT_LOCAL_BLOCK: i32 = 5;
pub(crate) fn cudss_pivoting(pivoting: Pivoting) -> i32 {
match pivoting {
Pivoting::Auto => CUDSS_PIVOT_AUTO,
Pivoting::None => CUDSS_PIVOT_NONE,
Pivoting::GlobalCol => CUDSS_PIVOT_GLOBAL_COL,
Pivoting::GlobalRow => CUDSS_PIVOT_GLOBAL_ROW,
Pivoting::Diagonal => CUDSS_PIVOT_DIAGONAL,
Pivoting::LocalBlock => CUDSS_PIVOT_LOCAL_BLOCK,
}
}
const ERROR_CUDA_MALLOC: i32 = 100;
const ERROR_CUDA_MEMCPY: i32 = 200;
const ERROR_CUDA_SYNCHRONIZE: i32 = 300;
const ERROR_CUDSS_CONFIG_SET: i32 = 400;
const ERROR_CUDSS_CONFIG_GET: i32 = 450;
const ERROR_CUDSS_MATRIX_CREATE_DN: i32 = 500;
const ERROR_CUDSS_MATRIX_SET_VALUES: i32 = 550;
const ERROR_CUDSS_MATRIX_CREATE_CSR: i32 = 600;
const ERROR_CUDSS_DEVICE: i32 = 1000;
pub(crate) fn handle_cudss_error_code(err: i32) -> StrError {
match err {
ERROR_CUDA_MALLOC => "cudaMalloc failed in the C code (cuDSS)",
ERROR_CUDA_MEMCPY => "cudaMemcpy failed in the C code (cuDSS)",
ERROR_CUDA_SYNCHRONIZE => "cudaStreamSynchronize failed in the C code (cuDSS)",
ERROR_CUDSS_CONFIG_SET => "cudssConfigSet failed in the C code (cuDSS)",
ERROR_CUDSS_CONFIG_GET => "cudssConfigGet failed in the C code (cuDSS)",
ERROR_CUDSS_MATRIX_CREATE_DN => "cudssMatrixCreateDn failed in the C code (cuDSS)",
ERROR_CUDSS_MATRIX_SET_VALUES => "cudssMatrixSetValues failed in the C code (cuDSS)",
ERROR_CUDSS_MATRIX_CREATE_CSR => "cudssMatrixCreateCsr failed in the C code (cuDSS)",
ERROR_CUDSS_DEVICE => "cuDSS device-side error (check CUDSS_DATA_INFO for details)",
701 => "cuDSS symbolic factorization failed: CUDSS_STATUS_NOT_INITIALIZED",
702 => "cuDSS symbolic factorization failed: CUDSS_STATUS_ALLOC_FAILED",
703 => "cuDSS symbolic factorization failed: CUDSS_STATUS_INVALID_VALUE",
704 => "cuDSS symbolic factorization failed: CUDSS_STATUS_NOT_SUPPORTED",
705 => "cuDSS symbolic factorization failed: CUDSS_STATUS_EXECUTION_FAILED",
706 => "cuDSS symbolic factorization failed: CUDSS_STATUS_INTERNAL_ERROR",
707 => "cuDSS symbolic factorization failed: CUDSS_STATUS_IR_FAILED",
801 => "cuDSS numeric factorization failed: CUDSS_STATUS_NOT_INITIALIZED",
802 => "cuDSS numeric factorization failed: CUDSS_STATUS_ALLOC_FAILED",
803 => "cuDSS numeric factorization failed: CUDSS_STATUS_INVALID_VALUE",
804 => "cuDSS numeric factorization failed: CUDSS_STATUS_NOT_SUPPORTED",
805 => "cuDSS numeric factorization failed: CUDSS_STATUS_EXECUTION_FAILED",
806 => "cuDSS numeric factorization failed: CUDSS_STATUS_INTERNAL_ERROR",
807 => "cuDSS numeric factorization failed: CUDSS_STATUS_IR_FAILED",
901 => "cuDSS solve failed: CUDSS_STATUS_NOT_INITIALIZED",
902 => "cuDSS solve failed: CUDSS_STATUS_ALLOC_FAILED",
903 => "cuDSS solve failed: CUDSS_STATUS_INVALID_VALUE",
904 => "cuDSS solve failed: CUDSS_STATUS_NOT_SUPPORTED",
905 => "cuDSS solve failed: CUDSS_STATUS_EXECUTION_FAILED",
906 => "cuDSS solve failed: CUDSS_STATUS_INTERNAL_ERROR",
907 => "cuDSS solve failed: CUDSS_STATUS_IR_FAILED",
_ => "Error: unknown error returned by c-code (cuDSS)",
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{CooMatrix, Samples};
use russell_lab::{approx_eq, vec_approx_eq};
#[test]
fn factorize_handles_errors() {
let mut solver = SolverCUDSS::new().unwrap();
assert!(!solver.factorized);
let (coo, _, _, _) = Samples::rectangular_1x2(true, false);
assert_eq!(solver.factorize(&coo, None).err(), Some("the matrix must be square"));
let coo = CooMatrix::new(1, 1, 1, Sym::No).unwrap();
assert_eq!(
solver.factorize(&coo, None).err(),
Some("the COO matrix must have at least one non-zero value")
);
let (coo, _, _, _) = Samples::mkl_symmetric_5x5_upper(true, false);
assert_eq!(
solver.factorize(&coo, None).err(),
Some("cuDSS requires Sym::YesLower for symmetric matrices")
);
let mut coo = CooMatrix::new(2, 2, 2, Sym::No).unwrap();
coo.put(0, 0, 1.0).unwrap();
coo.put(1, 1, 2.0).unwrap();
solver.factorize(&coo, None).unwrap();
let mut coo = CooMatrix::new(2, 2, 2, Sym::YesFull).unwrap();
coo.put(0, 0, 1.0).unwrap();
coo.put(1, 1, 2.0).unwrap();
assert_eq!(
solver.factorize(&coo, None).err(),
Some("subsequent factorizations must use the same matrix (symmetric differs)")
);
let mut coo = CooMatrix::new(1, 1, 1, Sym::No).unwrap();
coo.put(0, 0, 1.0).unwrap();
assert_eq!(
solver.factorize(&coo, None).err(),
Some("subsequent factorizations must use the same matrix (ndim differs)")
);
let mut coo = CooMatrix::new(2, 2, 1, Sym::No).unwrap();
coo.put(0, 0, 1.0).unwrap();
assert_eq!(
solver.factorize(&coo, None).err(),
Some("subsequent factorizations must use the same matrix (nnz differs)")
);
}
#[test]
fn factorize_and_solve_work_unsymmetric_default() {
let mut x = Vector::new(5);
let rhs = Vector::from(&[8.0, 45.0, -3.0, 3.0, 19.0]);
let x_correct = &[1.0, 2.0, 3.0, 4.0, 5.0];
let mut solver = SolverCUDSS::new().unwrap();
assert!(!solver.factorized);
let (coo, _, _, _) = Samples::umfpack_unsymmetric_5x5();
let mut params = LinSolParams::new();
solver.factorize(&coo, Some(params)).unwrap();
assert!(solver.factorized);
solver.solve(&mut x, &rhs, false).unwrap();
approx_eq(x[0], x_correct[0], 1e-12);
approx_eq(x[1], x_correct[1], 1e-12);
approx_eq(x[2], x_correct[2], 1e-12);
approx_eq(x[3], 4.001243780749064, 1e-15); approx_eq(x[4], x_correct[4], 1e-12);
let mut stats = StatsLinSol::new();
solver.update_stats(&mut stats);
assert_eq!(stats.main.solver, "cuDSS");
assert_eq!(stats.time_nanoseconds.initialize_array.len(), 1);
assert_eq!(stats.time_nanoseconds.factorize_array.len(), 1);
assert_eq!(stats.time_nanoseconds.solve_array.len(), 1);
assert!(solver.get_ns_init() > 0);
assert!(solver.get_ns_fact() > 0);
assert!(solver.get_ns_solve() > 0);
let mut x_again = Vector::new(5);
solver.solve(&mut x_again, &rhs, false).unwrap();
approx_eq(x_again[0], x_correct[0], 1e-12);
approx_eq(x_again[1], x_correct[1], 1e-12);
approx_eq(x_again[2], x_correct[2], 1e-12);
approx_eq(x[3], 4.001243780749064, 1e-15); approx_eq(x_again[4], x_correct[4], 1e-12);
params.ordering = Ordering::Colamd;
assert_eq!(
solver.factorize(&coo, Some(params)),
Err("subsequent factorizations must not change LinSolParams")
);
}
#[test]
fn factorize_and_solve_work_unsymmetric_colamd() {
let mut x = Vector::new(5);
let rhs = Vector::from(&[8.0, 45.0, -3.0, 3.0, 19.0]);
let x_correct = &[1.0, 2.0, 3.0, 4.0, 5.0];
let mut solver = SolverCUDSS::new().unwrap();
assert!(!solver.factorized);
let (coo, _, _, _) = Samples::umfpack_unsymmetric_5x5();
let mut params = LinSolParams::new();
params.ordering = Ordering::Colamd;
solver.factorize(&coo, Some(params)).unwrap();
assert!(solver.factorized);
solver.solve(&mut x, &rhs, false).unwrap();
vec_approx_eq(&x, x_correct, 1e-12);
let mut x_again = Vector::new(5);
solver.solve(&mut x_again, &rhs, false).unwrap();
vec_approx_eq(&x_again, x_correct, 1e-12);
}
#[test]
fn factorize_and_solve_work_unsymmetric_with_matching() {
let mut x = Vector::new(5);
let rhs = Vector::from(&[8.0, 45.0, -3.0, 3.0, 19.0]);
let x_correct = &[1.0, 2.0, 3.0, 4.0, 5.0];
let mut solver = SolverCUDSS::new().unwrap();
assert!(!solver.factorized);
let (coo, _, _, _) = Samples::umfpack_unsymmetric_5x5();
let mut params = LinSolParams::new();
params.matching = Matching::Auto;
solver.factorize(&coo, Some(params)).unwrap();
assert!(solver.factorized);
solver.solve(&mut x, &rhs, false).unwrap();
vec_approx_eq(&x, x_correct, 1e-12);
let mut x_again = Vector::new(5);
solver.solve(&mut x_again, &rhs, false).unwrap();
vec_approx_eq(&x_again, x_correct, 1e-12);
}
#[test]
fn factorize_and_solve_work_unsymmetric_pivot_params() {
let mut x = Vector::new(5);
let rhs = Vector::from(&[8.0, 45.0, -3.0, 3.0, 19.0]);
let x_correct = &[1.0, 2.0, 3.0, 4.0, 5.0];
let mut solver = SolverCUDSS::new().unwrap();
assert!(!solver.factorized);
let (coo, _, _, _) = Samples::umfpack_unsymmetric_5x5();
let mut params = LinSolParams::new();
params.pivot_epsilon = Some(1e-12);
params.refinement_nstep = Some(1);
solver.factorize(&coo, Some(params)).unwrap();
assert!(solver.factorized);
solver.solve(&mut x, &rhs, false).unwrap();
vec_approx_eq(&x, x_correct, 1e-12);
let mut x_again = Vector::new(5);
solver.solve(&mut x_again, &rhs, false).unwrap();
vec_approx_eq(&x_again, x_correct, 1e-12);
}
#[test]
fn factorize_and_solve_work_sym_psd() {
let mut x = Vector::new(5);
let rhs = Vector::from(&[1.0, 2.0, 3.0, 4.0, 5.0]);
let x_correct = &[-979.0 / 3.0, 983.0, 1961.0 / 12.0, 398.0, 123.0 / 2.0];
let mut solver = SolverCUDSS::new().unwrap();
assert!(!solver.factorized);
let (coo, _, _, _) = Samples::mkl_positive_definite_5x5_lower();
let mut params = LinSolParams::new();
params.positive_definite = true;
solver.factorize(&coo, Some(params)).unwrap();
assert!(solver.factorized);
solver.solve(&mut x, &rhs, false).unwrap();
vec_approx_eq(&x, x_correct, 1e-10);
}
#[test]
fn cudss_simple_spd() {
let mut coo = CooMatrix::new(5, 5, 8, Sym::YesLower).unwrap();
coo.put(0, 0, 4.0).unwrap();
coo.put(1, 1, 3.0).unwrap();
coo.put(2, 0, 1.0).unwrap();
coo.put(2, 1, 2.0).unwrap();
coo.put(2, 2, 5.0).unwrap();
coo.put(3, 3, 1.0).unwrap();
coo.put(4, 2, 1.0).unwrap();
coo.put(4, 4, 2.0).unwrap();
let mut x = Vector::new(5);
let rhs = Vector::from(&[7.0, 12.0, 25.0, 4.0, 13.0]);
let x_correct = &[1.0, 2.0, 3.0, 4.0, 5.0];
let mut params = LinSolParams::new();
params.positive_definite = true;
let mut solver = SolverCUDSS::new().unwrap();
solver.factorize(&coo, Some(params)).unwrap();
solver.solve(&mut x, &rhs, false).unwrap();
vec_approx_eq(&x, x_correct, 1e-10);
}
#[test]
fn cudss_unsymmetric() {
let mut coo = CooMatrix::new(5, 5, 13, Sym::No).unwrap();
coo.put(0, 0, 5.0).unwrap();
coo.put(0, 1, 1.0).unwrap();
coo.put(0, 4, 3.0).unwrap();
coo.put(1, 0, 2.0).unwrap();
coo.put(1, 1, 6.0).unwrap();
coo.put(1, 3, 4.0).unwrap();
coo.put(2, 2, 7.0).unwrap();
coo.put(2, 3, 2.0).unwrap();
coo.put(3, 1, 1.0).unwrap();
coo.put(3, 2, 3.0).unwrap();
coo.put(3, 3, 8.0).unwrap();
coo.put(4, 0, 4.0).unwrap();
coo.put(4, 4, 9.0).unwrap();
let mut x = Vector::new(5);
let rhs = Vector::from(&[22.0, 30.0, 29.0, 43.0, 49.0]);
let x_correct = &[1.0, 2.0, 3.0, 4.0, 5.0];
let mut solver = SolverCUDSS::new().unwrap();
solver.factorize(&coo, None).unwrap();
solver.solve(&mut x, &rhs, false).unwrap();
vec_approx_eq(&x, x_correct, 1e-10);
}
#[test]
fn hybrid_memory_works() {
let mut coo = CooMatrix::new(5, 5, 8, Sym::YesLower).unwrap();
coo.put(0, 0, 4.0).unwrap();
coo.put(1, 1, 3.0).unwrap();
coo.put(2, 0, 1.0).unwrap();
coo.put(2, 1, 2.0).unwrap();
coo.put(2, 2, 5.0).unwrap();
coo.put(3, 3, 1.0).unwrap();
coo.put(4, 2, 1.0).unwrap();
coo.put(4, 4, 2.0).unwrap();
let mut x = Vector::new(5);
let rhs = Vector::from(&[7.0, 12.0, 25.0, 4.0, 13.0]);
let x_correct = &[1.0, 2.0, 3.0, 4.0, 5.0];
let mut params = LinSolParams::new();
params.positive_definite = true;
params.hybrid_memory_factor = Some(0.5);
params.verbose = false;
let mut solver = SolverCUDSS::new().unwrap();
solver.factorize(&coo, Some(params)).unwrap();
solver.solve(&mut x, &rhs, false).unwrap();
vec_approx_eq(&x, x_correct, 1e-10);
}
#[test]
fn solve_handles_errors() {
let mut coo = CooMatrix::new(2, 2, 2, Sym::No).unwrap();
coo.put(0, 0, 123.0).unwrap();
coo.put(1, 1, 456.0).unwrap();
let mut solver = SolverCUDSS::new().unwrap();
assert!(!solver.factorized);
let mut x = Vector::new(2);
let rhs = Vector::new(2);
assert_eq!(
solver.solve(&mut x, &rhs, false),
Err("the function factorize must be called before solve")
);
let mut x = Vector::new(1);
solver.factorize(&coo, None).unwrap();
assert_eq!(
solver.solve(&mut x, &rhs, false),
Err("the dimension of the vector of unknown values x is incorrect")
);
let mut x = Vector::new(2);
let rhs = Vector::new(1);
assert_eq!(
solver.solve(&mut x, &rhs, false),
Err("the dimension of the right-hand side vector is incorrect")
);
}
#[test]
fn cudss_ordering_and_matching_and_pivoting_work() {
assert_eq!(cudss_ordering(Ordering::Amd), CUDSS_REORDERING_ALG_AMD);
assert_eq!(cudss_ordering(Ordering::Amf), CUDSS_REORDERING_ALG_DEFAULT);
assert_eq!(cudss_ordering(Ordering::Auto), CUDSS_REORDERING_ALG_DEFAULT);
assert_eq!(cudss_ordering(Ordering::Best), CUDSS_REORDERING_ALG_DEFAULT);
assert_eq!(cudss_ordering(Ordering::BtfColamd), CUDSS_REORDERING_ALG_BTF_COLAMD);
assert_eq!(cudss_ordering(Ordering::Cholmod), CUDSS_REORDERING_ALG_DEFAULT);
assert_eq!(cudss_ordering(Ordering::Colamd), CUDSS_REORDERING_ALG_COLAMD);
assert_eq!(cudss_ordering(Ordering::Metis), CUDSS_REORDERING_ALG_NESTED_DISSECTION);
assert_eq!(cudss_ordering(Ordering::No), CUDSS_REORDERING_ALG_NONE);
assert_eq!(cudss_ordering(Ordering::Pord), CUDSS_REORDERING_ALG_DEFAULT);
assert_eq!(cudss_ordering(Ordering::Qamd), CUDSS_REORDERING_ALG_DEFAULT);
assert_eq!(cudss_ordering(Ordering::Scotch), CUDSS_REORDERING_ALG_DEFAULT);
assert_eq!(cudss_matching(Matching::None), CUDSS_MATCHING_ALG_NONE);
assert_eq!(cudss_matching(Matching::Auto), CUDSS_MATCHING_ALG_AUTO);
assert_eq!(
cudss_matching(Matching::MaxDiagCount),
CUDSS_MATCHING_ALG_MAX_DIAG_COUNT
);
assert_eq!(cudss_matching(Matching::MaxMinDiag), CUDSS_MATCHING_ALG_MAX_MIN_DIAG);
assert_eq!(
cudss_matching(Matching::MaxMinDiagAlt),
CUDSS_MATCHING_ALG_MAX_MIN_DIAG_ALT
);
assert_eq!(cudss_matching(Matching::MaxDiagSum), CUDSS_MATCHING_ALG_MAX_DIAG_SUM);
assert_eq!(
cudss_matching(Matching::MaxDiagProduct),
CUDSS_MATCHING_ALG_MAX_DIAG_PRODUCT
);
assert_eq!(cudss_pivoting(Pivoting::Auto), CUDSS_PIVOT_AUTO);
assert_eq!(cudss_pivoting(Pivoting::None), CUDSS_PIVOT_NONE);
assert_eq!(cudss_pivoting(Pivoting::GlobalCol), CUDSS_PIVOT_GLOBAL_COL);
assert_eq!(cudss_pivoting(Pivoting::GlobalRow), CUDSS_PIVOT_GLOBAL_ROW);
assert_eq!(cudss_pivoting(Pivoting::Diagonal), CUDSS_PIVOT_DIAGONAL);
assert_eq!(cudss_pivoting(Pivoting::LocalBlock), CUDSS_PIVOT_LOCAL_BLOCK);
}
#[test]
fn handle_cudss_error_code_works() {
let default = "Error: unknown error returned by c-code (cuDSS)";
assert_eq!(
handle_cudss_error_code(ERROR_CUDA_MALLOC),
"cudaMalloc failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDA_MEMCPY),
"cudaMemcpy failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDA_SYNCHRONIZE),
"cudaStreamSynchronize failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDSS_CONFIG_SET),
"cudssConfigSet failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDSS_CONFIG_GET),
"cudssConfigGet failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDSS_MATRIX_CREATE_DN),
"cudssMatrixCreateDn failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDSS_MATRIX_SET_VALUES),
"cudssMatrixSetValues failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDSS_MATRIX_CREATE_CSR),
"cudssMatrixCreateCsr failed in the C code (cuDSS)"
);
assert_eq!(
handle_cudss_error_code(ERROR_CUDSS_DEVICE),
"cuDSS device-side error (check CUDSS_DATA_INFO for details)"
);
assert_eq!(
handle_cudss_error_code(701),
"cuDSS symbolic factorization failed: CUDSS_STATUS_NOT_INITIALIZED"
);
assert_eq!(
handle_cudss_error_code(707),
"cuDSS symbolic factorization failed: CUDSS_STATUS_IR_FAILED"
);
assert_eq!(
handle_cudss_error_code(802),
"cuDSS numeric factorization failed: CUDSS_STATUS_ALLOC_FAILED"
);
assert_eq!(
handle_cudss_error_code(805),
"cuDSS numeric factorization failed: CUDSS_STATUS_EXECUTION_FAILED"
);
assert_eq!(
handle_cudss_error_code(903),
"cuDSS solve failed: CUDSS_STATUS_INVALID_VALUE"
);
assert_eq!(
handle_cudss_error_code(907),
"cuDSS solve failed: CUDSS_STATUS_IR_FAILED"
);
assert_eq!(handle_cudss_error_code(123), default);
}
}