/* automatically generated by rust-bindgen 0.72.1 */
/// Byte-backed storage for a packed group of C bitfields.
///
/// Bit `index` lives in byte `index / 8` of the storage. Inside that byte, position
/// `index % 8` is counted from the least-significant bit on little-endian targets and from
/// the most-significant bit on big-endian targets.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct __BindgenBitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> __BindgenBitfieldUnit<Storage> {
    /// Wraps `storage` as-is; no bits are modified.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        __BindgenBitfieldUnit { storage }
    }
}

impl<Storage> __BindgenBitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Builds the single-bit mask that selects bit `index % 8` of a byte for the
    /// current target endianness.
    #[inline]
    fn bit_mask(index: usize) -> u8 {
        let position = index % 8;
        if cfg!(target_endian = "big") {
            0x80u8 >> position
        } else {
            0x01u8 << position
        }
    }

    /// Translates between the `i`-th bit of a `bit_width`-wide field as laid out in storage
    /// and its position inside the integer value. The mapping is the identity on
    /// little-endian targets and a mirror (`bit_width - 1 - i`) on big-endian targets, so it
    /// is its own inverse.
    #[inline]
    fn mirrored(bit_width: u8, i: usize) -> usize {
        if cfg!(target_endian = "big") {
            bit_width as usize - 1 - i
        } else {
            i
        }
    }

    /// Tests whether the bit addressed by `index` is set in `byte`.
    #[inline]
    fn extract_bit(byte: u8, index: usize) -> bool {
        (byte & Self::bit_mask(index)) != 0
    }

    /// Returns the bit at absolute position `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index / 8` is outside the storage slice.
    #[inline]
    pub fn get_bit(&self, index: usize) -> bool {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        Self::extract_bit(self.storage.as_ref()[index / 8], index)
    }

    /// Pointer-based counterpart of [`Self::get_bit`] that never forms a reference to `*this`.
    ///
    /// # Safety
    ///
    /// `this` must point to a readable `Self`, and `index / 8` must be smaller than
    /// `size_of::<Storage>()`; the byte is read straight from the storage field's memory.
    #[inline]
    pub unsafe fn raw_get_bit(this: *const Self, index: usize) -> bool {
        debug_assert!(index / 8 < core::mem::size_of::<Storage>());
        let first_byte = unsafe { core::ptr::addr_of!((*this).storage) } as *const u8;
        let byte = unsafe { *first_byte.add(index / 8) };
        Self::extract_bit(byte, index)
    }

    /// Returns `byte` with the bit addressed by `index` forced to `val`.
    #[inline]
    fn change_bit(byte: u8, index: usize, val: bool) -> u8 {
        let mask = Self::bit_mask(index);
        match val {
            true => byte | mask,
            false => byte & !mask,
        }
    }

    /// Writes `val` to the bit at absolute position `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index / 8` is outside the storage slice.
    #[inline]
    pub fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let slot = &mut self.storage.as_mut()[index / 8];
        *slot = Self::change_bit(*slot, index, val);
    }

    /// Pointer-based counterpart of [`Self::set_bit`].
    ///
    /// # Safety
    ///
    /// `this` must point to a readable and writable `Self`, and `index / 8` must be smaller
    /// than `size_of::<Storage>()`.
    #[inline]
    pub unsafe fn raw_set_bit(this: *mut Self, index: usize, val: bool) {
        debug_assert!(index / 8 < core::mem::size_of::<Storage>());
        let first_byte = unsafe { core::ptr::addr_of_mut!((*this).storage) } as *mut u8;
        let slot = unsafe { first_byte.add(index / 8) };
        unsafe { *slot = Self::change_bit(*slot, index, val) };
    }

    /// Reads the `bit_width`-bit field that starts at `bit_offset` and returns it
    /// zero-extended to `u64`.
    #[inline]
    pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        (0..(bit_width as usize))
            .filter(|&i| self.get_bit(i + bit_offset))
            .fold(0u64, |acc, i| acc | (1u64 << Self::mirrored(bit_width, i)))
    }

    /// Pointer-based counterpart of [`Self::get`].
    ///
    /// # Safety
    ///
    /// `this` must point to a readable `Self`, and the addressed bit range must lie inside
    /// the storage field.
    #[inline]
    pub unsafe fn raw_get(this: *const Self, bit_offset: usize, bit_width: u8) -> u64 {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < core::mem::size_of::<Storage>());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= core::mem::size_of::<Storage>());
        (0..(bit_width as usize))
            .filter(|&i| unsafe { Self::raw_get_bit(this, i + bit_offset) })
            .fold(0u64, |acc, i| acc | (1u64 << Self::mirrored(bit_width, i)))
    }

    /// Stores the low `bit_width` bits of `val` into the field that starts at `bit_offset`;
    /// higher bits of `val` are ignored and bits outside the field are left untouched.
    #[inline]
    pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        for value_bit in 0..(bit_width as usize) {
            let bit_is_set = (val & (1u64 << value_bit)) != 0;
            self.set_bit(Self::mirrored(bit_width, value_bit) + bit_offset, bit_is_set);
        }
    }

    /// Pointer-based counterpart of [`Self::set`].
    ///
    /// # Safety
    ///
    /// `this` must point to a readable and writable `Self`, and the addressed bit range must
    /// lie inside the storage field.
    #[inline]
    pub unsafe fn raw_set(this: *mut Self, bit_offset: usize, bit_width: u8, val: u64) {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < core::mem::size_of::<Storage>());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= core::mem::size_of::<Storage>());
        for value_bit in 0..(bit_width as usize) {
            let bit_is_set = (val & (1u64 << value_bit)) != 0;
            let storage_bit = Self::mirrored(bit_width, value_bit) + bit_offset;
            unsafe { Self::raw_set_bit(this, storage_bit, bit_is_set) };
        }
    }
}
/// Major component of the cuSOLVER version recorded in these bindings.
pub const CUSOLVER_VER_MAJOR: u32 = 12;
/// Minor component of the cuSOLVER version recorded in these bindings.
pub const CUSOLVER_VER_MINOR: u32 = 1;
/// Patch component of the cuSOLVER version recorded in these bindings.
pub const CUSOLVER_VER_PATCH: u32 = 0;
/// Build number of the cuSOLVER version recorded in these bindings.
pub const CUSOLVER_VER_BUILD: u32 = 51;
/// Combined version number.
///
/// The value matches `MAJOR * 1000 + MINOR * 100 + PATCH` for the constants above
/// (`12 * 1000 + 1 * 100 + 0`). NOTE(review): the formula is inferred from the values;
/// confirm against the cuSOLVER header.
pub const CUSOLVER_VERSION: u32 = 12100;
/// Opaque stand-in for the C `cusolverDnContext` type.
///
/// The only field is a private zero-length array, so the type carries no data and is meant
/// to be used behind a pointer (see [`cusolverDnHandle_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusolverDnContext {
_unused: [u8; 0],
}
/// This is a pointer type to an opaque cuSolverDN context, which the user must initialize by calling [`cusolverDnCreate`] prior to calling any other library function. An uninitialized Handle object will lead to unexpected behavior, including crashes of cuSolverDN. The handle created and returned by [`cusolverDnCreate`] must be passed to every cuSolverDN function.
pub type cusolverDnHandle_t = *mut cusolverDnContext;
/// Opaque stand-in for the C `syevjInfo` type.
///
/// The only field is a private zero-length array, so the type carries no data and is meant
/// to be used behind a pointer (see [`syevjInfo_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct syevjInfo {
_unused: [u8; 0],
}
/// Mutable raw pointer to an opaque [`syevjInfo`].
pub type syevjInfo_t = *mut syevjInfo;
/// Opaque stand-in for the C `gesvdjInfo` type.
///
/// The only field is a private zero-length array, so the type carries no data and is meant
/// to be used behind a pointer (see [`gesvdjInfo_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct gesvdjInfo {
_unused: [u8; 0],
}
/// Mutable raw pointer to an opaque [`gesvdjInfo`].
pub type gesvdjInfo_t = *mut gesvdjInfo;
/// Opaque stand-in for the C `cusolverDnIRSParams` type.
///
/// The only field is a private zero-length array, so the type carries no data and is meant
/// to be used behind a pointer (see [`cusolverDnIRSParams_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusolverDnIRSParams {
_unused: [u8; 0],
}
/// This is a pointer type to an opaque [`cusolverDnIRSParams_t`] structure, which holds parameters for the iterative refinement linear solvers such as `cusolverDnXgesv()`. Use corresponding helper functions described below to either Create/Destroy this structure or Set/Get solver parameters.
pub type cusolverDnIRSParams_t = *mut cusolverDnIRSParams;
/// Opaque stand-in for the C `cusolverDnIRSInfos` type.
///
/// The only field is a private zero-length array, so the type carries no data and is meant
/// to be used behind a pointer (see [`cusolverDnIRSInfos_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusolverDnIRSInfos {
_unused: [u8; 0],
}
/// This is a pointer type to an opaque [`cusolverDnIRSInfos_t`] structure, which holds information about the performed call to an iterative refinement linear solver (such as `cusolverDnXgesv()`). Use corresponding helper functions described below to either Create/Destroy this structure or retrieve solve information.
pub type cusolverDnIRSInfos_t = *mut cusolverDnIRSInfos;
/// Opaque stand-in for the C `cusolverDnParams` type.
///
/// The only field is a private zero-length array, so the type carries no data and is meant
/// to be used behind a pointer (see [`cusolverDnParams_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusolverDnParams {
_unused: [u8; 0],
}
/// Mutable raw pointer to an opaque [`cusolverDnParams`].
pub type cusolverDnParams_t = *mut cusolverDnParams;
/// The [`cusolverDnFunction_t`] type indicates which routine needs to be configured by [`cusolverDnSetAdvOptions`]. The value [`cusolverDnFunction_t::CUSOLVERDN_GETRF`] corresponds to the routine `Getrf`.
///
/// Represented as a `u32`-sized enum with discriminants 0 through 2.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverDnFunction_t {
/// Corresponds to `Getrf`.
CUSOLVERDN_GETRF = 0,
/// NOTE(review): presumably corresponds to `Potrf` — confirm against the cuSOLVER docs.
CUSOLVERDN_POTRF = 1,
/// NOTE(review): presumably corresponds to the batched `syev` routine — confirm against the cuSOLVER docs.
CUSOLVERDN_SYEVBATCHED = 2,
}
/// C `size_t` as emitted by bindgen, aliased to `c_ulong`.
///
/// NOTE(review): `c_ulong` is not pointer-sized on every target (e.g. 64-bit Windows), so
/// this alias presumably reflects the platform the bindings were generated on.
pub type size_t = ::core::ffi::c_ulong;
/// C `__uint64_t` as emitted by bindgen, aliased to `c_ulong`.
pub type __uint64_t = ::core::ffi::c_ulong;
/// C `__off_t` as emitted by bindgen, aliased to `c_long`.
pub type __off_t = ::core::ffi::c_long;
/// C `__off64_t` as emitted by bindgen, aliased to `c_long`.
pub type __off64_t = ::core::ffi::c_long;
/// C `FILE`, an alias for [`_IO_FILE`].
pub type FILE = _IO_FILE;
/// Opaque stand-in for the C `_IO_marker` type; only ever referenced through a pointer
/// (see [`_IO_FILE::_markers`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_marker {
_unused: [u8; 0],
}
/// Opaque stand-in for the C `_IO_codecvt` type; only ever referenced through a pointer
/// (see [`_IO_FILE::_codecvt`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_codecvt {
_unused: [u8; 0],
}
/// Opaque stand-in for the C `_IO_wide_data` type; only ever referenced through a pointer
/// (see [`_IO_FILE::_wide_data`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_wide_data {
_unused: [u8; 0],
}
/// C `_IO_lock_t`, bound as `c_void`; used only as the pointee of [`_IO_FILE::_lock`].
pub type _IO_lock_t = ::core::ffi::c_void;
/// `#[repr(C)]` mirror of the C `_IO_FILE` structure (the type behind [`FILE`]).
///
/// Field order and types define the C layout and must not be rearranged.
/// NOTE(review): the field set presumably matches the libc headers present when the
/// bindings were generated; other libc versions may lay this struct out differently.
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub struct _IO_FILE {
pub _flags: ::core::ffi::c_int,
pub _IO_read_ptr: *mut ::core::ffi::c_char,
pub _IO_read_end: *mut ::core::ffi::c_char,
pub _IO_read_base: *mut ::core::ffi::c_char,
pub _IO_write_base: *mut ::core::ffi::c_char,
pub _IO_write_ptr: *mut ::core::ffi::c_char,
pub _IO_write_end: *mut ::core::ffi::c_char,
pub _IO_buf_base: *mut ::core::ffi::c_char,
pub _IO_buf_end: *mut ::core::ffi::c_char,
pub _IO_save_base: *mut ::core::ffi::c_char,
pub _IO_backup_base: *mut ::core::ffi::c_char,
pub _IO_save_end: *mut ::core::ffi::c_char,
pub _markers: *mut _IO_marker,
pub _chain: *mut _IO_FILE,
pub _fileno: ::core::ffi::c_int,
// Zero-length `u32` array: occupies no bytes but gives the bitfield unit below `u32` alignment.
pub _bitfield_align_1: [u32; 0],
// Three bytes of bitfield storage; the `_flags2` accessors on this type read and write
// its 24 bits.
pub _bitfield_1: __BindgenBitfieldUnit<[u8; 3usize]>,
pub _short_backupbuf: [::core::ffi::c_char; 1usize],
pub _old_offset: __off_t,
pub _cur_column: ::core::ffi::c_ushort,
pub _vtable_offset: ::core::ffi::c_schar,
pub _shortbuf: [::core::ffi::c_char; 1usize],
pub _lock: *mut _IO_lock_t,
pub _offset: __off64_t,
pub _codecvt: *mut _IO_codecvt,
pub _wide_data: *mut _IO_wide_data,
pub _freeres_list: *mut _IO_FILE,
pub _freeres_buf: *mut ::core::ffi::c_void,
pub _prevchain: *mut *mut _IO_FILE,
pub _mode: ::core::ffi::c_int,
pub _unused3: ::core::ffi::c_int,
pub _total_written: __uint64_t,
pub _unused2: [::core::ffi::c_char; 8usize],
}
impl Default for _IO_FILE {
    /// Produces an `_IO_FILE` in which every byte is zero: integers are `0`, pointers are
    /// null and arrays are zero-filled.
    fn default() -> Self {
        // SAFETY: every field of `_IO_FILE` is an integer, a raw pointer, or an array /
        // byte-array wrapper of those, and the all-zero bit pattern is valid for each.
        unsafe { ::core::mem::MaybeUninit::<Self>::zeroed().assume_init() }
    }
}
impl _IO_FILE {
    /// Returns the `_flags2` bitfield: the 24 bits starting at bit 0 of `_bitfield_1`,
    /// zero-extended and reinterpreted as a C `int`.
    #[inline]
    pub fn _flags2(&self) -> ::core::ffi::c_int {
        let bits = self._bitfield_1.get(0usize, 24u8) as u32;
        bits as ::core::ffi::c_int
    }

    /// Stores the low 24 bits of `val` into the `_flags2` bitfield; higher bits are dropped.
    #[inline]
    pub fn set__flags2(&mut self, val: ::core::ffi::c_int) {
        // Same-width signed -> unsigned cast keeps the bit pattern.
        let bits = val as u32;
        self._bitfield_1.set(0usize, 24u8, u64::from(bits))
    }

    /// Pointer-based counterpart of [`Self::_flags2`] that never forms a reference to `*this`.
    ///
    /// # Safety
    ///
    /// `this` must point to a readable `_IO_FILE`.
    #[inline]
    pub unsafe fn _flags2_raw(this: *const Self) -> ::core::ffi::c_int {
        let unit = unsafe { ::core::ptr::addr_of!((*this)._bitfield_1) };
        let bits =
            unsafe { <__BindgenBitfieldUnit<[u8; 3usize]>>::raw_get(unit, 0usize, 24u8) } as u32;
        bits as ::core::ffi::c_int
    }

    /// Pointer-based counterpart of [`Self::set__flags2`].
    ///
    /// # Safety
    ///
    /// `this` must point to a readable and writable `_IO_FILE`.
    #[inline]
    pub unsafe fn set__flags2_raw(this: *mut Self, val: ::core::ffi::c_int) {
        let bits = val as u32;
        let unit = unsafe { ::core::ptr::addr_of_mut!((*this)._bitfield_1) };
        unsafe {
            <__BindgenBitfieldUnit<[u8; 3usize]>>::raw_set(unit, 0usize, 24u8, u64::from(bits))
        }
    }

    /// Builds a fresh 3-byte bitfield unit whose `_flags2` field holds the low 24 bits of
    /// the argument.
    #[inline]
    pub fn new_bitfield_1(_flags2: ::core::ffi::c_int) -> __BindgenBitfieldUnit<[u8; 3usize]> {
        let mut unit = <__BindgenBitfieldUnit<[u8; 3usize]>>::default();
        unit.set(0usize, 24u8, u64::from(_flags2 as u32));
        unit
    }
}
/// Pair of `f32` components with C layout and 8-byte alignment.
///
/// Also used as the representation of [`cuFloatComplex`] / [`cuComplex`].
#[repr(C)]
#[repr(align(8))]
#[derive(Debug, Default, Copy, Clone, PartialOrd, PartialEq)]
pub struct float2 {
pub x: f32,
pub y: f32,
}
/// Pair of `f64` components with C layout and 16-byte alignment.
///
/// Also used as the representation of [`cuDoubleComplex`].
#[repr(C)]
#[repr(align(16))]
#[derive(Debug, Default, Copy, Clone, PartialOrd, PartialEq)]
pub struct double2 {
pub x: f64,
pub y: f64,
}
/// Single-precision complex number, an alias for [`float2`].
///
/// NOTE(review): presumably `x` is the real part and `y` the imaginary part — confirm
/// against the CUDA `cuComplex.h` header.
pub type cuFloatComplex = float2;
/// Double-precision complex number, an alias for [`double2`].
///
/// NOTE(review): presumably `x` is the real part and `y` the imaginary part — confirm
/// against the CUDA `cuComplex.h` header.
pub type cuDoubleComplex = double2;
/// Alias for [`cuFloatComplex`].
pub type cuComplex = cuFloatComplex;
/// Opaque stand-in for the C `CUstream_st` type.
///
/// The only field is a private zero-length array, so the type carries no data and is meant
/// to be used behind a pointer.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CUstream_st {
_unused: [u8; 0],
}
/// The type indicates which part (lower or upper) of the dense matrix was filled and consequently should be used by the function.
///
/// Notice that BLAS implementations often use Fortran characters `‘L’` or `‘l’` (lower) and `‘U’` or `‘u’` (upper) to describe which part of the matrix is filled.
///
/// Represented as a `u32`-sized enum with discriminants 0 through 2.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cublasFillMode_t {
/// The lower part of the matrix is filled.
CUBLAS_FILL_MODE_LOWER = 0,
/// The upper part of the matrix is filled.
CUBLAS_FILL_MODE_UPPER = 1,
/// The full matrix is filled.
CUBLAS_FILL_MODE_FULL = 2,
}
/// cuBLAS diagonal-type selector, represented as a `u32`-sized enum.
///
/// NOTE(review): presumably indicates whether a triangular matrix has an implicit unit
/// diagonal — confirm against the cuBLAS documentation.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cublasDiagType_t {
/// Discriminant 0.
CUBLAS_DIAG_NON_UNIT = 0,
/// Discriminant 1.
CUBLAS_DIAG_UNIT = 1,
}
/// cuBLAS side selector, represented as a `u32`-sized enum.
///
/// NOTE(review): presumably indicates whether a matrix operand is applied on the left or
/// the right side of an equation — confirm against the cuBLAS documentation.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cublasSideMode_t {
/// Discriminant 0.
CUBLAS_SIDE_LEFT = 0,
/// Discriminant 1.
CUBLAS_SIDE_RIGHT = 1,
}
/// The [`cublasOperation_t`] type indicates which operation needs to be performed with the dense matrix.
///
/// Notice that BLAS implementations often use Fortran characters `‘N’` or `‘n’` (non-transpose), `‘T’` or `‘t’` (transpose) and `‘C’` or `‘c’` (conjugate transpose) to describe which operations need to be performed with the dense matrix.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cublasOperation_t {
    /// The non-transpose operation is selected.
    CUBLAS_OP_N = 0,
    /// The transpose operation is selected.
    CUBLAS_OP_T = 1,
    /// The conjugate transpose operation is selected.
    CUBLAS_OP_C = 2,
    /// Discriminant 3.
    CUBLAS_OP_CONJG = 3,
}

impl cublasOperation_t {
    /// Second name for [`cublasOperation_t::CUBLAS_OP_C`]; both share discriminant 2, which
    /// a Rust enum cannot express as two variants, hence the associated constant.
    pub const CUBLAS_OP_HERMITAN: Self = Self::CUBLAS_OP_C;
}
/// cuSOLVER integer type, an alias for C `int`.
pub type cusolver_int_t = ::core::ffi::c_int;
/// This is a status type returned by the library functions and it can have the following values.
///
/// The discriminants are not contiguous (17–19, 24 and 27–29 are unused), so a raw `u32`
/// is not necessarily a valid value of this enum.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverStatus_t {
/// The operation completed successfully.
CUSOLVER_STATUS_SUCCESS = 0,
/// The cuSolver library was not initialized. This is usually caused by the lack of a prior call, an error in the CUDA Runtime API called by the cuSolver routine, or an error in the hardware setup.
///
/// **To correct:** call [`cusolverDnCreate`] prior to the function call; and check that the hardware, an appropriate version of the driver, and the cuSolver library are correctly installed.
CUSOLVER_STATUS_NOT_INITIALIZED = 1,
/// Resource allocation failed inside the cuSolver library. This is usually caused by a `cudaMalloc()` failure.
///
/// **To correct:** prior to the function call, deallocate previously allocated memory as much as possible.
CUSOLVER_STATUS_ALLOC_FAILED = 2,
/// An unsupported value or parameter was passed to the function (a negative vector size, for example).
///
/// **To correct:** ensure that all the parameters being passed have valid values.
CUSOLVER_STATUS_INVALID_VALUE = 3,
/// The function requires a feature absent from the device architecture; usually caused by the lack of support for atomic operations or double precision.
///
/// **To correct:** compile and run the application on a device with compute capability 5.0 or above.
CUSOLVER_STATUS_ARCH_MISMATCH = 4,
/// Discriminant 5; not described in the documentation these bindings were generated from.
CUSOLVER_STATUS_MAPPING_ERROR = 5,
/// The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.
///
/// **To correct:** check that the hardware, an appropriate version of the driver, and the cuSolver library are correctly installed.
CUSOLVER_STATUS_EXECUTION_FAILED = 6,
/// An internal cuSolver operation failed. This error is usually caused by a `cudaMemcpyAsync()` failure.
///
/// **To correct:** check that the hardware, an appropriate version of the driver, and the cuSolver library are correctly installed. Also, check that the memory passed as a parameter to the routine is not being deallocated prior to the routine’s completion.
CUSOLVER_STATUS_INTERNAL_ERROR = 7,
/// The matrix type is not supported by this function. This is usually caused by passing an invalid matrix descriptor to the function.
///
/// **To correct:** check that the fields in `descrA` were set correctly.
CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
/// The parameter combination is not supported, for example batched version is not supported or `M < N` is not supported.
///
/// **To correct:** consult the documentation, and use a supported configuration.
CUSOLVER_STATUS_NOT_SUPPORTED = 9,
// The remaining variants carry no upstream description in this file; their meaning should
// be taken from the cuSOLVER documentation rather than guessed from the names.
CUSOLVER_STATUS_ZERO_PIVOT = 10,
CUSOLVER_STATUS_INVALID_LICENSE = 11,
CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED = 12,
CUSOLVER_STATUS_IRS_PARAMS_INVALID = 13,
CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC = 14,
CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE = 15,
CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER = 16,
CUSOLVER_STATUS_IRS_INTERNAL_ERROR = 20,
CUSOLVER_STATUS_IRS_NOT_SUPPORTED = 21,
CUSOLVER_STATUS_IRS_OUT_OF_RANGE = 22,
CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES = 23,
CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED = 25,
CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED = 26,
CUSOLVER_STATUS_IRS_MATRIX_SINGULAR = 30,
CUSOLVER_STATUS_INVALID_WORKSPACE = 31,
}
/// The [`cusolverEigType_t`] type indicates which type of eigenvalue the solver is.
///
/// Notice that LAPACK implementations often use Fortran integer `1` (A\*x = lambda\*B\*x), `2` (A\*B\*x = lambda\*x), `3` (B\*A\*x = lambda\*x) to indicate which type of eigenvalue the solver is.
///
/// Discriminants start at 1; there is no variant with value 0.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverEigType_t {
/// A\*x = lambda\*B\*x.
CUSOLVER_EIG_TYPE_1 = 1,
/// A\*B\*x = lambda\*x.
CUSOLVER_EIG_TYPE_2 = 2,
/// B\*A\*x = lambda\*x.
CUSOLVER_EIG_TYPE_3 = 3,
}
/// The [`cusolverEigMode_t`] type indicates whether or not eigenvectors are computed.
///
/// Notice that LAPACK implementations often use Fortran character `'N'` (only eigenvalues are computed), `'V'` (both eigenvalues and eigenvectors are computed) to indicate whether or not eigenvectors are computed.
///
/// Represented as a `u32`-sized enum with discriminants 0 and 1.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverEigMode_t {
/// Only eigenvalues are computed.
CUSOLVER_EIG_MODE_NOVECTOR = 0,
/// Both eigenvalues and eigenvectors are computed.
CUSOLVER_EIG_MODE_VECTOR = 1,
}
/// cuSOLVER eigenvalue range selector, represented as a `u32`-sized enum with
/// discriminants 1001 through 1003.
///
/// NOTE(review): the variant names presumably mirror LAPACK's `RANGE` argument
/// (`'A'` all, `'I'` by index, `'V'` by value interval) — confirm against the cuSOLVER docs.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverEigRange_t {
/// Discriminant 1001.
CUSOLVER_EIG_RANGE_ALL = 1001,
/// Discriminant 1002.
CUSOLVER_EIG_RANGE_I = 1002,
/// Discriminant 1003.
CUSOLVER_EIG_RANGE_V = 1003,
}
/// cuSOLVER norm selector, represented as a `u32`-sized enum with discriminants 104
/// through 107.
///
/// NOTE(review): the variant names presumably denote the infinity, max, one and Frobenius
/// norms — confirm against the cuSOLVER docs.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverNorm_t {
/// Discriminant 104.
CUSOLVER_INF_NORM = 104,
/// Discriminant 105.
CUSOLVER_MAX_NORM = 105,
/// Discriminant 106.
CUSOLVER_ONE_NORM = 106,
/// Discriminant 107.
CUSOLVER_FRO_NORM = 107,
}
/// The [`cusolverIRSRefinement_t`] type indicates which solver type would be used for the specific cusolver function. Most of our experimentation shows that [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`] is the best option.
///
/// *More details about the refinement process can be found in Azzam Haidar, Stanimire Tomov, Jack Dongarra, and Nicholas J. Higham. 2018. Harnessing GPU tensor cores for fast FP16 arithmetic to speed up mixed-precision iterative refinement solvers. In Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis (SC ‘18). IEEE Press, Piscataway, NJ, USA, Article 47, 11 pages.*.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverIRSRefinement_t {
/// Solver is not set; this value is what is set when creating the `params` structure. IRS solver will return an error.
CUSOLVER_IRS_REFINE_NOT_SET = 1100,
/// No refinement solver, the IRS solver performs a factorization followed by a solve without any refinement. For example if the IRS solver was [`cusolverDnIRSXgesv`], this is equivalent to a Xgesv routine without refinement and where the factorization is carried out in the lowest precision. If for example the main precision was CUSOLVER_R_64F and the lowest was CUSOLVER_R_64F as well, then this is equivalent to a call to `cusolverDnDgesv()`.
CUSOLVER_IRS_REFINE_NONE = 1101,
/// Classical iterative refinement solver. Similar to the one used in LAPACK routines.
CUSOLVER_IRS_REFINE_CLASSICAL = 1102,
/// Classical iterative refinement solver that uses the GMRES (Generalized Minimal Residual) internally to solve the correction equation at each iteration. We call the *classical refinement iteration* the outer iteration while the `GMRES` is called inner iteration. Note that if the tolerance of the inner GMRES is set very low, let's say to machine precision, then the outer *classical refinement iteration* will perform only one iteration and thus this option will behave like [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`].
CUSOLVER_IRS_REFINE_CLASSICAL_GMRES = 1103,
/// GMRES (Generalized Minimal Residual) based iterative refinement solver. In recent study, the GMRES method has drawn the scientific community attention for its ability to be used as refinement solver that outperforms the classical iterative refinement method. Based on our experimentation, we recommend this setting.
CUSOLVER_IRS_REFINE_GMRES = 1104,
/// Similar to [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`] which consists of classical refinement process that uses GMRES to solve the inner correction system; here it is a GMRES (Generalized Minimal Residual) based iterative refinement solver that uses another GMRES internally to solve the preconditioned system.
CUSOLVER_IRS_REFINE_GMRES_GMRES = 1105,
/// Discriminant 1106; no upstream description in this file.
CUSOLVER_IRS_REFINE_GMRES_NOPCOND = 1106,
// NOTE(review): the three `CUSOLVER_PREC_*` variants below are part of this enum in the
// bindings even though their names do not follow the `IRS_REFINE` scheme — confirm their
// intended use against the cuSOLVER header.
CUSOLVER_PREC_DD = 1150,
CUSOLVER_PREC_SS = 1151,
CUSOLVER_PREC_SHT = 1152,
}
/// cuSOLVER precision/data-type selector, represented as a `u32`-sized enum.
///
/// The `CUSOLVER_R_*` variants use discriminants 1201–1208 and the `CUSOLVER_C_*` variants
/// 1211–1218; 1209 and 1210 are unused.
/// NOTE(review): `R`/`C` presumably stand for real/complex and the suffix for the element
/// format (e.g. `64F`, `16BF`, `TF32`) — confirm against the cuSOLVER docs.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverPrecType_t {
CUSOLVER_R_8I = 1201,
CUSOLVER_R_8U = 1202,
CUSOLVER_R_64F = 1203,
CUSOLVER_R_32F = 1204,
CUSOLVER_R_16F = 1205,
CUSOLVER_R_16BF = 1206,
CUSOLVER_R_TF32 = 1207,
CUSOLVER_R_AP = 1208,
CUSOLVER_C_8I = 1211,
CUSOLVER_C_8U = 1212,
CUSOLVER_C_64F = 1213,
CUSOLVER_C_32F = 1214,
CUSOLVER_C_16F = 1215,
CUSOLVER_C_16BF = 1216,
CUSOLVER_C_TF32 = 1217,
CUSOLVER_C_AP = 1218,
}
/// The [`cusolverAlgMode_t`] type indicates which algorithm is selected by [`cusolverDnSetAdvOptions`]. The set of algorithms supported for each routine is described in detail along with the routine’s documentation.
///
/// The default algorithm is [`cusolverAlgMode_t::CUSOLVER_ALG_0`]. The user can also provide `NULL` to use the default algorithm.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverAlgMode_t {
/// The default algorithm.
CUSOLVER_ALG_0 = 0,
/// Discriminant 1.
CUSOLVER_ALG_1 = 1,
/// Discriminant 2.
CUSOLVER_ALG_2 = 2,
}
/// Specifies how the vectors which define the elementary reflectors are stored.
///
/// Represented as a `u32`-sized enum with discriminants 0 and 1.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverStorevMode_t {
/// Columnwise.
CUBLAS_STOREV_COLUMNWISE = 0,
/// Rowwise.
CUBLAS_STOREV_ROWWISE = 1,
}
/// Specifies the order in which the elementary reflectors are multiplied to form the block reflector.
///
/// Represented as a `u32`-sized enum with discriminants 0 and 1.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverDirectMode_t {
/// Forward.
CUBLAS_DIRECT_FORWARD = 0,
/// Backward.
CUBLAS_DIRECT_BACKWARD = 1,
}
/// The [`cusolverDeterministicMode_t`] type indicates whether multiple cuSolver function executions with the same input have the same bitwise equal result (deterministic) or might have bitwise different results (non-deterministic). In comparison to [cublasAtomicsMode_t](https://docs.nvidia.com/cuda/cublas/#cublasatomicsmode-t), which only includes the usage of atomic functions, [`cusolverDeterministicMode_t`] includes all non-deterministic programming patterns. The deterministic mode can be set and queried using [`cusolverDnSetDeterministicMode`] and [`cusolverDnGetDeterministicMode`] routines, respectively.
///
/// Discriminants start at 1; there is no variant with value 0.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverDeterministicMode_t {
/// Compute deterministic results.
CUSOLVER_DETERMINISTIC_RESULTS = 1,
/// Allow non-deterministic results.
CUSOLVER_ALLOW_NON_DETERMINISTIC_RESULTS = 2,
}
/// The [`cusolverMathMode_t`] type is used in [`cusolverDnSetMathMode`] to choose compute precision modes; the individual modes are described on the variants below.
///
/// The following combinations of [`cusolverMathMode_t`] using the bitwise OR operator are allowed:
///
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`] = [`cusolverMathMode_t::CUSOLVER_FP32_EMULATED_BF16X9_MATH`] | [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`].
///
/// Because this is a Rust enum rather than an integer, only the listed discriminants
/// (1, 2, 4 and the pre-combined 6) are representable.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusolverMathMode_t {
/// This is the default math mode. Tensor Cores will be used whenever possible.
CUSOLVER_DEFAULT_MATH = 1,
/// Use FP32 emulation according to the configured emulation strategy (see [`cusolverDnSetEmulationStrategy`]).
CUSOLVER_FP32_EMULATED_BF16X9_MATH = 2,
/// Use FP64 emulation according to the configured emulation strategy (see [`cusolverDnSetEmulationStrategy`]).
CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH = 4,
/// Combination of [`cusolverMathMode_t::CUSOLVER_FP32_EMULATED_BF16X9_MATH`] and [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`].
CUSOLVER_FP32_FP64_EMULATED_MATH = 6,
}
unsafe extern "C" {
/// Foreign declaration of the C function `cusolverGetProperty`.
///
/// NOTE(review): presumably writes the library property selected by `type_` into
/// `*value` — confirm against the cuSOLVER documentation.
///
/// # Parameters
///
/// - `type_`: Selector of type `libraryPropertyType`.
/// - `value`: Pointer to a C `int`; presumably the output location — TODO confirm.
pub fn cusolverGetProperty(
type_: libraryPropertyType,
value: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Foreign declaration of the C function `cusolverGetVersion`.
///
/// NOTE(review): presumably writes the runtime library version into `*version` (compare
/// [`CUSOLVER_VERSION`]) — confirm against the cuSOLVER documentation.
pub fn cusolverGetVersion(version: *mut ::core::ffi::c_int) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function initializes the cuSolverDN library and creates a handle on the cuSolverDN context. It must be called before any other cuSolverDN API function is invoked. It allocates hardware resources necessary for accessing the GPU.
/// This function allocates 4 MiB or 32 MiB of memory (for GPUs with Compute Capability of 9.0 and higher), which will be used as the cuBLAS workspace for the first user-defined stream on which [`cusolverDnSetStream`] is called.
/// For the default stream and in all the other cases, cuBLAS will manage its own workspace.
///
/// # Parameters
///
/// - `handle`: The pointer to the handle to the cuSolverDN context.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: The resources could not be allocated.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ARCH_MISMATCH`]: The device only supports compute capability 5.0 and above.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The CUDA Runtime initialization failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The initialization succeeded.
///
/// # Safety
///
/// NOTE(review): presumably `handle` must point to writable storage for one
/// [`cusolverDnHandle_t`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnCreate(handle: *mut cusolverDnHandle_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function releases CPU-side resources used by the cuSolverDN library.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The shutdown succeeded.
///
/// # Safety
///
/// NOTE(review): presumably `handle` must come from [`cusolverDnCreate`] and must not be
/// used again after this call — confirm against the cuSOLVER documentation.
pub fn cusolverDnDestroy(handle: cusolverDnHandle_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function sets the stream to be used by the cuSolverDN library to execute its routines.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `streamId`: The stream to be used by the library.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The stream was set successfully.
///
/// # Safety
///
/// NOTE(review): presumably `handle` must be a live handle from [`cusolverDnCreate`] and
/// `streamId` a valid CUDA stream — confirm against the cuSOLVER documentation.
pub fn cusolverDnSetStream(
handle: cusolverDnHandle_t,
streamId: cudaStream_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function queries the stream to be used by the cuSolverDN library to execute its routines.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `streamId`: The stream which is used by `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The stream was queried successfully.
///
/// # Safety
///
/// NOTE(review): presumably `streamId` must point to writable storage for one
/// `cudaStream_t` — confirm against the cuSOLVER documentation.
pub fn cusolverDnGetStream(
handle: cusolverDnHandle_t,
streamId: *mut cudaStream_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function sets the deterministic mode of all cuSolverDN functions for `handle`. For improved performance,
/// non-deterministic results can be allowed. Affected functions are `cusolverDn<t>geqrf()`, `cusolverDn<t>syevd()`, `cusolverDn<t>syevdx()`, `cusolverDn<t>gesvd()` (if `m > n`), `cusolverDn<t>gesvdj()`, [`cusolverDnXgeqrf`], [`cusolverDnXsyevd`], [`cusolverDnXsyevdx`], [`cusolverDnXgesvd`] (if `m > n`), [`cusolverDnXgesvdr`] and [`cusolverDnXgesvdp`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mode`: The deterministic mode to be used with `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The modes were set successfully.
///
/// # Safety
///
/// NOTE(review): presumably `handle` must be a live handle from [`cusolverDnCreate`] —
/// confirm against the cuSOLVER documentation.
pub fn cusolverDnSetDeterministicMode(
handle: cusolverDnHandle_t,
mode: cusolverDeterministicMode_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function queries the deterministic mode which is set for `handle`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mode`: The deterministic mode of `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `mode` is a `NULL` pointer.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The modes were queried successfully.
///
/// # Safety
///
/// NOTE(review): presumably a non-null `mode` must point to writable storage for one
/// [`cusolverDeterministicMode_t`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnGetDeterministicMode(
handle: cusolverDnHandle_t,
mode: *mut cusolverDeterministicMode_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function sets the math modes of all cuSolverDN functions for `handle`. For more information about the effects of the corresponding math modes, please refer to [`cusolverMathMode_t`]. Note that math modes can be combined, e.g., `cusolverDnSetMathMode(handle, CUSOLVER_FP32_EMULATED_BF16X9_MATH | CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH)`. Please see [`cusolverMathMode_t`] for allowed combinations.
///
/// In these bindings `mode` is a Rust enum, so the combination shown above is passed as
/// [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mode`: The math mode to be used with `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: An invalid mode was given.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mode was set successfully.
pub fn cusolverDnSetMathMode(
handle: cusolverDnHandle_t,
mode: cusolverMathMode_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function queries the math modes which are set for `handle`. Note that math modes can be combined, e.g., `cusolverDnSetMathMode(handle, CUSOLVER_FP32_EMULATED_BF16X9_MATH | CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH)`. Please see [`cusolverMathMode_t`] for allowed combinations.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mode`: The math mode of `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `mode` is a `NULL` pointer.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mode was queried successfully.
///
/// # Safety
///
/// NOTE(review): presumably a non-null `mode` must point to writable storage for one
/// [`cusolverMathMode_t`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnGetMathMode(
handle: cusolverDnHandle_t,
mode: *mut cusolverMathMode_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function sets the emulation strategy of all cuSolverDN functions for `handle`. For more information about the effects of the corresponding strategies, please refer to the analogous definition of [cublasEmulationStrategy_t](https://docs.nvidia.com/cuda/cublas/#cublasemulationstrategy-t).
///
/// The emulation strategy set by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP32_EMULATED_BF16X9_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `strategy`: The emulation strategy to be used with `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `strategy` was not a supported emulation strategy.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The strategy was set successfully.
pub fn cusolverDnSetEmulationStrategy(
handle: cusolverDnHandle_t,
strategy: cudaEmulationStrategy_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function queries the emulation strategy which is set for `handle`.
///
/// The emulation strategy returned by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP32_EMULATED_BF16X9_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `strategy`: The emulation strategy of `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `strategy` is a `NULL` pointer.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The strategy was queried successfully.
///
/// # Safety
///
/// NOTE(review): presumably a non-null `strategy` must point to writable storage for one
/// `cudaEmulationStrategy_t` — confirm against the cuSOLVER documentation.
pub fn cusolverDnGetEmulationStrategy(
handle: cusolverDnHandle_t,
strategy: *mut cudaEmulationStrategy_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets how the number of mantissa bits is determined for fixed point FP64 emulation. For more information about the effects of the corresponding control modes, please refer to [`cudaEmulationMantissaControl_t`].
///
/// The mantissa control set by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `control`: The mantissa control mode to be used with `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `control` is not a valid [`cudaEmulationMantissaControl_t`] value.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mantissa control was set successfully.
pub fn cusolverDnSetFixedPointEmulationMantissaControl(
handle: cusolverDnHandle_t,
control: cudaEmulationMantissaControl_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries how the number of mantissa bits is determined for fixed point FP64 emulation.
///
/// The mantissa control returned by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `control`: Pointer to the location that receives the mantissa control mode of `handle`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `control` is a `NULL` pointer.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mantissa control was queried successfully.
pub fn cusolverDnGetFixedPointEmulationMantissaControl(
handle: cusolverDnHandle_t,
control: *mut cudaEmulationMantissaControl_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the maximum number of mantissa bits for fixed point FP64 emulation.
///
/// The maximum mantissa bit count set by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mantissaBitCount`: The maximum number of mantissa bits to be used. Setting `mantissaBitCount = 0` resets to the default configuration as described in [cuBLAS defaults](https://docs.nvidia.com/cuda/cublas/#default-library-configurations).
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `mantissaBitCount` is less than `0`.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mantissa bit count was set successfully.
pub fn cusolverDnSetFixedPointEmulationMaxMantissaBitCount(
handle: cusolverDnHandle_t,
mantissaBitCount: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the maximum number of mantissa bits used for fixed point FP64 emulation.
///
/// The mantissa bit count returned by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mantissaBitCount`: Pointer to the location that receives the maximum number of mantissa bits used.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `mantissaBitCount` is a `NULL` pointer.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mantissa bit count was queried successfully.
pub fn cusolverDnGetFixedPointEmulationMaxMantissaBitCount(
handle: cusolverDnHandle_t,
mantissaBitCount: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the mantissa bit offset for fixed point FP64 emulation in case of dynamic mantissa control mode.
///
/// The mantissa bit offset set by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// **And** the following mantissa control is enabled (see also [`cusolverDnSetFixedPointEmulationMantissaControl`]):
///
/// * `CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC`
///
/// You may tune values for `mantissaBitOffset` based on your accuracy and performance requirements, e.g., choose negative values `-8`, `-16`, … for better performance, while fewer mantissa bits may reduce accuracy.
///
/// Please note that non-zero values of `mantissaBitOffset` require the mantissa control to be equal to `CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC`. Otherwise, the computational cuSOLVER routines will return [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], indicating an unsupported handle state.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mantissaBitOffset`: The mantissa bit offset (default = `0`) to be used when the mantissa control is `CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mantissa bit offset was set successfully.
pub fn cusolverDnSetFixedPointEmulationMantissaBitOffset(
handle: cusolverDnHandle_t,
mantissaBitOffset: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the mantissa bit offset for fixed point FP64 emulation in case of dynamic mantissa control mode.
///
/// The mantissa bit offset returned by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// **And** the following mantissa control is enabled (see also [`cusolverDnSetFixedPointEmulationMantissaControl`]):
///
/// * `CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC`
///
/// Please note that non-zero values of `mantissaBitOffset` require the mantissa control to be equal to `CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC`. Otherwise, the computational cuSOLVER routines will return [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], indicating an unsupported handle state.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mantissaBitOffset`: Pointer to the location that receives the mantissa bit offset used when `CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC` is in use.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `mantissaBitOffset` is a `NULL` pointer.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The mantissa bit offset was queried successfully.
pub fn cusolverDnGetFixedPointEmulationMantissaBitOffset(
handle: cusolverDnHandle_t,
mantissaBitOffset: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the handling of special floating point values for `handle`, which is used **once** floating point emulation is allowed.
///
/// The special value support set by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP32_EMULATED_BF16X9_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mask`: If set to `CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_DEFAULT`, special values are propagated as expected. Performance of floating point emulated math may improve if set to `CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NONE`, for which the propagation of special values is undefined. See [`cudaEmulationSpecialValuesSupport_t`] for the allowed values.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The special value support was set successfully.
pub fn cusolverDnSetEmulationSpecialValuesSupport(
handle: cusolverDnHandle_t,
mask: cudaEmulationSpecialValuesSupport_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the special floating point value support which is set for `handle` if floating point emulation is allowed.
///
/// The special floating point value support returned by this API only has an effect **once** one of the following math modes is enabled (see also [`cusolverMathMode_t`]):
///
/// * [`cusolverMathMode_t::CUSOLVER_FP32_EMULATED_BF16X9_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP64_EMULATED_FIXEDPOINT_MATH`]
/// * [`cusolverMathMode_t::CUSOLVER_FP32_FP64_EMULATED_MATH`]
///
/// Otherwise, special floating point values are handled as expected.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `mask`: Pointer to the location that receives the special value support of `handle`. Please see [`cudaEmulationSpecialValuesSupport_t`] for more information about the possible values of `mask`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: `mask` is a `NULL` pointer.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The special value handling was queried successfully.
pub fn cusolverDnGetEmulationSpecialValuesSupport(
handle: cusolverDnHandle_t,
mask: *mut cudaEmulationSpecialValuesSupport_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Creates the structure of parameters for an IRS solver, such as the [`cusolverDnIRSXgesv`] or the [`cusolverDnIRSXgels`] functions, and initializes it to default values.
///
/// The `params` structure created by this function can be used by one or more calls to the same or to a different IRS solver. Note that in CUDA 10.2 the behavior was different, and a new `params` structure had to be created for each call to an IRS solver.
///
/// Also note that the user can change the configuration of the `params` structure and then start a new IRS call, but must make sure that the previous call has finished first: any change to the configuration made before the previous call is done could affect that call.
///
/// # Parameters
///
/// - `params_ptr`: Pointer to the location that receives the newly created [`cusolverDnIRSParams_t`] `Params` structure.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: The resources could not be allocated.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The structure was created and initialized successfully.
pub fn cusolverDnIRSParamsCreate(
params_ptr: *mut cusolverDnIRSParams_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Destroys the `Params` structure and releases any memory required by it.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED`]: Not all the `Infos` structures associated with this `Params` structure have been destroyed yet.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The resources were released successfully.
pub fn cusolverDnIRSParamsDestroy(params: cusolverDnIRSParams_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the refinement solver to be used in the Iterative Refinement Solver functions such as the [`cusolverDnIRSXgesv`] or the [`cusolverDnIRSXgels`] functions. Note that the user has to set the refinement algorithm before the first call to the IRS solver because it is NOT set by default when the params structure is created. The values that can be set and their meanings are described below.
///
/// [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_NOT_SET`]: Solver is not set; this is the value set when the params structure is created. The IRS solver will return an error.
///
/// [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_NONE`]: No refinement solver; the IRS solver performs a factorization followed by a solve without any refinement. For example, if the IRS solver was [`cusolverDnIRSXgesv`], this is equivalent to a Xgesv routine without refinement and where the factorization is carried out in the lowest precision. If, for example, the main precision was `CUSOLVER_R_64F` and the lowest was `CUSOLVER_R_64F` as well, then this is equivalent to a call to `cusolverDnDgesv()`.
///
/// [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL`]: Classical iterative refinement solver. Similar to the one used in LAPACK routines.
///
/// [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`]: GMRES (Generalized Minimal Residual) based iterative refinement solver. In recent studies, the GMRES method has drawn the scientific community's attention for its ability to be used as a refinement solver that outperforms the classical iterative refinement method. Based on our experimentation, we recommend this setting.
///
/// [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`]: Classical iterative refinement solver that uses GMRES (Generalized Minimal Residual) internally to solve the correction equation at each iteration. We call the *classical refinement iteration* the outer iteration, while the `GMRES` is called the inner iteration. Note that if the tolerance of the inner GMRES is set very low, say to machine precision, then the outer *classical refinement iteration* will perform only one iteration and thus this option will behave like [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`].
///
/// [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES_GMRES`]: Similar to [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`], which consists of a classical refinement process that uses GMRES to solve the inner correction system; here it is a GMRES (Generalized Minimal Residual) based iterative refinement solver that uses another GMRES internally to solve the preconditioned system.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `refinement_solver`: The refinement solver to be used; one of the [`cusolverIRSRefinement_t`] values described above.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetRefinementSolver(
params: cusolverDnIRSParams_t,
refinement_solver: cusolverIRSRefinement_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the main precision for the Iterative Refinement Solver (IRS). By main precision, we mean the type of the Input and Output data. Note that the user has to set both the main and lowest precision before the first call to the IRS solver because they are NOT set by default with the `params` structure creation, as they depend on the Input/Output data type and user request. The user can set it either by calling this function or by calling [`cusolverDnIRSParamsSetSolverPrecisions`], which sets both the main and the lowest precision together. All possible combinations of main/lowest precision are described in the table in the [`cusolverDnIRSParamsSetSolverPrecisions`] documentation.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `solver_main_precision`: Allowed Inputs/Outputs datatype (for example `CUSOLVER_R_64F` for real double precision data). See the table in the [`cusolverDnIRSParamsSetSolverPrecisions`] documentation for the supported precisions.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetSolverMainPrecision(
params: cusolverDnIRSParams_t,
solver_main_precision: cusolverPrecType_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the lowest precision that will be used by the Iterative Refinement Solver. By lowest precision, we mean the lowest computational precision the solver is allowed to use during the LU factorization process. Note that the user has to set both the main and lowest precision before the first call to the IRS solver because they are NOT set by default with the `params` structure creation, as they depend on the Input/Output data type and user request.
///
/// Usually the lowest precision defines the speedup that can be achieved. The ratio of the performance of the lowest precision over the main precision (e.g., Inputs/Outputs datatype) roughly defines the upper bound of the speedup that could be obtained. More precisely, it depends on many factors, but for large matrix sizes, it is the ratio of the matrix-matrix rank-k product (e.g., GEMM where K is 256 and M=N=size of the matrix) that defines the possible speedup. For instance, if the Inputs/Outputs precision is real double precision `CUSOLVER_R_64F` and the lowest precision is `CUSOLVER_R_32F`, then we can expect a speedup of at most 2X for large problem sizes. If the lowest precision was `CUSOLVER_R_16F`, then we can expect 3X-4X. A reasonable strategy should take the number of right-hand sides, the size of the matrix, as well as the convergence rate into account.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `solver_lowest_precision`: Allowed lowest compute type (for example `CUSOLVER_R_16F` for half precision computation). See the table in the [`cusolverDnIRSParamsSetSolverPrecisions`] documentation for the supported precisions.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetSolverLowestPrecision(
params: cusolverDnIRSParams_t,
solver_lowest_precision: cusolverPrecType_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets both the main and the lowest precision for the Iterative Refinement Solver (IRS). By main precision, we mean the precision of the Input and Output datatype. By lowest precision, we mean the lowest computational precision the solver is allowed to use during the LU factorization process. Note that the user has to set both the main and lowest precision before the first call to the IRS solver because they are NOT set by default with the `params` structure creation, as they depend on the Input/Output data type and user request. This function is a wrapper around both [`cusolverDnIRSParamsSetSolverMainPrecision`] and [`cusolverDnIRSParamsSetSolverLowestPrecision`]. All possible combinations of main/lowest precision are described in the table below.
///
/// Usually the lowest precision defines the speedup that can be achieved. The ratio of the performance of the lowest precision over the main precision (e.g., Inputs/Outputs datatype) defines the upper bound of the speedup that could be obtained. More precisely, it depends on many factors, but for large matrix sizes, it is the ratio of the matrix-matrix rank-k product (e.g., GEMM where K is 256 and M=N=size of the matrix) that defines the possible speedup. For instance, if the Inputs/Outputs precision is real double precision `CUSOLVER_R_64F` and the lowest precision is `CUSOLVER_R_32F`, then we can expect a speedup of at most 2X for large problem sizes. If the lowest precision was `CUSOLVER_R_16F`, then we can expect 3X-4X. A reasonable strategy should take the number of right-hand sides, the size of the matrix, as well as the convergence rate into account.
///
/// **Supported Inputs/Outputs data type and lower precision for the IRS solver**
///
/// | **Inputs/Outputs Data Type (e.g., main precision)** | **Supported values for the lowest precision** |
/// | --- | --- |
/// | [`cusolverPrecType_t::CUSOLVER_C_64F`] | `CUSOLVER_C_64F, CUSOLVER_C_32F, CUSOLVER_C_16F, CUSOLVER_C_16BF, CUSOLVER_C_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_C_32F`] | `CUSOLVER_C_32F, CUSOLVER_C_16F, CUSOLVER_C_16BF, CUSOLVER_C_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_R_64F`] | `CUSOLVER_R_64F, CUSOLVER_R_32F, CUSOLVER_R_16F, CUSOLVER_R_16BF, CUSOLVER_R_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_R_32F`] | `CUSOLVER_R_32F, CUSOLVER_R_16F, CUSOLVER_R_16BF, CUSOLVER_R_TF32` |
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `solver_main_precision`: Allowed Inputs/Outputs datatype (for example `CUSOLVER_R_64F` for real double precision data). See the table above for the supported precisions.
/// - `solver_lowest_precision`: Allowed lowest compute type (for example `CUSOLVER_R_16F` for half precision computation). See the table above for the supported precisions.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetSolverPrecisions(
params: cusolverDnIRSParams_t,
solver_main_precision: cusolverPrecType_t,
solver_lowest_precision: cusolverPrecType_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the tolerance for the refinement solver. By default it is such that all the RHS satisfy:
///
/// `RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX` where
///
/// * RNRM is the infinity-norm of the residual
/// * XNRM is the infinity-norm of the solution
/// * ANRM is the infinity-operator-norm of the matrix A
/// * EPS is the machine epsilon for the Inputs/Outputs datatype that matches LAPACK `<X>LAMCH('Epsilon')`
/// * BWDMAX is fixed to 1.0
///
/// The user can use this function to change the tolerance to a lower or higher value. The goal is to give the user more control, so that they can investigate and control every detail of the IRS solver. Note that the tolerance value is always in *real double precision*, whatever the Inputs/Outputs datatype is.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `val`: Double precision real value to which the refinement tolerance will be set.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetTol(
params: cusolverDnIRSParams_t,
val: f64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the tolerance for the inner refinement solver (e.g. the inner GMRES) when the refinement solver is a two-level solver (for example, the [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES_GMRES`] cases). It is not referenced in the case of a one-level refinement solver such as [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`]. It is set to 1e-4 by default.
///
/// For example, if the refinement solver was set to [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`], setting this tolerance means that the inner GMRES solver will converge to that tolerance at each outer iteration of the classical refinement solver. The goal is to give the user more control, so that they can investigate and control every detail of the IRS solver. Note that the tolerance value is always in *real double precision*, whatever the Inputs/Outputs datatype is.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `val`: Double precision real value to which the tolerance of the inner refinement solver will be set.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetTolInner(
params: cusolverDnIRSParams_t,
val: f64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the total number of allowed refinement iterations after which the solver will stop. Total means the sum of the outer and the inner iterations (inner is meaningful when a two-level refinement solver is set). The default value is 50. The goal is to give the user more control, so that they can investigate and control every detail of the IRS solver.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `maxiters`: The total number of allowed refinement iterations (outer plus inner) to be set.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetMaxIters(
params: cusolverDnIRSParams_t,
maxiters: cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the maximal number of iterations allowed for the inner refinement solver. It is not referenced in the case of a one-level refinement solver such as [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`]. The inner refinement solver will stop after reaching either the inner tolerance or the `MaxItersInner` value. By default, it is set to 50. Note that this value cannot be larger than `MaxIters`, since `MaxIters` is the total number of allowed iterations. Note that if the user calls [`cusolverDnIRSParamsSetMaxIters`] after calling this function, `SetMaxIters` has priority and will overwrite `MaxItersInner` with the minimum of `(MaxIters, MaxItersInner)`.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `maxiters_inner`: Maximum number of allowed inner iterations for the inner refinement solver. Meaningful when the refinement solver is a two-level solver such as [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES_GMRES`]. The value should be less than or equal to `MaxIters`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID`]: The value was larger than `MaxIters`.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsSetMaxItersInner(
params: cusolverDnIRSParams_t,
maxiters_inner: cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Returns the current setting in the `params` structure for the maximal allowed number of iterations (for example, either the default `MaxIters`, or the one set by the user using [`cusolverDnIRSParamsSetMaxIters`]).
///
/// Note that this function returns the current setting in the `params` configuration and is not to be confused with [`cusolverDnIRSInfosGetMaxIters`], which returns the maximal allowed number of iterations for a particular call to an IRS solver. To be clearer: the `params` structure can be used for many calls to an IRS solver, and a user can change the allowed `MaxIters` between calls. The `Infos` structure in [`cusolverDnIRSInfosGetMaxIters`], by contrast, contains information about one particular call and cannot be reused for different calls; thus, [`cusolverDnIRSInfosGetMaxIters`] returns the allowed `MaxIters` for that call.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
/// - `maxiters`: Pointer to the location that receives the maximal number of iterations that is currently set.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsGetMaxIters(
params: cusolverDnIRSParams_t,
maxiters: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Enables the fallback to the main precision in case the Iterative Refinement Solver (IRS) fails to converge.
///
/// In other words, if the IRS solver fails to converge, the solver will return a no-convergence code (e.g., `niter` < 0), but can either return the non-convergent solution as it is (fallback disabled) or fall back (fallback enabled) to the main precision (which is the precision of the Inputs/Outputs data) and solve the problem from scratch, returning the good solution. Fallback is the default behavior, and it guarantees that the IRS solver always provides the good solution. This function exists because [`cusolverDnIRSParamsDisableFallback`] allows the user to disable the fallback, and this function allows the user to re-enable it.
///
/// # Parameters
///
/// - `params`: The [`cusolverDnIRSParams_t`] `Params` structure.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsEnableFallback(
params: cusolverDnIRSParams_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Disables the fallback to the main precision in case the Iterative Refinement Solver (IRS) fails to converge.
///
/// In other words, if the IRS solver fails to converge, the solver will return a no-convergence code (e.g., `niter` < 0), but can either return the non-convergent solution as it is (e.g., disable fallback) or can fall back (e.g., enable fallback) to the main precision (which is the precision of the Inputs/Outputs data) and solve the problem from scratch, returning the good solution. This function disables the fallback, and the returned solution is whatever the refinement solver was able to reach before it returned. Disabling fallback does not guarantee that the solution is the good one. However, if users want to keep getting the solution of the lower precision in case the IRS did not converge after a certain number of iterations, they need to disable the fallback. The user can re-enable it by calling [`cusolverDnIRSParamsEnableFallback`].
///
/// # Parameters
///
/// - `params`: The `cusolverDnIRSParams_t Params` structure.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The `Params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSParamsDisableFallback(
params: cusolverDnIRSParams_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Destroys the `Infos` structure and releases any memory required by it.
///
/// This function destroys all the information (for example, Niters performed, OuterNiters performed, residual history etc.) about a solver call; thus, it should only be called after the user is finished with that information.
///
/// # Parameters
///
/// - `infos`: The `cusolverDnIRSInfos_t Infos` structure to destroy.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The `Infos` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The resources were released successfully.
pub fn cusolverDnIRSInfosDestroy(infos: cusolverDnIRSInfos_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Creates and initializes the `Infos` structure that will hold the refinement information of an Iterative Refinement Solver (IRS) call.
///
/// Such information includes the total number of iterations that was needed to converge (`Niters`), the outer number of iterations (meaningful when a two-level preconditioner such as [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`] is used), the maximal number of iterations that was allowed for that call, and a pointer to the matrix of the convergence history residual norms. The `Infos` structure needs to be created before a call to an IRS solver. The `Infos` structure is valid for only one call to an IRS solver, since it holds info about that solve, and thus each solve requires its own `Infos` structure.
///
/// # Parameters
///
/// - `infos_ptr`: Pointer to the `cusolverDnIRSInfos_t Infos` structure to create and initialize.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: The resources could not be allocated.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The structure was initialized successfully.
pub fn cusolverDnIRSInfosCreate(
infos_ptr: *mut cusolverDnIRSInfos_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Returns the total number of iterations performed by the IRS solver.
///
/// If the value is negative, the IRS solver did not converge; if the user did not disable the fallback to full precision, then the fallback to a full precision solution happened and the solution is good. Please refer to the description of negative `niters` values in the corresponding IRS linear solver functions such as `cusolverDnXgesv()` or `cusolverDnXgels()`.
///
/// # Parameters
///
/// - `infos`: The `cusolverDnIRSInfos_t Infos` structure.
/// - `niters`: The total number of iterations performed by the IRS solver.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The `Infos` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSInfosGetNiters(
infos: cusolverDnIRSInfos_t,
niters: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Returns the number of iterations performed by the outer refinement loop of the IRS solver.
///
/// When the refinement solver consists of a one-level solver such as [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`], it is the same as `Niters`. When the refinement solver consists of a two-level solver such as [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES_GMRES`], it is the number of iterations of the outer loop. Refer to the description of [`cusolverIRSRefinement_t`] for more details.
///
/// # Parameters
///
/// - `infos`: The `cusolverDnIRSInfos_t Infos` structure.
/// - `outer_niters`: The number of iterations of the outer refinement loop of the IRS solver.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The `Infos` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSInfosGetOuterNiters(
infos: cusolverDnIRSInfos_t,
outer_niters: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Requests that the IRS solver store the convergence history (residual norms) of the refinement phase.
///
/// The history is stored in a matrix that can be accessed via a pointer returned by the [`cusolverDnIRSInfosGetResidualHistory`] function.
///
/// # Parameters
///
/// - `infos`: The `cusolverDnIRSInfos_t Infos` structure.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The `Infos` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSInfosRequestResidual(
infos: cusolverDnIRSInfos_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Returns a pointer to the matrix holding the convergence history (residual norms) of the refinement phase.
///
/// If the user called [`cusolverDnIRSInfosRequestResidual`] before the call to the IRS function, then the IRS solver will store the convergence history (residual norms) of the refinement phase in a matrix that can be accessed via a pointer returned by this function. The datatype of the residual norms depends on the input and output data type. If the Inputs/Outputs datatype is double precision real or complex (CUSOLVER_R_FP64 or CUSOLVER_C_FP64), this residual will be of type real double precision (FP64) `double`; otherwise, if the Inputs/Outputs datatype is single precision real or complex (CUSOLVER_R_FP32 or CUSOLVER_C_FP32), this residual will be real single precision FP32 `float`.
///
/// The residual history matrix consists of two columns (even for the multiple right-hand side case NRHS) of `MaxIters+1` rows, thus a matrix of size (`MaxIters+1,2`). Only the first `OuterNiters+1` rows contain the residual norms; the others (e.g., OuterNiters+2:Maxiters+1) are garbage. In the first column, each row *“i”* specifies the total number of iterations that happened up to this outer iteration *“i”*, and the second column holds the residual norm corresponding to this outer iteration *“i”*. Thus, the first row (e.g., outer iteration *“0”*) consists of the initial residual (e.g., the residual before the refinement loop starts), then the consecutive rows are the residuals obtained at each outer iteration of the refinement loop. Note that it only consists of the history of the outer loop.
///
/// If the refinement solver was [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`], then OuterNiters=Niters (Niters is the total number of iterations performed) and there are Niters+1 rows of norms that correspond to the Niters outer iterations.
///
/// If the refinement solver was [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`] or [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES_GMRES`], then OuterNiters <= Niters corresponds to the outer iterations performed by the outer refinement loop. Thus, there are OuterNiters+1 residual norms where row *“i”* corresponds to the outer iteration *“i”*; the first column specifies the total number of iterations (outer and inner) that were performed up to this step, and the second column corresponds to the residual norm at this step.
///
/// For example, let’s say the user specifies [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`] as a refinement solver and say it needed 3 outer iterations to converge and 4,3,3 inner iterations at each outer, respectively. This consists of 10 total iterations. Row 0 corresponds to the first residual before the refinement starts, so it has 0 in its first column. On row 1, which corresponds to the outer iteration 1, it will be 4 (4 is the total number of iterations that were performed till now), on row 2 it will be 7, and on row 3 it will be 10.
///
/// In summary, let’s define `ldh=Maxiters+1`, the leading dimension of the residual matrix. Then `residual_history[i]` shows the total number of iterations performed at the outer iteration *“i”* and `residual_history[i+ldh]` corresponds to the norm of the residual at this outer iteration.
///
/// # Parameters
///
/// - `infos`: The `cusolverDnIRSInfos_t Infos` structure.
/// - `residual_history`: Returns a void pointer to the matrix of the convergence history residual norms. See the description above for the relation between the residual norm datatype and the input/output datatype.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: This function was called without calling [`cusolverDnIRSInfosRequestResidual`] in advance.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The `Infos` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSInfosGetResidualHistory(
infos: cusolverDnIRSInfos_t,
residual_history: *mut *mut ::core::ffi::c_void,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Returns the maximal allowed number of iterations that was set for the corresponding call to the IRS solver.
///
/// Note that this function returns the setting that was in effect when that call happened and is not to be confused with [`cusolverDnIRSParamsGetMaxIters`], which returns the current setting in the `params` configuration structure. To be clearer, the `params` structure can be used for many calls to an IRS solver. A user can change the allowed `MaxIters` between calls, while the `Infos` structure in [`cusolverDnIRSInfosGetMaxIters`] contains information about a particular call and cannot be reused for different calls; thus [`cusolverDnIRSInfosGetMaxIters`] returns the allowed `MaxIters` for that call.
///
/// # Parameters
///
/// - `infos`: The `cusolverDnIRSInfos_t Infos` structure.
/// - `maxiters`: The maximal number of iterations that was allowed for the solver call described by `infos`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The `Infos` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSInfosGetMaxIters(
infos: cusolverDnIRSInfos_t,
maxiters: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZZgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZZ` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnZZgesv_bufferSize`] — confirm.
pub fn cusolverDnZZgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZCgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZC` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnZCgesv_bufferSize`] — confirm.
pub fn cusolverDnZCgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZKgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZK` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnZKgesv_bufferSize`] — confirm.
pub fn cusolverDnZKgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZEgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZE` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnZEgesv_bufferSize`] — confirm.
pub fn cusolverDnZEgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZYgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZY` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnZYgesv_bufferSize`] — confirm.
pub fn cusolverDnZYgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCCgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CC` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnCCgesv_bufferSize`] — confirm.
pub fn cusolverDnCCgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCEgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CE` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnCEgesv_bufferSize`] — confirm.
pub fn cusolverDnCEgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCKgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CK` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnCKgesv_bufferSize`] — confirm.
pub fn cusolverDnCKgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCYgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CY` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnCYgesv_bufferSize`] — confirm.
pub fn cusolverDnCYgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDDgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DD` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnDDgesv_bufferSize`] — confirm.
pub fn cusolverDnDDgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDSgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DS` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnDSgesv_bufferSize`] — confirm.
pub fn cusolverDnDSgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDHgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DH` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnDHgesv_bufferSize`] — confirm.
pub fn cusolverDnDHgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDBgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DB` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnDBgesv_bufferSize`] — confirm.
pub fn cusolverDnDBgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDXgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DX` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnDXgesv_bufferSize`] — confirm.
pub fn cusolverDnDXgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSSgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f32`.
///
/// NOTE(review): presumably the `SS` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnSSgesv_bufferSize`] — confirm.
pub fn cusolverDnSSgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSHgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f32`.
///
/// NOTE(review): presumably the `SH` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnSHgesv_bufferSize`] — confirm.
pub fn cusolverDnSHgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSBgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f32`.
///
/// NOTE(review): presumably the `SB` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnSBgesv_bufferSize`] — confirm.
pub fn cusolverDnSBgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSXgesv`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f32`.
///
/// NOTE(review): presumably the `SX` variant of the mixed-precision iterative refinement `gesv` solver (see `cusolverDn<t1><t2>gesv()` in the cuSOLVER docs), with the workspace sized by [`cusolverDnSXgesv_bufferSize`] — confirm.
pub fn cusolverDnSXgesv(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZZgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnZZgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnZZgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZCgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnZCgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnZCgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZKgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnZKgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnZKgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZEgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnZEgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnZEgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZYgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnZYgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnZYgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCCgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnCCgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnCCgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCKgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnCKgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnCKgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCEgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnCEgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnCEgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCYgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnCYgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnCYgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDDgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnDDgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnDDgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDSgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnDSgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnDSgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDHgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnDHgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnDHgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDBgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnDBgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnDBgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDXgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnDXgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnDXgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSSgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnSSgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnSSgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSHgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnSHgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnSHgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSBgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnSBgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnSBgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSXgesv_bufferSize`; here `lwork_bytes` is a `*mut size_t` rather than a by-value size.
///
/// NOTE(review): presumably writes to `lwork_bytes` the workspace size in bytes that [`cusolverDnSXgesv`] needs for the same arguments — confirm against the cuSOLVER docs.
pub fn cusolverDnSXgesv_bufferSize(
handle: cusolverDnHandle_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dipiv: *mut cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZZgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZZ` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnZZgels_bufferSize` — confirm.
pub fn cusolverDnZZgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZCgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZC` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnZCgels_bufferSize` — confirm.
pub fn cusolverDnZCgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZKgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZK` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnZKgels_bufferSize` — confirm.
pub fn cusolverDnZKgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZEgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZE` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnZEgels_bufferSize` — confirm.
pub fn cusolverDnZEgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnZYgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuDoubleComplex`.
///
/// NOTE(review): presumably the `ZY` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnZYgels_bufferSize` — confirm.
pub fn cusolverDnZYgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCCgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CC` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnCCgels_bufferSize` — confirm.
pub fn cusolverDnCCgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCKgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CK` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnCKgels_bufferSize` — confirm.
pub fn cusolverDnCKgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCEgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CE` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnCEgels_bufferSize` — confirm.
pub fn cusolverDnCEgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnCYgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut cuComplex`.
///
/// NOTE(review): presumably the `CY` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnCYgels_bufferSize` — confirm.
pub fn cusolverDnCYgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDDgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DD` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnDDgels_bufferSize` — confirm.
pub fn cusolverDnDDgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDSgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DS` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnDSgels_bufferSize` — confirm.
pub fn cusolverDnDSgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDHgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DH` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnDHgels_bufferSize` — confirm.
pub fn cusolverDnDHgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDBgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DB` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnDBgels_bufferSize` — confirm.
pub fn cusolverDnDBgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnDXgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f64`.
///
/// NOTE(review): presumably the `DX` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnDXgels_bufferSize` — confirm.
pub fn cusolverDnDXgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSSgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f32`.
///
/// NOTE(review): presumably the `SS` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnSSgels_bufferSize` — confirm.
pub fn cusolverDnSSgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSHgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f32`.
///
/// NOTE(review): presumably the `SH` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnSHgels_bufferSize` — confirm.
pub fn cusolverDnSHgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSOLVER C function `cusolverDnSBgels`, whose matrix pointers `dA`, `dB` and `dX` are `*mut f32`.
///
/// NOTE(review): presumably the `SB` variant of the mixed-precision iterative refinement `gels` least-squares solver (see `cusolverDn<t1><t2>gels()` in the cuSOLVER docs), with the workspace sized by `cusolverDnSBgels_bufferSize` — confirm.
pub fn cusolverDnSBgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnSXgels`, a member of the `cusolverDn<T1><T2>gels()` family.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f32`; the workspace is an untyped pointer accompanied by `lwork_bytes`.
///
/// NOTE(review): by analogy with [`cusolverDnIRSXgels`], `m`/`n` are presumably the rows/columns of `A`, `nrhs` the number of right hand sides, and `iter`/`d_info` report the outcome of the refinement. The `SX` infix presumably means single precision data with TF32 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnSXgels(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
iter: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnZZgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuDoubleComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnZZgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `ZZ` infix presumably means double complex data with double complex as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnZZgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnZCgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuDoubleComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnZCgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `ZC` infix presumably means double complex data with single complex as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnZCgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnZKgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuDoubleComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnZKgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `ZK` infix presumably means double complex data with complex half as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnZKgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnZEgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuDoubleComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnZEgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `ZE` infix presumably means double complex data with complex bfloat16 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnZEgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnZYgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuDoubleComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnZYgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `ZY` infix presumably means double complex data with complex TF32 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnZYgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuDoubleComplex,
ldda: cusolver_int_t,
dB: *mut cuDoubleComplex,
lddb: cusolver_int_t,
dX: *mut cuDoubleComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnCCgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnCCgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `CC` infix presumably means single complex data with single complex as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnCCgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnCKgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnCKgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `CK` infix presumably means single complex data with complex half as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnCKgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnCEgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnCEgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `CE` infix presumably means single complex data with complex bfloat16 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnCEgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnCYgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut cuComplex`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnCYgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `CY` infix presumably means single complex data with complex TF32 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnCYgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut cuComplex,
ldda: cusolver_int_t,
dB: *mut cuComplex,
lddb: cusolver_int_t,
dX: *mut cuComplex,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnDDgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f64`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnDDgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `DD` infix presumably means double precision data with double precision as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnDDgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnDSgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f64`, and `lwork_bytes` is a mutable pointer (`*mut size_t`).
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by `cusolverDnDSgels`, mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `DS` infix presumably means double precision data with single precision as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnDSgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnDHgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f64`, and `lwork_bytes` is a mutable pointer (`*mut size_t`) rather than the plain `size_t` taken by [`cusolverDnDHgels`].
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by [`cusolverDnDHgels`], mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `DH` infix presumably means double precision data with half precision as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnDHgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnDBgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f64`, and `lwork_bytes` is a mutable pointer (`*mut size_t`) rather than the plain `size_t` taken by [`cusolverDnDBgels`].
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by [`cusolverDnDBgels`], mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `DB` infix presumably means double precision data with bfloat16 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnDBgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnDXgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f64`, and `lwork_bytes` is a mutable pointer (`*mut size_t`) rather than the plain `size_t` taken by [`cusolverDnDXgels`].
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by [`cusolverDnDXgels`], mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `DX` infix presumably means double precision data with TF32 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnDXgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f64,
ldda: cusolver_int_t,
dB: *mut f64,
lddb: cusolver_int_t,
dX: *mut f64,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnSSgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f32`, and `lwork_bytes` is a mutable pointer (`*mut size_t`) rather than the plain `size_t` taken by [`cusolverDnSSgels`].
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by [`cusolverDnSSgels`], mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `SS` infix presumably means single precision data with single precision as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnSSgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnSHgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f32`, and `lwork_bytes` is a mutable pointer (`*mut size_t`) rather than the plain `size_t` taken by [`cusolverDnSHgels`].
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by [`cusolverDnSHgels`], mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `SH` infix presumably means single precision data with half precision as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnSHgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnSBgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f32`, and `lwork_bytes` is a mutable pointer (`*mut size_t`) rather than the plain `size_t` taken by [`cusolverDnSBgels`].
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by [`cusolverDnSBgels`], mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `SB` infix presumably means single precision data with bfloat16 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnSBgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusolverDnSXgels_bufferSize`.
///
/// The matrix arguments `dA`, `dB` and `dX` are `*mut f32`, and `lwork_bytes` is a mutable pointer (`*mut size_t`) rather than the plain `size_t` taken by [`cusolverDnSXgels`].
///
/// NOTE(review): presumably stores through `lwork_bytes` the workspace size in bytes needed by [`cusolverDnSXgels`], mirroring what [`cusolverDnIRSXgels_bufferSize`] documents for [`cusolverDnIRSXgels`]. The `SX` infix presumably means single precision data with TF32 as the lowest internal precision. Confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnSXgels_bufferSize(
handle: cusolverDnHandle_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut f32,
ldda: cusolver_int_t,
dB: *mut f32,
lddb: cusolver_int_t,
dX: *mut f32,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function is designed to perform the same functionality as the `cusolverDn<T1><T2>gesv()` functions, but wrapped in a more generic and expert interface that gives the user more control to parametrize the function and provides more information on output. [`cusolverDnIRSXgesv`] allows additional control of the solver parameters such as setting:
///
/// * the main precision (Inputs/Outputs precision) of the solver
/// * the lowest precision to be used internally by the solver
/// * the refinement solver type
/// * the maximum allowed number of iterations in the refinement phase
/// * the tolerance of the refinement solver
/// * the fallback to main precision
/// * and more
///
/// through the configuration parameters structure `gesv_irs_params` and its helper functions. For more details about what configuration can be set and its meaning please refer to all the functions in the cuSolverDN Helper Function Section that start with `cusolverDnIRSParamsxxxx()`. Moreover, [`cusolverDnIRSXgesv`] provides additional information on the output such as the convergence history (e.g., the residual norms) at each iteration and the number of iterations needed to converge. For more details about what information can be retrieved and its meaning please refer to all the functions in the cuSolverDN Helper Function Section that start with `cusolverDnIRSInfosxxxx()`.
///
/// The function's return value describes the result of the solving process. A [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`] indicates that the function finished with success; otherwise, it indicates that one of the API arguments is incorrect, that the configuration of the params/infos structure is incorrect, or that the function did not finish with success. More details about the error can be found by checking the `niters` and the `d_info` API parameters. See their description below for further details. The user should provide the required workspace allocated on device for the [`cusolverDnIRSXgesv`] function. The amount of bytes required for the function can be queried by calling the respective function [`cusolverDnIRSXgesv_bufferSize`]. Note that, if the user would like a particular configuration to be set via the params structure, it should be set before the call to [`cusolverDnIRSXgesv_bufferSize`] to get the size of the required workspace.
///
/// Tensor Float (TF32), introduced with NVIDIA Ampere architecture GPUs, is the most robust tensor core accelerated compute mode for the iterative refinement solver. It is able to solve the widest range of problems in HPC arising from different applications and provides up to 4X and 5X speedup for real and complex systems, respectively. On Volta and Turing architecture GPUs, half precision tensor core acceleration is recommended. In cases where the iterative refinement solver fails to converge to the desired accuracy (main precision, INOUT data precision), it is recommended to use main precision as internal lowest precision.
///
/// The following table provides all possible combinations of values for the lowest precision corresponding to the Inputs/Outputs data type. Note that if the lowest precision matches the Inputs/Outputs datatype, then the main precision factorization will be used.
///
/// **Supported Inputs/Outputs data type and lower precision for the IRS solver**
///
/// | **Inputs/Outputs Data Type (e.g., main precision)** | **Supported values for the lowest precision** |
/// | --- | --- |
/// | [`cusolverPrecType_t::CUSOLVER_C_64F`] | `CUSOLVER_C_64F, CUSOLVER_C_32F, CUSOLVER_C_16F, CUSOLVER_C_16BF, CUSOLVER_C_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_C_32F`] | `CUSOLVER_C_32F, CUSOLVER_C_16F, CUSOLVER_C_16BF, CUSOLVER_C_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_R_64F`] | `CUSOLVER_R_64F, CUSOLVER_R_32F, CUSOLVER_R_16F, CUSOLVER_R_16BF, CUSOLVER_R_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_R_32F`] | `CUSOLVER_R_32F, CUSOLVER_R_16F, CUSOLVER_R_16BF, CUSOLVER_R_TF32` |
///
/// The [`cusolverDnIRSXgesv_bufferSize`] function returns the required workspace buffer size in bytes for the corresponding [`cusolverDnIRSXgesv`] call with the given `gesv_irs_params` configuration.
///
/// Parameters are considered invalid when, for example:
///
/// * `n<0`
/// * `ldda<max(1,n)`
/// * `lddb<max(1,n)`
/// * `lddx<max(1,n)`.
///
/// # Parameters
///
/// - `handle`: Handle to the cusolverDn library context.
/// - `gesv_irs_params`: Configuration parameters structure, can serve one or more calls to any IRS solver.
/// - `gesv_irs_infos`: Info structure, where information about a particular solve will be stored. The `gesv_irs_infos` structure corresponds to a particular call. Thus different calls require different `gesv_irs_infos` structures; otherwise, it will be overwritten.
/// - `n`: Number of rows and columns of square matrix `A`. Should be non-negative.
/// - `nrhs`: Number of right hand sides to solve. Should be non-negative. Note that, `nrhs` is limited to 1 if the selected IRS refinement solver is [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`], [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES_GMRES`], [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`].
/// - `dA`: Matrix `A` with size `n-by-n`. Can’t be `NULL`. On return - will contain the factorization of the matrix A in the main precision (`A = P * L * U`, where P - permutation matrix defined by vector ipiv, L and U - lower and upper triangular matrices) if the iterative refinement solver was set to [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_NONE`] and the lowest precision is equal to the main precision (Inputs/Outputs datatype), or if the iterative refinement solver did not converge and the fallback to main precision was enabled (fallback enabled is the default setting); unchanged otherwise.
/// - `ldda`: Leading dimension of two-dimensional array used to store matrix `A`. `ldda >= n`.
/// - `dB`: Set of right hand sides `B` of size `n-by-nrhs`. Can’t be `NULL`.
/// - `lddb`: Leading dimension of two-dimensional array used to store matrix of right hand sides `B`. `lddb >= n`.
/// - `dX`: Set of solution vectors `X` of size `n-by-nrhs`. Can’t be `NULL`.
/// - `lddx`: Leading dimension of two-dimensional array used to store matrix of solution vectors `X`. `lddx >= n`.
/// - `dWorkspace`: Pointer to an allocated workspace in device memory of size `lwork_bytes`.
/// - `lwork_bytes`: Size of the allocated device workspace. Should be at least what was returned by [`cusolverDnIRSXgesv_bufferSize`] function.
/// - `niters`: On return, if `niters` is:
///   * `<0`: iterative refinement has failed, main precision (Inputs/Outputs precision) factorization has been performed if fallback is enabled.
///   * `-1`: taking into account machine parameters, n, nrhs, it is a priori not worth working in lower precision.
///   * `-2`: overflow of an entry when moving from main to lower precision.
///   * `-3`: failure during the factorization.
///   * `-5`: overflow occurred during computation.
///   * `-maxiter`: solver stopped the iterative refinement after reaching maximum allowed iterations.
///   * `>0`: `niters` is the number of iterations the solver performed to reach the convergence criteria.
/// - `d_info`: NOTE(review): not described in the upstream parameter list reproduced here; the text above refers to it (as `dinfo`) as a source of further error details. Confirm its exact semantics against the cuSOLVER reference.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: CPU memory allocation failed, most likely during the allocation of the residual array that stores the residual norms.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ARCH_MISMATCH`]: The IRS solver supports compute capability 7.0 and above. The lowest precision options CUSOLVER_\[CR\]_16BF and CUSOLVER_\[CR\]_TF32 are only available on compute capability 8.0 and above.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred, check the `d_info` and the `niters` arguments for more details.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed, for example:
///   * `n<0`
///   * `ldda<max(1,n)`
///   * `lddb<max(1,n)`
///   * `lddx<max(1,n)`.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_WORKSPACE`]: `lwork_bytes` is smaller than the required workspace. Could happen if the user called the [`cusolverDnIRSXgesv_bufferSize`] function, then changed some of the configuration settings such as the lowest precision.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The information structure `gesv_irs_infos` was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_NOT_SUPPORTED`]: One of the configuration parameters in the `gesv_irs_params` structure is not supported. For example if nrhs >1, and refinement solver was set to [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`].
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_OUT_OF_RANGE`]: Numerical error related to `niters` <0, see `niters` description for more details.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID`]: One of the configuration parameters in the `gesv_irs_params` structure is not valid.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER`]: The maxiter configuration parameter in the `gesv_irs_params` structure is not valid.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC`]: The main and/or the lowest precision configuration parameter in the `gesv_irs_params` structure is not valid, check the table above for the supported combinations.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE`]: The refinement solver configuration parameter in the `gesv_irs_params` structure is not valid.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The configuration parameter `gesv_irs_params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSXgesv(
handle: cusolverDnHandle_t,
gesv_irs_params: cusolverDnIRSParams_t,
gesv_irs_infos: cusolverDnIRSInfos_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut ::core::ffi::c_void,
ldda: cusolver_int_t,
dB: *mut ::core::ffi::c_void,
lddb: cusolver_int_t,
dX: *mut ::core::ffi::c_void,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
niters: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnIRSXgesv`].
///
/// Per the [`cusolverDnIRSXgesv`] documentation, this function returns the required workspace buffer size in bytes for the corresponding solve with the given `params` configuration. Any configuration that should apply to the solve must be set on `params` before this call, otherwise the solve may report an invalid workspace.
///
/// NOTE(review): the size is presumably written through the `lwork_bytes` pointer, and `n`/`nrhs` presumably carry the same meaning as in [`cusolverDnIRSXgesv`] — confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle, the params object or the raw pointer passed in.
pub fn cusolverDnIRSXgesv_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnIRSParams_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function is designed to perform the same functionality as the `cusolverDn<T1><T2>gels()` functions, but wrapped in a more generic and expert interface that gives the user more control to parametrize the function and provides more information on output. [`cusolverDnIRSXgels`] allows additional control of the solver parameters such as setting:
///
/// * the main precision (Inputs/Outputs precision) of the solver,
/// * the lowest precision to be used internally by the solver,
/// * the refinement solver type
/// * the maximum allowed number of iterations in the refinement phase
/// * the tolerance of the refinement solver
/// * the fallback to main precision
/// * and others
///
/// through the configuration parameters structure `gels_irs_params` and its helper functions. For more details about what configuration can be set and its meaning please refer to all the functions in the cuSolverDN Helper Function Section that start with `cusolverDnIRSParamsxxxx()`. Moreover, [`cusolverDnIRSXgels`] provides additional information on the output such as the convergence history (e.g., the residual norms) at each iteration and the number of iterations needed to converge. For more details about what information can be retrieved and its meaning please refer to all the functions in the cuSolverDN Helper Function Section that start with `cusolverDnIRSInfosxxxx()`.
///
/// The function's return value describes the result of the solving process. A [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`] indicates that the function finished with success; otherwise, it indicates that one of the API arguments is incorrect, that the configuration of the params/infos structure is incorrect, or that the function did not finish with success. More details about the error can be found by checking the `niters` and the `d_info` API parameters. See their description below for further details. Users should provide the required workspace allocated on device for the [`cusolverDnIRSXgels`] function. The amount of bytes required for the function can be queried by calling the respective function [`cusolverDnIRSXgels_bufferSize`]. Note that, if the user would like a particular configuration to be set via the params structure, it should be set before the call to [`cusolverDnIRSXgels_bufferSize`] to get the size of the required workspace.
///
/// Tensor Float (TF32), introduced with NVIDIA Ampere Architecture GPUs, is the most robust tensor core accelerated compute mode for the iterative refinement solver. It is able to solve the widest range of problems in HPC arising from different applications and provides up to 4X and 5X speedup for real and complex systems, respectively. On Volta and Turing architecture GPUs, half precision tensor core acceleration is recommended. In cases where the iterative refinement solver fails to converge to the desired accuracy (main precision, INOUT data precision), it is recommended to use main precision as internal lowest precision.
///
/// The following table provides all possible combinations of values for the lowest precision corresponding to the Inputs/Outputs data type. Note that if the lowest precision matches the Inputs/Outputs datatype, then main precision factorization will be used.
///
/// **Supported Inputs/Outputs data type and lower precision for the IRS solver**
///
/// | **Inputs/Outputs Data Type (e.g., main precision)** | **Supported values for the lowest precision** |
/// | --- | --- |
/// | [`cusolverPrecType_t::CUSOLVER_C_64F`] | `CUSOLVER_C_64F, CUSOLVER_C_32F, CUSOLVER_C_16F, CUSOLVER_C_16BF, CUSOLVER_C_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_C_32F`] | `CUSOLVER_C_32F, CUSOLVER_C_16F, CUSOLVER_C_16BF, CUSOLVER_C_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_R_64F`] | `CUSOLVER_R_64F, CUSOLVER_R_32F, CUSOLVER_R_16F, CUSOLVER_R_16BF, CUSOLVER_R_TF32` |
/// | [`cusolverPrecType_t::CUSOLVER_R_32F`] | `CUSOLVER_R_32F, CUSOLVER_R_16F, CUSOLVER_R_16BF, CUSOLVER_R_TF32` |
///
/// The [`cusolverDnIRSXgels_bufferSize`] function returns the required workspace buffer size in bytes for the corresponding [`cusolverDnIRSXgels`] call with the given `gels_irs_params` configuration.
///
/// Parameters are considered invalid when, for example:
///
/// * `n<0`
/// * `ldda<max(1,m)`
/// * `lddb<max(1,m)`
/// * `lddx<max(1,n)`.
///
/// # Parameters
///
/// - `handle`: Handle to the cusolverDn library context.
/// - `gels_irs_params`: Configuration parameters structure, can serve one or more calls to any IRS solver.
/// - `gels_irs_infos`: Info structure, where information about a particular solve will be stored. The `gels_irs_infos` structure corresponds to a particular call. Thus different calls require different `gels_irs_infos` structures; otherwise, it will be overwritten.
/// - `m`: Number of rows of the matrix `A`. Should be non-negative and n<=m.
/// - `n`: Number of columns of the matrix `A`. Should be non-negative and n<=m.
/// - `nrhs`: Number of right hand sides to solve. Should be non-negative. Note that, `nrhs` is limited to 1 if the selected IRS refinement solver is [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`], [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES_GMRES`], [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_CLASSICAL_GMRES`].
/// - `dA`: Matrix `A` with size `m-by-n`. Can’t be `NULL`. On return - unchanged if the lowest precision is not equal to the main precision and the iterative refinement solver converged, - garbage otherwise.
/// - `ldda`: Leading dimension of two-dimensional array used to store matrix `A`. `ldda >= m`.
/// - `dB`: Set of right hand sides `B` of size `m-by-nrhs`. Can’t be `NULL`.
/// - `lddb`: Leading dimension of two-dimensional array used to store matrix of right hand sides `B`. `lddb >= max(1,m)`.
/// - `dX`: Set of solution vectors `X` of size `n-by-nrhs`. Can’t be `NULL`.
/// - `lddx`: Leading dimension of two-dimensional array used to store matrix of solution vectors `X`. `lddx >= max(1,n)`.
/// - `dWorkspace`: Pointer to an allocated workspace in device memory of size `lwork_bytes`.
/// - `lwork_bytes`: Size of the allocated device workspace. Should be at least what was returned by [`cusolverDnIRSXgels_bufferSize`] function.
/// - `niters`: On return, if `niters` is:
///   * `<0`: iterative refinement has failed, main precision (Inputs/Outputs precision) factorization has been performed if fallback is enabled.
///   * `-1`: taking into account machine parameters, n, nrhs, it is a priori not worth working in lower precision.
///   * `-2`: overflow of an entry when moving from main to lower precision.
///   * `-3`: failure during the factorization.
///   * `-5`: overflow occurred during computation.
///   * `-maxiter`: solver stopped the iterative refinement after reaching maximum allowed iterations.
///   * `>0`: `niters` is the number of iterations the solver performed to reach the convergence criteria.
/// - `d_info`: NOTE(review): not described in the upstream parameter list reproduced here; the text above refers to it (as `dinfo`) as a source of further error details. Confirm its exact semantics against the cuSOLVER reference.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: CPU memory allocation failed, most likely during the allocation of the residual array that stores the residual norms.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ARCH_MISMATCH`]: The IRS solver supports compute capability 7.0 and above. The lowest precision options CUSOLVER_\[CR\]_16BF and CUSOLVER_\[CR\]_TF32 are only available on compute capability 8.0 and above.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal error occurred, check the `d_info` and the `niters` arguments for more details.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed, for example:
///   * `n<0`
///   * `ldda<max(1,m)`
///   * `lddb<max(1,m)`
///   * `lddx<max(1,n)`.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_WORKSPACE`]: `lwork_bytes` is smaller than the required workspace. Could happen if the user called the [`cusolverDnIRSXgels_bufferSize`] function, then changed some of the configuration settings such as the lowest precision.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED`]: The information structure `gels_irs_infos` was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_NOT_SUPPORTED`]: One of the configuration parameters in the `gels_irs_params` structure is not supported. For example if nrhs >1, and refinement solver was set to [`cusolverIRSRefinement_t::CUSOLVER_IRS_REFINE_GMRES`].
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_OUT_OF_RANGE`]: Numerical error related to `niters` <0; see `niters` description for more details.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID`]: One of the configuration parameters in the `gels_irs_params` structure is not valid.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER`]: The maxiter configuration parameter in the `gels_irs_params` structure is not valid.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC`]: The main and/or the lowest precision configuration parameter in the `gels_irs_params` structure is not valid, check the table above for the supported combinations.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE`]: The refinement solver configuration parameter in the `gels_irs_params` structure is not valid.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED`]: The configuration parameter `gels_irs_params` structure was not created.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnIRSXgels(
handle: cusolverDnHandle_t,
gels_irs_params: cusolverDnIRSParams_t,
gels_irs_infos: cusolverDnIRSInfos_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
dA: *mut ::core::ffi::c_void,
ldda: cusolver_int_t,
dB: *mut ::core::ffi::c_void,
lddb: cusolver_int_t,
dX: *mut ::core::ffi::c_void,
lddx: cusolver_int_t,
dWorkspace: *mut ::core::ffi::c_void,
lwork_bytes: size_t,
niters: *mut cusolver_int_t,
d_info: *mut cusolver_int_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnIRSXgels`].
///
/// Per the [`cusolverDnIRSXgels`] documentation, this function returns the required workspace buffer size in bytes for the corresponding solve with the given `params` configuration. Any configuration that should apply to the solve must be set on `params` before this call, otherwise the solve may report an invalid workspace.
///
/// NOTE(review): the size is presumably written through the `lwork_bytes` pointer, and `m`/`n`/`nrhs` presumably carry the same meaning as in [`cusolverDnIRSXgels`] — confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle, the params object or the raw pointer passed in.
pub fn cusolverDnIRSXgels_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnIRSParams_t,
m: cusolver_int_t,
n: cusolver_int_t,
nrhs: cusolver_int_t,
lwork_bytes: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper function that calculates the necessary size of the work buffer for [`cusolverDnSpotrf`].
///
/// This is the `S` variant (real valued single precision): `A` is `*mut f32`. The [`cusolverDnSpotrf`] documentation states that its `Lwork` argument is the value returned by `potrf_bufferSize()`.
///
/// NOTE(review): the size is presumably written through the `Lwork` pointer, and `uplo`/`n`/`A`/`lda` presumably describe the same matrix as in [`cusolverDnSpotrf`] — confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnSpotrf_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper function that calculates the necessary size of the work buffer for [`cusolverDnDpotrf`].
///
/// This is the `D` variant (real valued double precision): `A` is `*mut f64`. The [`cusolverDnDpotrf`] documentation states that its `Lwork` argument is the value returned by `potrf_bufferSize()`.
///
/// NOTE(review): the size is presumably written through the `Lwork` pointer, and `uplo`/`n`/`A`/`lda` presumably describe the same matrix as in [`cusolverDnDpotrf`] — confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnDpotrf_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper function that calculates the necessary size of the work buffer for [`cusolverDnCpotrf`].
///
/// This is the `C` variant (complex valued single precision): `A` is `*mut cuComplex`.
///
/// NOTE(review): the size is presumably written through the `Lwork` pointer and is the value to pass as `Lwork` to [`cusolverDnCpotrf`]; `uplo`/`n`/`A`/`lda` presumably describe the same matrix as in that call — confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnCpotrf_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper function that calculates the necessary size of the work buffer for the `Z` variant of `potrf` (`cusolverDnZpotrf`).
///
/// This is the `Z` variant (complex valued double precision): `A` is `*mut cuDoubleComplex`.
///
/// NOTE(review): the size is presumably written through the `Lwork` pointer and is the value to pass as `Lwork` to `cusolverDnZpotrf`; `uplo`/`n`/`A`/`lda` presumably describe the same matrix as in that call — confirm against the cuSOLVER reference.
///
/// # Safety
///
/// Foreign function: Rust cannot validate the handle or the raw pointers passed in.
pub fn cusolverDnZpotrf_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function computes the Cholesky factorization of a Hermitian positive-definite matrix (`S` variant: real valued single precision).
///
/// The `potrf_bufferSize()` helper functions (here [`cusolverDnSpotrf_bufferSize`]) calculate the necessary size of work buffers.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, and it is returned by `potrf_bufferSize()`.
///
/// If the Cholesky factorization failed, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `devInfo` indicates the smallest leading minor of `A` which is not positive definite.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
pub fn cusolverDnSpotrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
Workspace: *mut f32,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function computes the Cholesky factorization of a Hermitian positive-definite matrix (`D` variant: real valued double precision).
///
/// The `potrf_bufferSize()` helper functions (here [`cusolverDnDpotrf_bufferSize`]) calculate the necessary size of work buffers.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, and it is returned by `potrf_bufferSize()`.
///
/// If the Cholesky factorization failed, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `devInfo` indicates the smallest leading minor of `A` which is not positive definite.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
pub fn cusolverDnDpotrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
Workspace: *mut f64,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Cholesky factorization of a Hermitian positive-definite matrix (`cuComplex` elements).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// `A` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, as returned by the `potrf_bufferSize()` helper functions, which calculate the necessary size of work buffers.
///
/// If the Cholesky factorization fails, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `devInfo` indicates the smallest leading minor of `A` which is not positive definite.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnCpotrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
Workspace: *mut cuComplex,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Cholesky factorization of a Hermitian positive-definite matrix (`cuDoubleComplex` elements).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// `A` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, as returned by the `potrf_bufferSize()` helper functions, which calculate the necessary size of work buffers.
///
/// If the Cholesky factorization fails, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `devInfo` indicates the smallest leading minor of `A` which is not positive definite.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnZpotrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
Workspace: *mut cuDoubleComplex,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a system of linear equations with `f32` elements:
/// $$
/// A\\*X = B
/// $$
///
/// where `A` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// The user has to call `potrf` first to factorize matrix `A`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^H$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnSpotrs(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
B: *mut f32,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a system of linear equations with `f64` elements:
/// $$
/// A\\*X = B
/// $$
///
/// where `A` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// The user has to call `potrf` first to factorize matrix `A`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^H$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnDpotrs(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
B: *mut f64,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a system of linear equations with `cuComplex` elements:
/// $$
/// A\\*X = B
/// $$
///
/// where `A` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// The user has to call `potrf` first to factorize matrix `A`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^H$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnCpotrs(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
B: *mut cuComplex,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a system of linear equations with `cuDoubleComplex` elements:
/// $$
/// A\\*X = B
/// $$
///
/// where `A` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// The user has to call `potrf` first to factorize matrix `A`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^H$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnZpotrs(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices (`f32` elements).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// Each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// If the Cholesky factorization fails, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `infoArray` indicates the smallest leading minor of `A` which is not positive definite.
///
/// `infoArray` is an integer array of size `batchSize`. If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], `infoArray[0] = -i` (less than zero), meaning that the `i-th` parameter is wrong (not counting the handle). If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`] but `infoArray[i] = k` is positive, then the `i-th` matrix is not positive definite and the Cholesky factorization failed at row `k`.
///
/// Remark: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrfBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnSpotrfBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
Aarray: *mut *mut f32,
lda: ::core::ffi::c_int,
infoArray: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices (`f64` elements).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// Each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// If the Cholesky factorization fails, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `infoArray` indicates the smallest leading minor of `A` which is not positive definite.
///
/// `infoArray` is an integer array of size `batchSize`. If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], `infoArray[0] = -i` (less than zero), meaning that the `i-th` parameter is wrong (not counting the handle). If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`] but `infoArray[i] = k` is positive, then the `i-th` matrix is not positive definite and the Cholesky factorization failed at row `k`.
///
/// Remark: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrfBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnDpotrfBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
Aarray: *mut *mut f64,
lda: ::core::ffi::c_int,
infoArray: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices (`cuComplex` elements).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// Each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// If the Cholesky factorization fails, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `infoArray` indicates the smallest leading minor of `A` which is not positive definite.
///
/// `infoArray` is an integer array of size `batchSize`. If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], `infoArray[0] = -i` (less than zero), meaning that the `i-th` parameter is wrong (not counting the handle). If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`] but `infoArray[i] = k` is positive, then the `i-th` matrix is not positive definite and the Cholesky factorization failed at row `k`.
///
/// Remark: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrfBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnCpotrfBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
Aarray: *mut *mut cuComplex,
lda: ::core::ffi::c_int,
infoArray: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices (`cuDoubleComplex` elements).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// Each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// If the Cholesky factorization fails, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `infoArray` indicates the smallest leading minor of `A` which is not positive definite.
///
/// `infoArray` is an integer array of size `batchSize`. If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], `infoArray[0] = -i` (less than zero), meaning that the `i-th` parameter is wrong (not counting the handle). If `potrfBatched` returns [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`] but `infoArray[i] = k` is positive, then the `i-th` matrix is not positive definite and the Cholesky factorization failed at row `k`.
///
/// Remark: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrfBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnZpotrfBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
Aarray: *mut *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
infoArray: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a sequence of linear systems with `f32` elements:
/// $$
/// {A\lbrack i\rbrack}\\*{X\lbrack i\rbrack} = {B\lbrack i\rbrack}
/// $$
///
/// where each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// NOTE(review): this text uses the names `Aarray` and `info`, while the declaration below names its parameters `A` and `d_info`; these presumably refer to the same arguments — confirm against the cuSOLVER documentation.
///
/// The user has to call `potrfBatched` first to factorize matrix `Aarray[i]`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^{H}$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// The output parameter `info` is a scalar. If `info = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// Remark 1: only `nrhs=1` is supported.
///
/// Remark 2: `infoArray` from `potrfBatched` indicates if the matrix is positive definite. `info` from `potrsBatched` only shows which input parameter is wrong (not counting the handle).
///
/// Remark 3: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrsBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnSpotrsBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *mut *mut f32,
lda: ::core::ffi::c_int,
B: *mut *mut f32,
ldb: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a sequence of linear systems with `f64` elements:
/// $$
/// {A\lbrack i\rbrack}\\*{X\lbrack i\rbrack} = {B\lbrack i\rbrack}
/// $$
///
/// where each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// NOTE(review): this text uses the names `Aarray` and `info`, while the declaration below names its parameters `A` and `d_info`; these presumably refer to the same arguments — confirm against the cuSOLVER documentation.
///
/// The user has to call `potrfBatched` first to factorize matrix `Aarray[i]`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^{H}$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// The output parameter `info` is a scalar. If `info = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// Remark 1: only `nrhs=1` is supported.
///
/// Remark 2: `infoArray` from `potrfBatched` indicates if the matrix is positive definite. `info` from `potrsBatched` only shows which input parameter is wrong (not counting the handle).
///
/// Remark 3: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrsBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnDpotrsBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *mut *mut f64,
lda: ::core::ffi::c_int,
B: *mut *mut f64,
ldb: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a sequence of linear systems with `cuComplex` elements:
/// $$
/// {A\lbrack i\rbrack}\\*{X\lbrack i\rbrack} = {B\lbrack i\rbrack}
/// $$
///
/// where each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// NOTE(review): this text uses the names `Aarray` and `info`, while the declaration below names its parameters `A` and `d_info`; these presumably refer to the same arguments — confirm against the cuSOLVER documentation.
///
/// The user has to call `potrfBatched` first to factorize matrix `Aarray[i]`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^{H}$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// The output parameter `info` is a scalar. If `info = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// Remark 1: only `nrhs=1` is supported.
///
/// Remark 2: `infoArray` from `potrfBatched` indicates if the matrix is positive definite. `info` from `potrsBatched` only shows which input parameter is wrong (not counting the handle).
///
/// Remark 3: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrsBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnCpotrsBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *mut *mut cuComplex,
lda: ::core::ffi::c_int,
B: *mut *mut cuComplex,
ldb: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a sequence of linear systems with `cuDoubleComplex` elements:
/// $$
/// {A\lbrack i\rbrack}\\*{X\lbrack i\rbrack} = {B\lbrack i\rbrack}
/// $$
///
/// where each `Aarray[i] for i=0,1,..., batchSize-1` is an $n \times n$ Hermitian matrix; only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used.
///
/// NOTE(review): this text uses the names `Aarray` and `info`, while the declaration below names its parameters `A` and `d_info`; these presumably refer to the same arguments — confirm against the cuSOLVER documentation.
///
/// The user has to call `potrfBatched` first to factorize matrix `Aarray[i]`. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^{H}$. If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// The output parameter `info` is a scalar. If `info = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// Remark 1: only `nrhs=1` is supported.
///
/// Remark 2: `infoArray` from `potrfBatched` indicates if the matrix is positive definite. `info` from `potrsBatched` only shows which input parameter is wrong (not counting the handle).
///
/// Remark 3: the other part of `A` is used as a workspace. For example, if `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the upper triangle of `A` contains the Cholesky factor `U` and the lower triangle of `A` is destroyed after `potrsBatched`.
///
/// Please visit [cuSOLVER Library Samples - potrfBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/potrfBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnZpotrsBatched(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *mut *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *mut *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper that calculates the necessary size of the work buffer for [`cusolverDnSpotri`] (`f32` elements).
///
/// NOTE(review): the size is presumably written through the `lwork` output pointer and is meant to be passed on as the `lwork` argument of [`cusolverDnSpotri`]; the unit of the size is not stated here — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
pub fn cusolverDnSpotri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper that calculates the necessary size of the work buffer for [`cusolverDnDpotri`] (`f64` elements).
///
/// NOTE(review): the size is presumably written through the `lwork` output pointer and is meant to be passed on as the `lwork` argument of [`cusolverDnDpotri`]; the unit of the size is not stated here — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
pub fn cusolverDnDpotri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper that calculates the necessary size of the work buffer for [`cusolverDnCpotri`] (`cuComplex` elements).
///
/// NOTE(review): the size is presumably written through the `lwork` output pointer and is meant to be passed on as the `lwork` argument of [`cusolverDnCpotri`]; the unit of the size is not stated here — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
pub fn cusolverDnCpotri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Helper that calculates the necessary size of the work buffer for the matching `potri` routine (`cuDoubleComplex` elements).
///
/// NOTE(review): the size is presumably written through the `lwork` output pointer and is meant to be passed on as the `lwork` argument of the `potri` call; the unit of the size is not stated here — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
pub fn cusolverDnZpotri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the inverse of a positive-definite matrix `A` (`f32` elements) using the Cholesky factorization:
/// $$
/// A = L\\*L^H = U^{H}\\*U
/// $$
///
/// computed by `potrf()`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization. Only the lower or upper part is meaningful and the input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular part of the inverse of `A`.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular part of the inverse of `A`.
///
/// The user has to provide the working space, which is pointed to by the input parameter `work` (named `Workspace` in the upstream text). The input parameter `lwork` (upstream `Lwork`) is the size of the working space, as returned by the `potri_bufferSize()` helper functions, which calculate the necessary size of work buffers.
///
/// If the computation of the inverse fails, i.e. some leading minor of `L` or `U` is null, the output parameter `devInfo` indicates the smallest leading minor of `L` or `U` which is not positive definite.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnSpotri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the inverse of a positive-definite matrix `A` (`f64` elements) using the Cholesky factorization:
/// $$
/// A = L\\*L^H = U^{H}\\*U
/// $$
///
/// computed by `potrf()`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization. Only the lower or upper part is meaningful and the input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular part of the inverse of `A`.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular part of the inverse of `A`.
///
/// The user has to provide the working space, which is pointed to by the input parameter `work` (named `Workspace` in the upstream text). The input parameter `lwork` (upstream `Lwork`) is the size of the working space, as returned by the `potri_bufferSize()` helper functions, which calculate the necessary size of work buffers.
///
/// If the computation of the inverse fails, i.e. some leading minor of `L` or `U` is null, the output parameter `devInfo` indicates the smallest leading minor of `L` or `U` which is not positive definite.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnDpotri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the inverse of a positive-definite matrix `A` (`cuComplex` elements) using the Cholesky factorization:
/// $$
/// A = L\\*L^H = U^{H}\\*U
/// $$
///
/// computed by `potrf()`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization. Only the lower or upper part is meaningful and the input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular part of the inverse of `A`.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular part of the inverse of `A`.
///
/// The user has to provide the working space, which is pointed to by the input parameter `work` (named `Workspace` in the upstream text). The input parameter `lwork` (upstream `Lwork`) is the size of the working space, as returned by the `potri_bufferSize()` helper functions, which calculate the necessary size of work buffers.
///
/// If the computation of the inverse fails, i.e. some leading minor of `L` or `U` is null, the output parameter `devInfo` indicates the smallest leading minor of `L` or `U` which is not positive definite.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Safety
///
/// This is a raw FFI declaration: Rust validates none of the arguments, so the caller must uphold whatever the C library requires of them.
/// NOTE(review): the pointer arguments presumably have to reference device memory — confirm against the cuSOLVER documentation.
pub fn cusolverDnCpotri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the inverse of a positive-definite matrix `A` from its Cholesky factorization (`Z` variant: double precision complex, `cuDoubleComplex` data).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The Cholesky factorization used is:
/// $$
/// A = L \cdot L^H = U^{H} \cdot U
/// $$
///
/// as computed by `potrf()`.
///
/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization. Only the lower or upper part is meaningful, and the input parameter `uplo` indicates which part of the matrix is used. The function leaves the other part untouched.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and it is replaced by the lower triangular part of the inverse of `A`.
///
/// If the input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and it is replaced by the upper triangular part of the inverse of `A`.
///
/// The user has to provide the working space pointed to by the input parameter `work`. The input parameter `lwork` is the size of the working space, as returned by `potri_bufferSize()`.
///
/// If the computation of the inverse fails, i.e. some leading minor of `L` or `U` is null, the output parameter `devInfo` indicates the smallest leading minor of `L` or `U` which is not positive definite.
///
/// If the output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting the handle).
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Selects which triangular part of `A` (lower or upper) is processed.
/// - `n`: Order of the $n \times n$ matrix `A`.
/// - `A`: On input, the triangular factor `L` or `U`; on output, the selected triangle of the inverse of `A`.
/// - `lda`: Leading dimension of the array storing `A`.
/// - `work`: Working space provided by the caller.
/// - `lwork`: Size of `work`, as returned by `potri_bufferSize()`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnZpotri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes needed for the pre-allocated device and host work buffers of [`cusolverDnXtrtri`].
///
/// The sizes are reported in bytes through `workspaceInBytesOnDevice` and `workspaceInBytesOnHost`; pass them, together with buffers of those sizes, to [`cusolverDnXtrtri`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Indicates if matrix `A` lower or upper part is stored, the other part is not referenced.
/// - `diag`: The enumerated unit diagonal type.
/// - `n`: Number of rows and columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` not less than `max(1,n)`.
/// - `lda`: Leading dimension of the two-dimensional array used to store matrix `A`.
/// - `workspaceInBytesOnDevice`: Output; size in bytes of the device workspace required by [`cusolverDnXtrtri`].
/// - `workspaceInBytesOnHost`: Output; size in bytes of the host workspace required by [`cusolverDnXtrtri`].
pub fn cusolverDnXtrtri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
diag: cublasDiagType_t,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the inverse of a triangular matrix using the generic API interface.
///
/// The helper function [`cusolverDnXtrtri_bufferSize`] calculates the sizes needed for the pre-allocated buffers.
///
/// `A` is an $n \times n$ triangular matrix, only lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only lower triangular part of `A` is processed, and replaced by lower triangular inverse.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only upper triangular part of `A` is processed, and replaced by upper triangular inverse.
///
/// The user has to provide device and host work spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are sizes in bytes of the device and host work spaces, and they are returned by [`cusolverDnXtrtri_bufferSize`].
///
/// If matrix inversion fails, the output parameter `devInfo = i` shows `A(i,i) = 0`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Please visit [cuSOLVER Library Samples - Xtrtri](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xtrtri) for a code example.
///
/// List of input arguments for [`cusolverDnXtrtri_bufferSize`] and [`cusolverDnXtrtri`]:
///
/// **Valid data types**
///
/// | `DataTypeA` | `Meaning` |
/// | --- | --- |
/// | `CUDA_R_32F` | `STRTRI` |
/// | `CUDA_R_64F` | `DTRTRI` |
/// | `CUDA_C_32F` | `CTRTRI` |
/// | `CUDA_C_64F` | `ZTRTRI` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Indicates if matrix `A` lower or upper part is stored, the other part is not referenced.
/// - `diag`: The enumerated unit diagonal type.
/// - `n`: Number of rows and columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXtrtri_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXtrtri_bufferSize`].
/// - `devInfo`: Output status (called `info` in the upstream text): `i > 0` shows `A(i,i) = 0`, `-i` flags the `i-th` parameter as wrong.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0` or `lda<max(1,n)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]: Data type is not supported.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXtrtri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
diag: cublasDiagType_t,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnSlauum_bufferSize` (`f32` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): by analogy with the documented `*_bufferSize` helpers in this file, this presumably reports through `lwork` the work-buffer size required by [`cusolverDnSlauum`] — confirm against the cuSOLVER headers.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is referenced.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `f32` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `lwork`: Output pointer; presumably receives the required workspace size.
pub fn cusolverDnSlauum_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnDlauum_bufferSize` (`f64` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): by analogy with the documented `*_bufferSize` helpers in this file, this presumably reports through `lwork` the work-buffer size required by [`cusolverDnDlauum`] — confirm against the cuSOLVER headers.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is referenced.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `f64` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `lwork`: Output pointer; presumably receives the required workspace size.
pub fn cusolverDnDlauum_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnClauum_bufferSize` (`cuComplex` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): by analogy with the documented `*_bufferSize` helpers in this file, this presumably reports through `lwork` the work-buffer size required by [`cusolverDnClauum`] — confirm against the cuSOLVER headers.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is referenced.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `cuComplex` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `lwork`: Output pointer; presumably receives the required workspace size.
pub fn cusolverDnClauum_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnZlauum_bufferSize` (`cuDoubleComplex` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): by analogy with the documented `*_bufferSize` helpers in this file, this presumably reports through `lwork` the work-buffer size required by [`cusolverDnZlauum`] — confirm against the cuSOLVER headers.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is referenced.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `cuDoubleComplex` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `lwork`: Output pointer; presumably receives the required workspace size.
pub fn cusolverDnZlauum_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnSlauum` (`f32` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `lauum`, which computes the product of a triangular matrix with its (conjugate) transpose in place — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is processed.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `f32` matrix data; presumably overwritten with the result.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `work`: Caller-provided working space (presumably sized via [`cusolverDnSlauum_bufferSize`]).
/// - `lwork`: Presumably the size of `work`.
/// - `devInfo`: Output pointer; presumably receives the status value, with `-i` flagging the `i-th` parameter as for the documented routines in this file.
pub fn cusolverDnSlauum(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnDlauum` (`f64` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `lauum`, which computes the product of a triangular matrix with its (conjugate) transpose in place — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is processed.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `f64` matrix data; presumably overwritten with the result.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `work`: Caller-provided working space (presumably sized via [`cusolverDnDlauum_bufferSize`]).
/// - `lwork`: Presumably the size of `work`.
/// - `devInfo`: Output pointer; presumably receives the status value, with `-i` flagging the `i-th` parameter as for the documented routines in this file.
pub fn cusolverDnDlauum(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnClauum` (`cuComplex` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `lauum`, which computes the product of a triangular matrix with its (conjugate) transpose in place — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is processed.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `cuComplex` matrix data; presumably overwritten with the result.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `work`: Caller-provided working space (presumably sized via [`cusolverDnClauum_bufferSize`]).
/// - `lwork`: Presumably the size of `work`.
/// - `devInfo`: Output pointer; presumably receives the status value, with `-i` flagging the `i-th` parameter as for the documented routines in this file.
pub fn cusolverDnClauum(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnZlauum` (`cuDoubleComplex` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `lauum`, which computes the product of a triangular matrix with its (conjugate) transpose in place — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Fill mode; presumably selects the triangular part of `A` that is processed.
/// - `n`: Presumably the order of the square matrix `A`.
/// - `A`: Pointer to the `cuDoubleComplex` matrix data; presumably overwritten with the result.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `work`: Caller-provided working space (presumably sized via [`cusolverDnZlauum_bufferSize`]).
/// - `lwork`: Presumably the size of `work`.
/// - `devInfo`: Output pointer; presumably receives the status value, with `-i` flagging the `i-th` parameter as for the documented routines in this file.
pub fn cusolverDnZlauum(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnSgetrf`] (`S` variant: single precision real, `f32` data).
///
/// The size is reported through `Lwork`; the caller then provides a working space of that size as the `Workspace` argument of [`cusolverDnSgetrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `f32` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Lwork`: Output; receives the size of the working space.
pub fn cusolverDnSgetrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnDgetrf`] (`D` variant: double precision real, `f64` data).
///
/// The size is reported through `Lwork`; the caller then provides a working space of that size as the `Workspace` argument of [`cusolverDnDgetrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `f64` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Lwork`: Output; receives the size of the working space.
pub fn cusolverDnDgetrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnCgetrf`] (`C` variant: single precision complex, `cuComplex` data).
///
/// The size is reported through `Lwork`; the caller then provides a working space of that size as the `Workspace` argument of [`cusolverDnCgetrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `cuComplex` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Lwork`: Output; receives the size of the working space.
pub fn cusolverDnCgetrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnZgetrf`] (`Z` variant: double precision complex, `cuDoubleComplex` data).
///
/// The size is reported through `Lwork`; the caller then provides a working space of that size as the `Workspace` argument of [`cusolverDnZgetrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `cuDoubleComplex` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Lwork`: Output; receives the size of the working space.
pub fn cusolverDnZgetrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the LU factorization of an $m \times n$ matrix (`S` variant: single precision real, `f32` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The S and D data types are real single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// P \cdot A = L \cdot U
/// $$
///
/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. Its required size (`Lwork`) is returned by `getrf_bufferSize()`; this function itself takes no size argument.
///
/// If LU factorization failed, i.e. matrix `A` (`U`) is singular, the output parameter `devInfo=i` indicates `U(i,i) = 0`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// If `devIpiv` is null, no pivoting is performed. The factorization is `A=L*U`, which is not numerically stable.
///
/// Whether or not the LU factorization failed, the output parameter `devIpiv` contains the pivoting sequence: row `i` is interchanged with row `devIpiv(i)`.
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// Remark: `getrf` uses fastest implementation with large workspace of size `m*n`. The user can choose the legacy implementation with minimal workspace by `Getrf` and `cusolverDnSetAdvOptions(params, CUSOLVERDN_GETRF, CUSOLVER_ALG_1)`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Matrix to factorize.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Workspace`: Working space provided by the caller, sized via `getrf_bufferSize()`.
/// - `devIpiv`: Output pivoting sequence; may be null to disable pivoting.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnSgetrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
Workspace: *mut f32,
devIpiv: *mut ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the LU factorization of an $m \times n$ matrix (`D` variant: double precision real, `f64` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The S and D data types are real single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// P \cdot A = L \cdot U
/// $$
///
/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. Its required size (`Lwork`) is returned by `getrf_bufferSize()`; this function itself takes no size argument.
///
/// If LU factorization failed, i.e. matrix `A` (`U`) is singular, the output parameter `devInfo=i` indicates `U(i,i) = 0`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// If `devIpiv` is null, no pivoting is performed. The factorization is `A=L*U`, which is not numerically stable.
///
/// Whether or not the LU factorization failed, the output parameter `devIpiv` contains the pivoting sequence: row `i` is interchanged with row `devIpiv(i)`.
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// Remark: `getrf` uses fastest implementation with large workspace of size `m*n`. The user can choose the legacy implementation with minimal workspace by `Getrf` and `cusolverDnSetAdvOptions(params, CUSOLVERDN_GETRF, CUSOLVER_ALG_1)`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Matrix to factorize.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Workspace`: Working space provided by the caller, sized via `getrf_bufferSize()`.
/// - `devIpiv`: Output pivoting sequence; may be null to disable pivoting.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnDgetrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
Workspace: *mut f64,
devIpiv: *mut ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the LU factorization of an $m \times n$ matrix (`C` variant: single precision complex, `cuComplex` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The S and D data types are real single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// P \cdot A = L \cdot U
/// $$
///
/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. Its required size (`Lwork`) is returned by `getrf_bufferSize()`; this function itself takes no size argument.
///
/// If LU factorization failed, i.e. matrix `A` (`U`) is singular, the output parameter `devInfo=i` indicates `U(i,i) = 0`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// If `devIpiv` is null, no pivoting is performed. The factorization is `A=L*U`, which is not numerically stable.
///
/// Whether or not the LU factorization failed, the output parameter `devIpiv` contains the pivoting sequence: row `i` is interchanged with row `devIpiv(i)`.
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// Remark: `getrf` uses fastest implementation with large workspace of size `m*n`. The user can choose the legacy implementation with minimal workspace by `Getrf` and `cusolverDnSetAdvOptions(params, CUSOLVERDN_GETRF, CUSOLVER_ALG_1)`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Matrix to factorize.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Workspace`: Working space provided by the caller, sized via `getrf_bufferSize()`.
/// - `devIpiv`: Output pivoting sequence; may be null to disable pivoting.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnCgetrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
Workspace: *mut cuComplex,
devIpiv: *mut ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the LU factorization of an $m \times n$ matrix (`Z` variant: double precision complex, `cuDoubleComplex` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The S and D data types are real single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// P \cdot A = L \cdot U
/// $$
///
/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. Its required size (`Lwork`) is returned by `getrf_bufferSize()`; this function itself takes no size argument.
///
/// If LU factorization failed, i.e. matrix `A` (`U`) is singular, the output parameter `devInfo=i` indicates `U(i,i) = 0`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// If `devIpiv` is null, no pivoting is performed. The factorization is `A=L*U`, which is not numerically stable.
///
/// Whether or not the LU factorization failed, the output parameter `devIpiv` contains the pivoting sequence: row `i` is interchanged with row `devIpiv(i)`.
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// Remark: `getrf` uses fastest implementation with large workspace of size `m*n`. The user can choose the legacy implementation with minimal workspace by `Getrf` and `cusolverDnSetAdvOptions(params, CUSOLVERDN_GETRF, CUSOLVER_ALG_1)`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Matrix to factorize.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `Workspace`: Working space provided by the caller, sized via `getrf_bufferSize()`.
/// - `devIpiv`: Output pivoting sequence; may be null to disable pivoting.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnZgetrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
Workspace: *mut cuDoubleComplex,
devIpiv: *mut ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnSlaswp` (`f32` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `laswp`, which applies a series of row interchanges to a matrix — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `n`: Presumably the number of columns of `A`.
/// - `A`: Pointer to the `f32` matrix data; the only mutable pointer argument, so presumably where the result is written.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `k1`, `k2`: Presumably the first and last entries of `devIpiv` for which an interchange is performed (LAPACK convention).
/// - `devIpiv`: Read-only pointer to pivot indices (presumably as produced by `getrf`).
/// - `incx`: Presumably the increment between successive entries of `devIpiv`.
pub fn cusolverDnSlaswp(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
k1: ::core::ffi::c_int,
k2: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
incx: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnDlaswp` (`f64` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `laswp`, which applies a series of row interchanges to a matrix — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `n`: Presumably the number of columns of `A`.
/// - `A`: Pointer to the `f64` matrix data; the only mutable pointer argument, so presumably where the result is written.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `k1`, `k2`: Presumably the first and last entries of `devIpiv` for which an interchange is performed (LAPACK convention).
/// - `devIpiv`: Read-only pointer to pivot indices (presumably as produced by `getrf`).
/// - `incx`: Presumably the increment between successive entries of `devIpiv`.
pub fn cusolverDnDlaswp(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
k1: ::core::ffi::c_int,
k2: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
incx: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnClaswp` (`cuComplex` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `laswp`, which applies a series of row interchanges to a matrix — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `n`: Presumably the number of columns of `A`.
/// - `A`: Pointer to the `cuComplex` matrix data; the only mutable pointer argument, so presumably where the result is written.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `k1`, `k2`: Presumably the first and last entries of `devIpiv` for which an interchange is performed (LAPACK convention).
/// - `devIpiv`: Read-only pointer to pivot indices (presumably as produced by `getrf`).
/// - `incx`: Presumably the increment between successive entries of `devIpiv`.
pub fn cusolverDnClaswp(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
k1: ::core::ffi::c_int,
k2: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
incx: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// FFI binding for the cuSOLVER C function `cusolverDnZlaswp` (`cuDoubleComplex` data); the generated bindings carry no upstream description for it.
///
/// NOTE(review): the name matches LAPACK's `laswp`, which applies a series of row interchanges to a matrix — confirm that this routine follows the same contract.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `n`: Presumably the number of columns of `A`.
/// - `A`: Pointer to the `cuDoubleComplex` matrix data; the only mutable pointer argument, so presumably where the result is written.
/// - `lda`: Presumably the leading dimension of the array storing `A`.
/// - `k1`, `k2`: Presumably the first and last entries of `devIpiv` for which an interchange is performed (LAPACK convention).
/// - `devIpiv`: Read-only pointer to pivot indices (presumably as produced by `getrf`).
/// - `incx`: Presumably the increment between successive entries of `devIpiv`.
pub fn cusolverDnZlaswp(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
k1: ::core::ffi::c_int,
k2: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
incx: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a linear system with multiple right-hand sides, using the LU factorization computed by `getrf` (`f32` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The system solved is:
/// $$
/// op(A) \cdot X = B
/// $$
///
/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`. `B` is an $n\times {nrhs}$ right-hand side matrix.
///
/// The input parameter `trans` is defined by:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUBLAS_OP_N} \\
/// A^T & \text{if } trans = \text{CUBLAS_OP_T} \\
/// A^H & \text{if } trans = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// The input parameter `devIpiv` is an output of `getrf`. It contains pivot indices, which are used to permute the right-hand sides.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `trans`: Selects `op(A)` as defined above.
/// - `n`: Order of the $n \times n$ matrix `A`.
/// - `nrhs`: Number of right-hand sides (columns of `B`).
/// - `A`: LU-factored matrix produced by `getrf` (read-only).
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `devIpiv`: Pivot indices produced by `getrf` (read-only).
/// - `B`: The $n\times {nrhs}$ right-hand side matrix; it is the only mutable matrix argument, so presumably overwritten with the solution `X`.
/// - `ldb`: Presumably the leading dimension of the array storing `B`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnSgetrs(
handle: cusolverDnHandle_t,
trans: cublasOperation_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
B: *mut f32,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a linear system with multiple right-hand sides, using the LU factorization computed by `getrf` (`f64` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The system solved is:
/// $$
/// op(A) \cdot X = B
/// $$
///
/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`. `B` is an $n\times {nrhs}$ right-hand side matrix.
///
/// The input parameter `trans` is defined by:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUBLAS_OP_N} \\
/// A^T & \text{if } trans = \text{CUBLAS_OP_T} \\
/// A^H & \text{if } trans = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// The input parameter `devIpiv` is an output of `getrf`. It contains pivot indices, which are used to permute the right-hand sides.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `trans`: Selects `op(A)` as defined above.
/// - `n`: Order of the $n \times n$ matrix `A`.
/// - `nrhs`: Number of right-hand sides (columns of `B`).
/// - `A`: LU-factored matrix produced by `getrf` (read-only).
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `devIpiv`: Pivot indices produced by `getrf` (read-only).
/// - `B`: The $n\times {nrhs}$ right-hand side matrix; it is the only mutable matrix argument, so presumably overwritten with the solution `X`.
/// - `ldb`: Presumably the leading dimension of the array storing `B`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnDgetrs(
handle: cusolverDnHandle_t,
trans: cublasOperation_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
B: *mut f64,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a linear system with multiple right-hand sides, using the LU factorization computed by `getrf` (`cuComplex` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The system solved is:
/// $$
/// op(A) \cdot X = B
/// $$
///
/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`. `B` is an $n\times {nrhs}$ right-hand side matrix.
///
/// The input parameter `trans` is defined by:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUBLAS_OP_N} \\
/// A^T & \text{if } trans = \text{CUBLAS_OP_T} \\
/// A^H & \text{if } trans = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// The input parameter `devIpiv` is an output of `getrf`. It contains pivot indices, which are used to permute the right-hand sides.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `trans`: Selects `op(A)` as defined above.
/// - `n`: Order of the $n \times n$ matrix `A`.
/// - `nrhs`: Number of right-hand sides (columns of `B`).
/// - `A`: LU-factored matrix produced by `getrf` (read-only).
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `devIpiv`: Pivot indices produced by `getrf` (read-only).
/// - `B`: The $n\times {nrhs}$ right-hand side matrix; it is the only mutable matrix argument, so presumably overwritten with the solution `X`.
/// - `ldb`: Presumably the leading dimension of the array storing `B`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnCgetrs(
handle: cusolverDnHandle_t,
trans: cublasOperation_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
B: *mut cuComplex,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a linear system with multiple right-hand sides, using the LU factorization computed by `getrf` (`cuDoubleComplex` data).
///
/// Please visit [cuSOLVER Library Samples - getrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/getrf) for a code example.
///
/// The system solved is:
/// $$
/// op(A) \cdot X = B
/// $$
///
/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`. `B` is an $n\times {nrhs}$ right-hand side matrix.
///
/// The input parameter `trans` is defined by:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUBLAS_OP_N} \\
/// A^T & \text{if } trans = \text{CUBLAS_OP_T} \\
/// A^H & \text{if } trans = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// The input parameter `devIpiv` is an output of `getrf`. It contains pivot indices, which are used to permute the right-hand sides.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `getrf` and `getrs` to complete a linear solver.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `trans`: Selects `op(A)` as defined above.
/// - `n`: Order of the $n \times n$ matrix `A`.
/// - `nrhs`: Number of right-hand sides (columns of `B`).
/// - `A`: LU-factored matrix produced by `getrf` (read-only).
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `devIpiv`: Pivot indices produced by `getrf` (read-only).
/// - `B`: The $n\times {nrhs}$ right-hand side matrix; it is the only mutable matrix argument, so presumably overwritten with the solution `X`.
/// - `ldb`: Presumably the leading dimension of the array storing `B`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnZgetrs(
handle: cusolverDnHandle_t,
trans: cublasOperation_t,
n: ::core::ffi::c_int,
nrhs: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
devIpiv: *const ::core::ffi::c_int,
B: *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnSgeqrf`] (`S` variant: single precision real, `f32` data).
///
/// The size is reported through `lwork`; the caller then passes a working space of that size, and the size itself, as the `Workspace` and `Lwork` arguments of [`cusolverDnSgeqrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `f32` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `lwork`: Output; receives the size of the working space.
pub fn cusolverDnSgeqrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnDgeqrf`] (`D` variant: double precision real, `f64` data).
///
/// The size is reported through `lwork`; the caller then passes a working space of that size, and the size itself, as the `Workspace` and `Lwork` arguments of [`cusolverDnDgeqrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `f64` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `lwork`: Output; receives the size of the working space.
pub fn cusolverDnDgeqrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnCgeqrf`] (`C` variant: single precision complex, `cuComplex` data).
///
/// The size is reported through `lwork`; the caller then passes a working space of that size, and the size itself, as the `Workspace` and `Lwork` arguments of [`cusolverDnCgeqrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `cuComplex` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `lwork`: Output; receives the size of the working space.
pub fn cusolverDnCgeqrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the size of the work buffer needed by [`cusolverDnZgeqrf`] (`Z` variant: double precision complex, `cuDoubleComplex` data).
///
/// The size is reported through `lwork`; the caller then passes a working space of that size, and the size itself, as the `Workspace` and `Lwork` arguments of [`cusolverDnZgeqrf`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: Pointer to the `cuDoubleComplex` matrix data.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `lwork`: Output; receives the size of the working space.
pub fn cusolverDnZgeqrf_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the QR factorization of an $m \times n$ matrix (`S` variant: single precision real, `f32` data).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// A = Q \cdot R
/// $$
///
/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, and it is returned by `geqrf_bufferSize()`.
///
/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
///
/// The matrix `Q` is not formed explicitly; instead, a sequence of Householder vectors is stored in the lower triangular part of `A`. The leading nonzero element of each Householder vector is assumed to be 1, such that the output parameter `TAU` contains the scaling factor `τ`. If `v` is the original Householder vector, `q` is the new Householder vector corresponding to `τ`, satisfying the following relation:
/// $$
/// I - 2 \cdot v \cdot v^{H} = I - \tau \cdot q \cdot q^{H}
/// $$
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: On input, the matrix to factorize; on output, `R` in the upper triangular part and the Householder vectors in the lower triangular part.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `TAU`: Output; the scaling factors `τ`.
/// - `Workspace`: Working space provided by the caller.
/// - `Lwork`: Size of `Workspace`, as returned by `geqrf_bufferSize()`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnSgeqrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
TAU: *mut f32,
Workspace: *mut f32,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the QR factorization of an $m \times n$ matrix (`D` variant: double precision real, `f64` data).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// A = Q \cdot R
/// $$
///
/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, and it is returned by `geqrf_bufferSize()`.
///
/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
///
/// The matrix `Q` is not formed explicitly; instead, a sequence of Householder vectors is stored in the lower triangular part of `A`. The leading nonzero element of each Householder vector is assumed to be 1, such that the output parameter `TAU` contains the scaling factor `τ`. If `v` is the original Householder vector, `q` is the new Householder vector corresponding to `τ`, satisfying the following relation:
/// $$
/// I - 2 \cdot v \cdot v^{H} = I - \tau \cdot q \cdot q^{H}
/// $$
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: On input, the matrix to factorize; on output, `R` in the upper triangular part and the Householder vectors in the lower triangular part.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `TAU`: Output; the scaling factors `τ`.
/// - `Workspace`: Working space provided by the caller.
/// - `Lwork`: Size of `Workspace`, as returned by `geqrf_bufferSize()`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnDgeqrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
TAU: *mut f64,
Workspace: *mut f64,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the QR factorization of an $m \times n$ matrix (`C` variant: single precision complex, `cuComplex` data).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// A = Q \cdot R
/// $$
///
/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, and it is returned by `geqrf_bufferSize()`.
///
/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
///
/// The matrix `Q` is not formed explicitly; instead, a sequence of Householder vectors is stored in the lower triangular part of `A`. The leading nonzero element of each Householder vector is assumed to be 1, such that the output parameter `TAU` contains the scaling factor `τ`. If `v` is the original Householder vector, `q` is the new Householder vector corresponding to `τ`, satisfying the following relation:
/// $$
/// I - 2 \cdot v \cdot v^{H} = I - \tau \cdot q \cdot q^{H}
/// $$
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: On input, the matrix to factorize; on output, `R` in the upper triangular part and the Householder vectors in the lower triangular part.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `TAU`: Output; the scaling factors `τ`.
/// - `Workspace`: Working space provided by the caller.
/// - `Lwork`: Size of `Workspace`, as returned by `geqrf_bufferSize()`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnCgeqrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
TAU: *mut cuComplex,
Workspace: *mut cuComplex,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the QR factorization of an $m \times n$ matrix (`Z` variant: double precision complex, `cuDoubleComplex` data).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// The factorization computed is:
/// $$
/// A = Q \cdot R
/// $$
///
/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
///
/// The user has to provide the working space pointed to by the input parameter `Workspace`. The input parameter `Lwork` is the size of the working space, and it is returned by `geqrf_bufferSize()`.
///
/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
///
/// The matrix `Q` is not formed explicitly; instead, a sequence of Householder vectors is stored in the lower triangular part of `A`. The leading nonzero element of each Householder vector is assumed to be 1, such that the output parameter `TAU` contains the scaling factor `τ`. If `v` is the original Householder vector, `q` is the new Householder vector corresponding to `τ`, satisfying the following relation:
/// $$
/// I - 2 \cdot v \cdot v^{H} = I - \tau \cdot q \cdot q^{H}
/// $$
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A`.
/// - `A`: On input, the matrix to factorize; on output, `R` in the upper triangular part and the Householder vectors in the lower triangular part.
/// - `lda`: Presumably the leading dimension of the array storing `A` — confirm against the cuSOLVER documentation.
/// - `TAU`: Output; the scaling factors `τ`.
/// - `Workspace`: Working space provided by the caller.
/// - `Lwork`: Size of `Workspace`, as returned by `geqrf_bufferSize()`.
/// - `devInfo`: Output status, interpreted as described above.
pub fn cusolverDnZgeqrf(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
TAU: *mut cuDoubleComplex,
Workspace: *mut cuDoubleComplex,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSorgqr`] (single-precision real).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the compute routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// The arguments `m`, `n`, `k`, `A`, `lda` and `tau` carry the same names as in the compute routine; `A` and `tau` are `*const` here.
pub fn cusolverDnSorgqr_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
tau: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDorgqr`] (double-precision real).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the compute routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// The arguments `m`, `n`, `k`, `A`, `lda` and `tau` carry the same names as in the compute routine; `A` and `tau` are `*const` here.
pub fn cusolverDnDorgqr_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
tau: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnSorgqr_bufferSize`], with `cuComplex` data; the result is written through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnCungqr`] (single-precision complex), with `lwork` counted in elements as for the real-valued `orgqr` helpers — confirm against the cuSOLVER documentation.
pub fn cusolverDnCungqr_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnDorgqr_bufferSize`], with `cuDoubleComplex` data; the result is written through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnZungqr`] (double-precision complex), with `lwork` counted in elements as for the real-valued `orgqr` helpers — confirm against the cuSOLVER documentation.
pub fn cusolverDnZungqr_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Overwrites the $m \times n$ matrix `A` with the matrix `Q` built from elementary reflectors (single-precision real variant). Please visit [cuSOLVER Library Samples - orgqr](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/orgqr) for a code example.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function overwrites $m \times n$ matrix `A` by:
/// $$
/// Q = {H(1)}\\*{H(2)}\\*{...}\\*{H(k)}
/// $$
///
/// where `Q` is a unitary matrix formed by a sequence of elementary reflection vectors stored in `A`.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `orgqr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `geqrf`, `orgqr` to complete orthogonalization.
pub fn cusolverDnSorgqr(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
tau: *const f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Overwrites the $m \times n$ matrix `A` with the matrix `Q` built from elementary reflectors (double-precision real variant). Please visit [cuSOLVER Library Samples - orgqr](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/orgqr) for a code example.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function overwrites $m \times n$ matrix `A` by:
/// $$
/// Q = {H(1)}\\*{H(2)}\\*{...}\\*{H(k)}
/// $$
///
/// where `Q` is a unitary matrix formed by a sequence of elementary reflection vectors stored in `A`.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `orgqr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `geqrf`, `orgqr` to complete orthogonalization.
pub fn cusolverDnDorgqr(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
tau: *const f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnSorgqr`], with `cuComplex` data in place of `f32`.
///
/// NOTE(review): presumably the single-precision complex counterpart of `orgqr`, i.e. it overwrites `A` with the unitary matrix `Q = H(1) H(2) … H(k)` formed from the reflectors stored in `A` and the factors in `tau`, with `lwork` obtained from [`cusolverDnCungqr_bufferSize`] and `info = -i` flagging a wrong `i-th` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnCungqr(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnDorgqr`], with `cuDoubleComplex` data in place of `f64`.
///
/// NOTE(review): presumably the double-precision complex counterpart of `orgqr`, i.e. it overwrites `A` with the unitary matrix `Q = H(1) H(2) … H(k)` formed from the reflectors stored in `A` and the factors in `tau`, with `lwork` obtained from [`cusolverDnZungqr_bufferSize`] and `info = -i` flagging a wrong `i-th` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnZungqr(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSormqr`] (single-precision real).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the compute routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// The arguments `side` through `ldc` carry the same names as in the compute routine; `A`, `tau` and `C` are `*const` here.
pub fn cusolverDnSormqr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
tau: *const f32,
C: *const f32,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDormqr`] (double-precision real).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the compute routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// The arguments `side` through `ldc` carry the same names as in the compute routine; `A`, `tau` and `C` are `*const` here.
pub fn cusolverDnDormqr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
tau: *const f64,
C: *const f64,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnSormqr_bufferSize`], with `cuComplex` data; the result is written through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnCunmqr`] (single-precision complex), with `lwork` counted in elements as for the real-valued `ormqr` helpers — confirm against the cuSOLVER documentation.
pub fn cusolverDnCunmqr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
C: *const cuComplex,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnDormqr_bufferSize`], with `cuDoubleComplex` data; the result is written through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnZunmqr`] (double-precision complex), with `lwork` counted in elements as for the real-valued `ormqr` helpers — confirm against the cuSOLVER documentation.
pub fn cusolverDnZunmqr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
C: *const cuDoubleComplex,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Overwrites the $m \times n$ matrix `C` with `op(Q) * C` or `C * op(Q)` (single-precision real variant). Please visit [cuSOLVER Library Samples - ormqr](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/ormqr) for a code example.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function overwrites $m \times n$ matrix `C` by:
/// $$
/// C =
/// \begin{cases}
/// \operatorname{op}(Q) * C & \text{if } side = \text{CUBLAS_SIDE_LEFT} \\
/// C * \operatorname{op}(Q) & \text{if } side = \text{CUBLAS_SIDE_RIGHT}
/// \end{cases}
/// $$
///
/// The operation of `Q` is defined by the `trans` parameter:
/// $$
/// \operatorname{op}(Q) =
/// \begin{cases}
/// Q & \text{if } trans = \text{CUBLAS_OP_N} \\
/// Q^T & \text{if } trans = \text{CUBLAS_OP_T} \\
/// Q^H & \text{if } trans = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// `Q` is a unitary matrix formed by a sequence of elementary reflection vectors from QR factorization (`geqrf`) of `A`.
///
/// `Q = H(1) H(2) … H(k)`
///
/// `Q` is of order `m` if `side` = [`cublasSideMode_t::CUBLAS_SIDE_LEFT`] and of order `n` if `side` = [`cublasSideMode_t::CUBLAS_SIDE_RIGHT`].
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `geqrf_bufferSize()` or `ormqr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `geqrf`, `ormqr` and `trsm` to complete a linear solver or a least-square solver.
pub fn cusolverDnSormqr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
tau: *const f32,
C: *mut f32,
ldc: ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Overwrites the $m \times n$ matrix `C` with `op(Q) * C` or `C * op(Q)` (double-precision real variant). Please visit [cuSOLVER Library Samples - ormqr](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/ormqr) for a code example.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function overwrites $m \times n$ matrix `C` by:
/// $$
/// C =
/// \begin{cases}
/// \operatorname{op}(Q) * C & \text{if } side = \text{CUBLAS_SIDE_LEFT} \\
/// C * \operatorname{op}(Q) & \text{if } side = \text{CUBLAS_SIDE_RIGHT}
/// \end{cases}
/// $$
///
/// The operation of `Q` is defined by the `trans` parameter:
/// $$
/// \operatorname{op}(Q) =
/// \begin{cases}
/// Q & \text{if } trans = \text{CUBLAS_OP_N} \\
/// Q^T & \text{if } trans = \text{CUBLAS_OP_T} \\
/// Q^H & \text{if } trans = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// `Q` is a unitary matrix formed by a sequence of elementary reflection vectors from QR factorization (`geqrf`) of `A`.
///
/// `Q = H(1) H(2) … H(k)`
///
/// `Q` is of order `m` if `side` = [`cublasSideMode_t::CUBLAS_SIDE_LEFT`] and of order `n` if `side` = [`cublasSideMode_t::CUBLAS_SIDE_RIGHT`].
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `geqrf_bufferSize()` or `ormqr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine `geqrf`, `ormqr` and `trsm` to complete a linear solver or a least-square solver.
pub fn cusolverDnDormqr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
tau: *const f64,
C: *mut f64,
ldc: ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnSormqr`], with `cuComplex` data in place of `f32`.
///
/// NOTE(review): presumably the single-precision complex counterpart of `ormqr`, i.e. it overwrites `C` with `op(Q) * C` or `C * op(Q)` depending on `side`, with `op` selected by `trans`, `lwork` obtained from [`cusolverDnCunmqr_bufferSize`], and `devInfo = -i` flagging a wrong `i-th` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnCunmqr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
C: *mut cuComplex,
ldc: ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Same parameter list as [`cusolverDnDormqr`], with `cuDoubleComplex` data in place of `f64`.
///
/// NOTE(review): presumably the double-precision complex counterpart of `ormqr`, i.e. it overwrites `C` with `op(Q) * C` or `C * op(Q)` depending on `side`, with `op` selected by `trans`, `lwork` obtained from [`cusolverDnZunmqr_bufferSize`], and `devInfo = -i` flagging a wrong `i-th` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnZunmqr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
C: *mut cuDoubleComplex,
ldc: ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSsytrf`] (single-precision real).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the factorization routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// Unlike the factorization routine, this helper takes no `uplo` or `ipiv` argument.
pub fn cusolverDnSsytrf_bufferSize(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDsytrf`] (double-precision real).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the factorization routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// Unlike the factorization routine, this helper takes no `uplo` or `ipiv` argument.
pub fn cusolverDnDsytrf_bufferSize(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnCsytrf`] (single-precision complex).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the factorization routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// Unlike the factorization routine, this helper takes no `uplo` or `ipiv` argument.
pub fn cusolverDnCsytrf_bufferSize(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnZsytrf`] (double-precision complex).
///
/// The value written through `lwork` is the workspace length to pass as `lwork` to the factorization routine. It counts elements, so the size in bytes of the working space is `sizeof(<type>) * lwork`.
///
/// Unlike the factorization routine, this helper takes no `uplo` or `ipiv` argument.
pub fn cusolverDnZsytrf_bufferSize(
handle: cusolverDnHandle_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Bunch-Kaufman factorization of a symmetric indefinite matrix (single-precision real variant).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
///
/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. If `ipiv` is null, no pivoting is performed, which is not numerically stable.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only lower triangular part of `A` is processed, and replaced by lower triangular factor `L` and block diagonal matrix `D`. Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
/// $$
/// P\\*A\\*P^{T} = L\\*D\\*L^{T}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only upper triangular part of `A` is processed, and replaced by upper triangular factor `U` and block diagonal matrix `D`.
/// $$
/// P\\*A\\*P^{T} = U\\*D\\*U^{T}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sytrf_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`. When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
///
/// If the Bunch-Kaufman factorization fails, i.e. `A` is singular, the output parameter `info = i` indicates `D(i,i)=0`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The output parameter `ipiv` contains the pivoting sequence. If `ipiv(i) = k > 0`, `D(i,i)` is 1x1 block, and `i-th` row/column of `A` is interchanged with `k-th` row/column of `A`. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`] and `ipiv(i-1) = ipiv(i) = -m < 0`, `D(i-1:i,i-1:i)` is a 2x2 block, and `(i-1)-th` row/column is interchanged with `m-th` row/column. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] and `ipiv(i+1) = ipiv(i) = -m < 0`, `D(i:i+1,i:i+1)` is a 2x2 block, and `(i+1)-th` row/column is interchanged with `m-th` row/column.
pub fn cusolverDnSsytrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
ipiv: *mut ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Bunch-Kaufman factorization of a symmetric indefinite matrix (double-precision real variant).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
///
/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. If `ipiv` is null, no pivoting is performed, which is not numerically stable.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only lower triangular part of `A` is processed, and replaced by lower triangular factor `L` and block diagonal matrix `D`. Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
/// $$
/// P\\*A\\*P^{T} = L\\*D\\*L^{T}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only upper triangular part of `A` is processed, and replaced by upper triangular factor `U` and block diagonal matrix `D`.
/// $$
/// P\\*A\\*P^{T} = U\\*D\\*U^{T}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sytrf_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`. When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
///
/// If the Bunch-Kaufman factorization fails, i.e. `A` is singular, the output parameter `info = i` indicates `D(i,i)=0`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The output parameter `ipiv` contains the pivoting sequence. If `ipiv(i) = k > 0`, `D(i,i)` is 1x1 block, and `i-th` row/column of `A` is interchanged with `k-th` row/column of `A`. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`] and `ipiv(i-1) = ipiv(i) = -m < 0`, `D(i-1:i,i-1:i)` is a 2x2 block, and `(i-1)-th` row/column is interchanged with `m-th` row/column. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] and `ipiv(i+1) = ipiv(i) = -m < 0`, `D(i:i+1,i:i+1)` is a 2x2 block, and `(i+1)-th` row/column is interchanged with `m-th` row/column.
pub fn cusolverDnDsytrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
ipiv: *mut ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Bunch-Kaufman factorization of a symmetric indefinite matrix (single-precision complex variant).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
///
/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. If `ipiv` is null, no pivoting is performed, which is not numerically stable.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only lower triangular part of `A` is processed, and replaced by lower triangular factor `L` and block diagonal matrix `D`. Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
/// $$
/// P\\*A\\*P^{T} = L\\*D\\*L^{T}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only upper triangular part of `A` is processed, and replaced by upper triangular factor `U` and block diagonal matrix `D`.
/// $$
/// P\\*A\\*P^{T} = U\\*D\\*U^{T}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sytrf_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`. When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
///
/// If the Bunch-Kaufman factorization fails, i.e. `A` is singular, the output parameter `info = i` indicates `D(i,i)=0`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The output parameter `ipiv` contains the pivoting sequence. If `ipiv(i) = k > 0`, `D(i,i)` is 1x1 block, and `i-th` row/column of `A` is interchanged with `k-th` row/column of `A`. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`] and `ipiv(i-1) = ipiv(i) = -m < 0`, `D(i-1:i,i-1:i)` is a 2x2 block, and `(i-1)-th` row/column is interchanged with `m-th` row/column. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] and `ipiv(i+1) = ipiv(i) = -m < 0`, `D(i:i+1,i:i+1)` is a 2x2 block, and `(i+1)-th` row/column is interchanged with `m-th` row/column.
pub fn cusolverDnCsytrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
ipiv: *mut ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Bunch-Kaufman factorization of a symmetric indefinite matrix (double-precision complex variant).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
///
/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. If `ipiv` is null, no pivoting is performed, which is not numerically stable.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only lower triangular part of `A` is processed, and replaced by lower triangular factor `L` and block diagonal matrix `D`. Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
/// $$
/// P\\*A\\*P^{T} = L\\*D\\*L^{T}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only upper triangular part of `A` is processed, and replaced by upper triangular factor `U` and block diagonal matrix `D`.
/// $$
/// P\\*A\\*P^{T} = U\\*D\\*U^{T}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sytrf_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`. When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
///
/// If the Bunch-Kaufman factorization fails, i.e. `A` is singular, the output parameter `info = i` indicates `D(i,i)=0`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The output parameter `ipiv` contains the pivoting sequence. If `ipiv(i) = k > 0`, `D(i,i)` is 1x1 block, and `i-th` row/column of `A` is interchanged with `k-th` row/column of `A`. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`] and `ipiv(i-1) = ipiv(i) = -m < 0`, `D(i-1:i,i-1:i)` is a 2x2 block, and `(i-1)-th` row/column is interchanged with `m-th` row/column. If `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] and `ipiv(i+1) = ipiv(i) = -m < 0`, `D(i:i+1,i:i+1)` is a 2x2 block, and `(i+1)-th` row/column is interchanged with `m-th` row/column.
pub fn cusolverDnZsytrf(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
ipiv: *mut ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnXsytrs`].
///
/// Writes the sizes **in bytes** of the device and host work spaces through `workspaceInBytesOnDevice` and `workspaceInBytesOnHost`; these are the values to pass, together with buffers of at least those sizes, to [`cusolverDnXsytrs`].
///
/// The arguments `uplo` through `ldb` carry the same names and types as in [`cusolverDnXsytrs`]; see that function for their meaning and for the supported `dataTypeA`/`dataTypeB` combinations.
pub fn cusolverDnXsytrs_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: i64,
nrhs: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
ipiv: *const i64,
dataTypeB: cudaDataType,
B: *mut ::core::ffi::c_void,
ldb: i64,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a system of linear equations with a symmetric matrix previously factorized by `cusolverDn<t>sytrf()`, using the generic API interface.
///
/// The helper function [`cusolverDnXsytrs_bufferSize`] calculates the sizes needed for the pre-allocated buffers.
///
/// `A` contains the factorization from `cusolverDn<t>sytrf()`, only lower or upper part is meaningful, the other part is not touched.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], the details of the factorization are stored as:
/// $$
/// A = L\\*D\\*L^{T}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the details of the factorization are stored as:
/// $$
/// A = U\\*D\\*U^{T}
/// $$
///
/// The user has to provide the pivot indices that can be obtained by `cusolverDn<t>sytrf()` as well as device and host work spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are sizes in bytes of the device and host work spaces, and they are returned by [`cusolverDnXsytrs_bufferSize`].
/// To factorize and solve the symmetric system without pivoting, the user should set `ipiv = NULL` when calling `cusolverDn<t>sytrf` and [`cusolverDnXsytrs`].
///
/// NOTE(review): `ipiv` is `*const i64` here, whereas the `cusolverDn<t>sytrf` bindings in this file write `c_int` pivots — confirm how the pivot array is expected to be produced/converted.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// List of input arguments for [`cusolverDnXsytrs_bufferSize`] and [`cusolverDnXsytrs`]:
///
/// The generic API has two different types: `dataTypeA` is data type of the matrix `A`, `dataTypeB` is data type of the matrix `B`. [`cusolverDnXsytrs`] only supports the following four combinations:
///
/// **Valid combinations of data types**
///
/// | **DataTypeA** | **DataTypeB** | **Meaning** |
/// | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `SSYTRS` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `DSYTRS` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CSYTRS` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `ZSYTRS` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `uplo`: Indicates if matrix `A` lower or upper part is stored, the other part is not referenced.
/// - `n`: Number of rows and columns of matrix `A`.
/// - `nrhs`: Number of right-hand sides.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `ipiv`: Pivot indices obtained from the factorization, or null when the system was factorized without pivoting.
/// - `dataTypeB`: Data type of array `B`.
/// - `B`: Array of dimension `ldb * nrhs` with `ldb` is not less than `max(1,nrhs)`. (NOTE(review): since `A` is `n × n`, a bound of `max(1,n)` on `ldb` would be expected — confirm against the cuSOLVER documentation.)
/// - `ldb`: Leading dimension of two-dimensional array used to store matrix `B`.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXsytrs_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXsytrs_bufferSize`].
/// - `info`: Output status; `info = -i` means the `i-th` parameter is wrong (not counting handle).
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0` or `lda<max(1,n)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED`]: Data type is not supported.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsytrs(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: i64,
nrhs: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
ipiv: *const i64,
dataTypeB: cudaDataType,
B: *mut ::core::ffi::c_void,
ldb: i64,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Takes the same leading arguments as [`cusolverDnSsytri`] (`uplo`, `n`, `A`, `lda`, `ipiv`) and writes its result through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnSsytri`] (single-precision real), with `lwork` counted in elements rather than bytes — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsytri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Takes the same leading arguments as [`cusolverDnDsytri`] (`uplo`, `n`, `A`, `lda`, `ipiv`) and writes its result through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnDsytri`] (double-precision real), with `lwork` counted in elements rather than bytes — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsytri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Takes the same leading arguments as [`cusolverDnCsytri`] (`uplo`, `n`, `A`, `lda`, `ipiv`) and writes its result through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnCsytri`] (single-precision complex), with `lwork` counted in elements rather than bytes — confirm against the cuSOLVER documentation.
pub fn cusolverDnCsytri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Takes the same leading arguments as [`cusolverDnZsytri`] (`uplo`, `n`, `A`, `lda`, `ipiv`) and writes its result through `lwork`.
///
/// NOTE(review): presumably the workspace-size query for [`cusolverDnZsytri`] (double-precision complex), with `lwork` counted in elements rather than bytes — confirm against the cuSOLVER documentation.
pub fn cusolverDnZsytri_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single-precision real `sytri` routine. It takes a mutable `n`-order matrix `A` (leading dimension `lda`), a read-only pivot array `ipiv`, a caller-provided workspace `work` of length `lwork`, and an output status pointer `info`.
///
/// NOTE(review): presumably computes the inverse of a symmetric matrix in place from the factorization and pivots produced by [`cusolverDnSsytrf`] (as LAPACK `sytri` does), with `lwork` obtained from [`cusolverDnSsytri_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsytri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double-precision real `sytri` routine. It takes a mutable `n`-order matrix `A` (leading dimension `lda`), a read-only pivot array `ipiv`, a caller-provided workspace `work` of length `lwork`, and an output status pointer `info`.
///
/// NOTE(review): presumably computes the inverse of a symmetric matrix in place from the factorization and pivots produced by [`cusolverDnDsytrf`] (as LAPACK `sytri` does), with `lwork` obtained from [`cusolverDnDsytri_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsytri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single-precision complex `sytri` routine. It takes a mutable `n`-order matrix `A` (leading dimension `lda`), a read-only pivot array `ipiv`, a caller-provided workspace `work` of length `lwork`, and an output status pointer `info`.
///
/// NOTE(review): presumably computes the inverse of a symmetric matrix in place from the factorization and pivots produced by [`cusolverDnCsytrf`] (as LAPACK `sytri` does), with `lwork` obtained from [`cusolverDnCsytri_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnCsytri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double-precision complex `sytri` routine. It takes a mutable `n`-order matrix `A` (leading dimension `lda`), a read-only pivot array `ipiv`, a caller-provided workspace `work` of length `lwork`, and an output status pointer `info`.
///
/// NOTE(review): presumably computes the inverse of a symmetric matrix in place from the factorization and pivots produced by [`cusolverDnZsytrf`] (as LAPACK `sytri` does), with `lwork` obtained from [`cusolverDnZsytri_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnZsytri(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
ipiv: *const ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSgebrd`] (single-precision real).
///
/// The value written through `Lwork` is the size of the working space to pass as `Lwork` to the compute routine. Only the matrix dimensions `m` and `n` are taken; no matrix pointer is required.
pub fn cusolverDnSgebrd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for the double-precision real `gebrd` routine (`cusolverDnDgebrd`).
///
/// The value written through `Lwork` is the size of the working space to pass as `Lwork` to the compute routine. Only the matrix dimensions `m` and `n` are taken; no matrix pointer is required.
pub fn cusolverDnDgebrd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for the single-precision complex `gebrd` routine (`cusolverDnCgebrd`).
///
/// The value written through `Lwork` is the size of the working space to pass as `Lwork` to the compute routine. Only the matrix dimensions `m` and `n` are taken; no matrix pointer is required.
pub fn cusolverDnCgebrd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for the double-precision complex `gebrd` routine (`cusolverDnZgebrd`).
///
/// The value written through `Lwork` is the size of the working space to pass as `Lwork` to the compute routine. Only the matrix dimensions `m` and `n` are taken; no matrix pointer is required.
pub fn cusolverDnZgebrd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
Lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Reduces a general $m \times n$ matrix to bidiagonal form (single-precision real variant).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function reduces a general $m \times n$ matrix `A` to a real upper or lower bidiagonal form `B` by an orthogonal transformation: $Q^{H}\\*A\\*P = B$
///
/// If `m>=n`, `B` is upper bidiagonal; if `m<n`, `B` is lower bidiagonal.
///
/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
///
/// * if `m>=n`, the diagonal and the first superdiagonal are overwritten with the upper bidiagonal matrix `B`; the elements below the diagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the first superdiagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
/// * if `m<n`, the diagonal and the first subdiagonal are overwritten with the lower bidiagonal matrix `B`; the elements below the first subdiagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the diagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
///
/// The user has to provide working space which is pointed to by input parameter `Work`. The input parameter `Lwork` is the size of the working space, and it is returned by `gebrd_bufferSize()`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Remark: `gebrd` only supports `m>=n`.
pub fn cusolverDnSgebrd(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
D: *mut f32,
E: *mut f32,
TAUQ: *mut f32,
TAUP: *mut f32,
Work: *mut f32,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Reduces a general $m \times n$ matrix `A` to bidiagonal form (double precision, real valued).
///
/// The size of the work buffer needed is calculated by the `gebrd_bufferSize()` helper functions
/// (for this data type, [`cusolverDnDgebrd_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function reduces a general $m \times n$ matrix `A` to a real upper or lower bidiagonal form `B` by an orthogonal transformation: $Q^{H}\\*A\\*P = B$
///
/// If `m>=n`, `B` is upper bidiagonal; if `m<n`, `B` is lower bidiagonal.
///
/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
///
/// * if `m>=n`, the diagonal and the first superdiagonal are overwritten with the upper bidiagonal matrix `B`; the elements below the diagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the first superdiagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
/// * if `m<n`, the diagonal and the first subdiagonal are overwritten with the lower bidiagonal matrix `B`; the elements below the first subdiagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the diagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
///
/// The user has to provide working space which is pointed by input parameter `Work`. The input parameter `Lwork` is size of the working space, and it is returned by `gebrd_bufferSize()`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Remark: `gebrd` only supports `m>=n`.
///
/// NOTE(review): `D` and `E` are not described above; presumably they receive the diagonal and
/// off-diagonal elements of `B` — confirm against the cuSOLVER documentation.
pub fn cusolverDnDgebrd(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
D: *mut f64,
E: *mut f64,
TAUQ: *mut f64,
TAUP: *mut f64,
Work: *mut f64,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Reduces a general $m \times n$ matrix `A` to bidiagonal form (single precision, complex valued).
///
/// The size of the work buffer needed is calculated by the `gebrd_bufferSize()` helper functions
/// (for this data type, [`cusolverDnCgebrd_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function reduces a general $m \times n$ matrix `A` to a real upper or lower bidiagonal form `B` by an orthogonal transformation: $Q^{H}\\*A\\*P = B$
///
/// If `m>=n`, `B` is upper bidiagonal; if `m<n`, `B` is lower bidiagonal.
///
/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
///
/// * if `m>=n`, the diagonal and the first superdiagonal are overwritten with the upper bidiagonal matrix `B`; the elements below the diagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the first superdiagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
/// * if `m<n`, the diagonal and the first subdiagonal are overwritten with the lower bidiagonal matrix `B`; the elements below the first subdiagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the diagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
///
/// The user has to provide working space which is pointed by input parameter `Work`. The input parameter `Lwork` is size of the working space, and it is returned by `gebrd_bufferSize()`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Remark: `gebrd` only supports `m>=n`.
///
/// Note that `D` and `E` are declared as real `f32` arrays although `A`, `TAUQ`, `TAUP` and `Work`
/// are `cuComplex`. NOTE(review): presumably `D` and `E` receive the diagonal and off-diagonal
/// elements of the real bidiagonal matrix `B` — confirm against the cuSOLVER documentation.
pub fn cusolverDnCgebrd(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
D: *mut f32,
E: *mut f32,
TAUQ: *mut cuComplex,
TAUP: *mut cuComplex,
Work: *mut cuComplex,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Reduces a general $m \times n$ matrix `A` to bidiagonal form (double precision, complex valued).
///
/// The size of the work buffer needed is calculated by the `gebrd_bufferSize()` helper functions
/// (for this data type, [`cusolverDnZgebrd_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function reduces a general $m \times n$ matrix `A` to a real upper or lower bidiagonal form `B` by an orthogonal transformation: $Q^{H}\\*A\\*P = B$
///
/// If `m>=n`, `B` is upper bidiagonal; if `m<n`, `B` is lower bidiagonal.
///
/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
///
/// * if `m>=n`, the diagonal and the first superdiagonal are overwritten with the upper bidiagonal matrix `B`; the elements below the diagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the first superdiagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
/// * if `m<n`, the diagonal and the first subdiagonal are overwritten with the lower bidiagonal matrix `B`; the elements below the first subdiagonal, with the array `TAUQ`, represent the orthogonal matrix `Q` as a product of elementary reflectors, and the elements above the diagonal, with the array `TAUP`, represent the orthogonal matrix `P` as a product of elementary reflectors.
///
/// The user has to provide working space which is pointed by input parameter `Work`. The input parameter `Lwork` is size of the working space, and it is returned by `gebrd_bufferSize()`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Remark: `gebrd` only supports `m>=n`.
///
/// Note that `D` and `E` are declared as real `f64` arrays although `A`, `TAUQ`, `TAUP` and `Work`
/// are `cuDoubleComplex`. NOTE(review): presumably `D` and `E` receive the diagonal and
/// off-diagonal elements of the real bidiagonal matrix `B` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnZgebrd(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
D: *mut f64,
E: *mut f64,
TAUQ: *mut cuDoubleComplex,
TAUP: *mut cuDoubleComplex,
Work: *mut cuDoubleComplex,
Lwork: ::core::ffi::c_int,
devInfo: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnSorgbr`] (single precision, real valued).
///
/// Takes the same `side`, `m`, `n`, `k`, `A`, `lda` and `tau` arguments as `cusolverDnSorgbr`
/// (here as read-only pointers) and writes the required size of the working space through `lwork`.
/// As documented for `orgbr`, the size in bytes of the working space is `sizeof(<type>) * lwork`.
pub fn cusolverDnSorgbr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
tau: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnDorgbr`] (double precision, real valued).
///
/// Takes the same `side`, `m`, `n`, `k`, `A`, `lda` and `tau` arguments as `cusolverDnDorgbr`
/// (here as read-only pointers) and writes the required size of the working space through `lwork`.
/// As documented for `orgbr`, the size in bytes of the working space is `sizeof(<type>) * lwork`.
pub fn cusolverDnDorgbr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
tau: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking single precision complex (`cuComplex`) inputs; the result is
/// written through `lwork`.
///
/// The parameter list matches [`cusolverDnSorgbr_bufferSize`] with `cuComplex` in place of `f32`.
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnCungbr`], the complex counterpart of `orgbr` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnCungbr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking double precision complex (`cuDoubleComplex`) inputs; the result
/// is written through `lwork`.
///
/// The parameter list matches [`cusolverDnDorgbr_bufferSize`] with `cuDoubleComplex` in place of
/// `f64`.
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnZungbr`], the complex counterpart of `orgbr` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnZungbr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Generates one of the matrices `Q` or `P**H` determined by `gebrd` (single precision, real valued).
///
/// The size of the work buffer needed is calculated by the `orgbr_bufferSize()` helper functions
/// (for this data type, [`cusolverDnSorgbr_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function generates one of the unitary matrices `Q` or `P**H` determined by `gebrd` when reducing a matrix A to bidiagonal form: $Q^{H}\\*A\\*P = B$
///
/// `Q` and `P**H` are defined as products of elementary reflectors H(i) or G(i) respectively.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `orgbr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
pub fn cusolverDnSorgbr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
tau: *const f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Generates one of the matrices `Q` or `P**H` determined by `gebrd` (double precision, real valued).
///
/// The size of the work buffer needed is calculated by the `orgbr_bufferSize()` helper functions
/// (for this data type, [`cusolverDnDorgbr_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function generates one of the unitary matrices `Q` or `P**H` determined by `gebrd` when reducing a matrix A to bidiagonal form: $Q^{H}\\*A\\*P = B$
///
/// `Q` and `P**H` are defined as products of elementary reflectors H(i) or G(i) respectively.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `orgbr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
pub fn cusolverDnDorgbr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
tau: *const f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single precision complex (`cuComplex`) routine with the same parameter list as
/// [`cusolverDnSorgbr`].
///
/// NOTE(review): no description is attached to this binding; presumably it is the complex
/// counterpart of `orgbr` (generating `Q` or `P**H` from a `gebrd` reduction), with `lwork`
/// obtained from [`cusolverDnCungbr_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnCungbr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double precision complex (`cuDoubleComplex`) routine with the same parameter list as
/// [`cusolverDnDorgbr`].
///
/// NOTE(review): no description is attached to this binding; presumably it is the complex
/// counterpart of `orgbr` (generating `Q` or `P**H` from a `gebrd` reduction), with `lwork`
/// obtained from [`cusolverDnZungbr_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnZungbr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
k: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnSsytrd`] (single precision, real valued).
///
/// Takes the same `uplo`, `n`, `A`, `lda`, `d`, `e` and `tau` arguments as `cusolverDnSsytrd`
/// (here as read-only pointers) and writes the required size of the working space through `lwork`.
/// As documented for `sytrd`, the size in bytes of the working space is `sizeof(<type>) * lwork`.
pub fn cusolverDnSsytrd_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
d: *const f32,
e: *const f32,
tau: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnDsytrd`] (double precision, real valued).
///
/// Takes the same `uplo`, `n`, `A`, `lda`, `d`, `e` and `tau` arguments as `cusolverDnDsytrd`
/// (here as read-only pointers) and writes the required size of the working space through `lwork`.
/// As documented for `sytrd`, the size in bytes of the working space is `sizeof(<type>) * lwork`.
pub fn cusolverDnDsytrd_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
d: *const f64,
e: *const f64,
tau: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking single precision complex (`cuComplex`) inputs; the result is
/// written through `lwork`.
///
/// The parameter list matches [`cusolverDnSsytrd_bufferSize`] with `cuComplex` in place of `f32`
/// for `A` and `tau`; `d` and `e` stay real (`f32`).
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnChetrd`], the Hermitian counterpart of `sytrd` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnChetrd_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
d: *const f32,
e: *const f32,
tau: *const cuComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking double precision complex (`cuDoubleComplex`) inputs; the result
/// is written through `lwork`.
///
/// The parameter list matches [`cusolverDnDsytrd_bufferSize`] with `cuDoubleComplex` in place of
/// `f64` for `A` and `tau`; `d` and `e` stay real (`f64`).
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnZhetrd`], the Hermitian counterpart of `sytrd` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnZhetrd_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
d: *const f64,
e: *const f64,
tau: *const cuDoubleComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Reduces a symmetric $n \times n$ matrix `A` to real symmetric tridiagonal form (single precision, real valued).
///
/// The size of the work buffer needed is calculated by the `sytrd_bufferSize()` helper functions
/// (for this data type, [`cusolverDnSsytrd_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function reduces a general symmetric (Hermitian) $n \times n$ matrix `A` to real symmetric tridiagonal form `T` by an orthogonal transformation: $Q^{H}\\*A\\*Q = T$
///
/// As an output, `A` contains `T` and householder reflection vectors. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the diagonal and first superdiagonal of `A` are overwritten by the corresponding elements of the tridiagonal matrix `T`, and the elements above the first superdiagonal, with the array `tau`, represent the orthogonal matrix `Q` as a product of elementary reflectors; If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], the diagonal and first subdiagonal of `A` are overwritten by the corresponding elements of the tridiagonal matrix `T`, and the elements below the first subdiagonal, with the array `tau`, represent the orthogonal matrix `Q` as a product of elementary reflectors.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `sytrd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). Note that the problem size `n` is limited by a condition `n*lda < INT32_MAX` primarily due to the current implementation constraints.
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
///
/// NOTE(review): `d` and `e` are not described above; presumably they receive the diagonal and
/// off-diagonal elements of `T` — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsytrd(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
d: *mut f32,
e: *mut f32,
tau: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Reduces a symmetric $n \times n$ matrix `A` to real symmetric tridiagonal form (double precision, real valued).
///
/// The size of the work buffer needed is calculated by the `sytrd_bufferSize()` helper functions
/// (for this data type, [`cusolverDnDsytrd_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function reduces a general symmetric (Hermitian) $n \times n$ matrix `A` to real symmetric tridiagonal form `T` by an orthogonal transformation: $Q^{H}\\*A\\*Q = T$
///
/// As an output, `A` contains `T` and householder reflection vectors. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the diagonal and first superdiagonal of `A` are overwritten by the corresponding elements of the tridiagonal matrix `T`, and the elements above the first superdiagonal, with the array `tau`, represent the orthogonal matrix `Q` as a product of elementary reflectors; If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], the diagonal and first subdiagonal of `A` are overwritten by the corresponding elements of the tridiagonal matrix `T`, and the elements below the first subdiagonal, with the array `tau`, represent the orthogonal matrix `Q` as a product of elementary reflectors.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `sytrd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). Note that the problem size `n` is limited by a condition `n*lda < INT32_MAX` primarily due to the current implementation constraints.
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
///
/// NOTE(review): `d` and `e` are not described above; presumably they receive the diagonal and
/// off-diagonal elements of `T` — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsytrd(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
d: *mut f64,
e: *mut f64,
tau: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single precision complex (`cuComplex`) routine with the same parameter list as
/// [`cusolverDnSsytrd`]; `d` and `e` stay real (`f32`).
///
/// NOTE(review): no description is attached to this binding; presumably it is the Hermitian
/// counterpart of `sytrd` (reduction to real symmetric tridiagonal form), with `lwork` obtained
/// from [`cusolverDnChetrd_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnChetrd(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
d: *mut f32,
e: *mut f32,
tau: *mut cuComplex,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double precision complex (`cuDoubleComplex`) routine with the same parameter list as
/// [`cusolverDnDsytrd`]; `d` and `e` stay real (`f64`).
///
/// NOTE(review): no description is attached to this binding; presumably it is the Hermitian
/// counterpart of `sytrd` (reduction to real symmetric tridiagonal form), with `lwork` obtained
/// from [`cusolverDnZhetrd_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnZhetrd(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
d: *mut f64,
e: *mut f64,
tau: *mut cuDoubleComplex,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnSorgtr`] (single precision, real valued).
///
/// Takes the same `uplo`, `n`, `A`, `lda` and `tau` arguments as `cusolverDnSorgtr` (here as
/// read-only pointers) and writes the required size of the working space through `lwork`.
/// As documented for `orgtr`, the size in bytes of the working space is `sizeof(<type>) * lwork`.
pub fn cusolverDnSorgtr_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
tau: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnDorgtr`] (double precision, real valued).
///
/// Takes the same `uplo`, `n`, `A`, `lda` and `tau` arguments as `cusolverDnDorgtr` (here as
/// read-only pointers) and writes the required size of the working space through `lwork`.
/// As documented for `orgtr`, the size in bytes of the working space is `sizeof(<type>) * lwork`.
pub fn cusolverDnDorgtr_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
tau: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking single precision complex (`cuComplex`) inputs; the result is
/// written through `lwork`.
///
/// The parameter list matches [`cusolverDnSorgtr_bufferSize`] with `cuComplex` in place of `f32`.
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnCungtr`], the complex counterpart of `orgtr` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnCungtr_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking double precision complex (`cuDoubleComplex`) inputs; the result
/// is written through `lwork`.
///
/// The parameter list matches [`cusolverDnDorgtr_bufferSize`] with `cuDoubleComplex` in place of
/// `f64`.
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnZungtr`], the complex counterpart of `orgtr` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnZungtr_bufferSize(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Generates the matrix `Q` from the elementary reflectors returned by `sytrd` (single precision, real valued).
///
/// The size of the work buffer needed is calculated by the `orgtr_bufferSize()` helper functions
/// (for this data type, [`cusolverDnSorgtr_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function generates a unitary matrix `Q` which is defined as the product of n-1 elementary reflectors of order n, as returned by `sytrd`.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `orgtr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
pub fn cusolverDnSorgtr(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
tau: *const f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Generates the matrix `Q` from the elementary reflectors returned by `sytrd` (double precision, real valued).
///
/// The size of the work buffer needed is calculated by the `orgtr_bufferSize()` helper functions
/// (for this data type, [`cusolverDnDorgtr_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function generates a unitary matrix `Q` which is defined as the product of n-1 elementary reflectors of order n, as returned by `sytrd`.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `orgtr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
pub fn cusolverDnDorgtr(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
tau: *const f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single precision complex (`cuComplex`) routine with the same parameter list as
/// [`cusolverDnSorgtr`].
///
/// NOTE(review): no description is attached to this binding; presumably it is the complex
/// counterpart of `orgtr` (generating the unitary matrix `Q` from a tridiagonal reduction), with
/// `lwork` obtained from [`cusolverDnCungtr_bufferSize`] — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnCungtr(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double precision complex (`cuDoubleComplex`) routine with the same parameter list as
/// [`cusolverDnDorgtr`].
///
/// NOTE(review): no description is attached to this binding; presumably it is the complex
/// counterpart of `orgtr` (generating the unitary matrix `Q` from a tridiagonal reduction), with
/// `lwork` obtained from [`cusolverDnZungtr_bufferSize`] — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnZungtr(
handle: cusolverDnHandle_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnSormtr`] (single precision, real valued).
///
/// Takes the same `side`, `uplo`, `trans`, `m`, `n`, `A`, `lda`, `tau`, `C` and `ldc` arguments as
/// `cusolverDnSormtr` (here as read-only pointers) and writes the required size of the working
/// space through `lwork`. As documented for `ormtr`, the size in bytes of the working space is
/// `sizeof(<type>) * lwork`.
pub fn cusolverDnSormtr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
tau: *const f32,
C: *const f32,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnDormtr`] (double precision, real valued).
///
/// Takes the same `side`, `uplo`, `trans`, `m`, `n`, `A`, `lda`, `tau`, `C` and `ldc` arguments as
/// `cusolverDnDormtr` (here as read-only pointers) and writes the required size of the working
/// space through `lwork`. As documented for `ormtr`, the size in bytes of the working space is
/// `sizeof(<type>) * lwork`.
pub fn cusolverDnDormtr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
tau: *const f64,
C: *const f64,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking single precision complex (`cuComplex`) inputs; the result is
/// written through `lwork`.
///
/// The parameter list matches [`cusolverDnSormtr_bufferSize`] with `cuComplex` in place of `f32`.
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnCunmtr`], the complex counterpart of `ormtr` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnCunmtr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
tau: *const cuComplex,
C: *const cuComplex,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query taking double precision complex (`cuDoubleComplex`) inputs; the result
/// is written through `lwork`.
///
/// The parameter list matches [`cusolverDnDormtr_bufferSize`] with `cuDoubleComplex` in place of
/// `f64`.
///
/// NOTE(review): no description is attached to this binding; presumably it sizes the workspace
/// for [`cusolverDnZunmtr`], the complex counterpart of `ormtr` — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnZunmtr_bufferSize(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *const cuDoubleComplex,
C: *const cuDoubleComplex,
ldc: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Overwrites the $m \times n$ matrix `C` with `op(Q) * C` or `C * op(Q)` (single precision, real valued).
///
/// The size of the work buffer needed is calculated by the `ormtr_bufferSize()` helper functions
/// (for this data type, [`cusolverDnSormtr_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function overwrites $m \times n$ matrix `C` by:
/// $$
/// C =
/// \begin{cases}
/// \operatorname{op}(Q) * C & \text{if } side = \text{CUBLAS_SIDE_LEFT} \\
/// C * \operatorname{op}(Q) & \text{if } side = \text{CUBLAS_SIDE_RIGHT}
/// \end{cases}
/// $$
///
/// where `Q` is a unitary matrix formed by a sequence of elementary reflection vectors from `sytrd`.
///
/// The operation on `Q` is defined by:
/// $$
/// \operatorname{op}(Q) =
/// \begin{cases}
/// Q & \text{if } transa = \text{CUBLAS_OP_N} \\
/// Q^T & \text{if } transa = \text{CUBLAS_OP_T} \\
/// Q^H & \text{if } transa = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `ormtr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Note: in this binding the `transa` selector used in the formula above is the parameter named
/// `trans`, and the `devInfo` output is the parameter named `info`.
pub fn cusolverDnSormtr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
tau: *mut f32,
C: *mut f32,
ldc: ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Overwrites the $m \times n$ matrix `C` with `op(Q) * C` or `C * op(Q)` (double precision, real valued).
///
/// The size of the work buffer needed is calculated by the `ormtr_bufferSize()` helper functions
/// (for this data type, [`cusolverDnDormtr_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function overwrites $m \times n$ matrix `C` by:
/// $$
/// C =
/// \begin{cases}
/// \operatorname{op}(Q) * C & \text{if } side = \text{CUBLAS_SIDE_LEFT} \\
/// C * \operatorname{op}(Q) & \text{if } side = \text{CUBLAS_SIDE_RIGHT}
/// \end{cases}
/// $$
///
/// where `Q` is a unitary matrix formed by a sequence of elementary reflection vectors from `sytrd`.
///
/// The operation on `Q` is defined by:
/// $$
/// \operatorname{op}(Q) =
/// \begin{cases}
/// Q & \text{if } transa = \text{CUBLAS_OP_N} \\
/// Q^T & \text{if } transa = \text{CUBLAS_OP_T} \\
/// Q^H & \text{if } transa = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `ormtr_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Note: in this binding the `transa` selector used in the formula above is the parameter named
/// `trans`, and the `devInfo` output is the parameter named `info`.
pub fn cusolverDnDormtr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
tau: *mut f64,
C: *mut f64,
ldc: ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single precision complex (`cuComplex`) routine with the same parameter list as
/// [`cusolverDnSormtr`].
///
/// NOTE(review): no description is attached to this binding; presumably it is the complex
/// counterpart of `ormtr` (overwriting `C` with `op(Q) * C` or `C * op(Q)`), with `lwork` obtained
/// from [`cusolverDnCunmtr_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnCunmtr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
tau: *mut cuComplex,
C: *mut cuComplex,
ldc: ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double precision complex (`cuDoubleComplex`) routine with the same parameter list as
/// [`cusolverDnDormtr`].
///
/// NOTE(review): no description is attached to this binding; presumably it is the complex
/// counterpart of `ormtr` (overwriting `C` with `op(Q) * C` or `C * op(Q)`), with `lwork` obtained
/// from [`cusolverDnZunmtr_bufferSize`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnZunmtr(
handle: cusolverDnHandle_t,
side: cublasSideMode_t,
uplo: cublasFillMode_t,
trans: cublasOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
tau: *mut cuDoubleComplex,
C: *mut cuDoubleComplex,
ldc: ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnSgesvd`] (single precision, real valued).
///
/// Given the matrix dimensions `m` and `n`, the required size of the working space is written
/// through `lwork`. As documented for `gesvd`, the size in bytes of the working space is
/// `sizeof(<type>) * lwork`.
pub fn cusolverDnSgesvd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for [`cusolverDnDgesvd`] (double precision, real valued).
///
/// Given the matrix dimensions `m` and `n`, the required size of the working space is written
/// through `lwork`. As documented for `gesvd`, the size in bytes of the working space is
/// `sizeof(<type>) * lwork`.
pub fn cusolverDnDgesvd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for the single precision complex `gesvd` routine.
///
/// Given the matrix dimensions `m` and `n`, the required size of the working space is written
/// through `lwork`. As documented for `gesvd`, the size in bytes of the working space is
/// `sizeof(<type>) * lwork`.
///
/// NOTE(review): presumably the matching compute routine is `cusolverDnCgesvd` — confirm against
/// the cuSOLVER documentation.
pub fn cusolverDnCgesvd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Work-buffer size query for the double precision complex `gesvd` routine.
///
/// Given the matrix dimensions `m` and `n`, the required size of the working space is written
/// through `lwork`. As documented for `gesvd`, the size in bytes of the working space is
/// `sizeof(<type>) * lwork`.
///
/// NOTE(review): presumably the matching compute routine is `cusolverDnZgesvd` — confirm against
/// the cuSOLVER documentation.
pub fn cusolverDnZgesvd_bufferSize(
handle: cusolverDnHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` (single precision, real valued).
///
/// The size needed for the pre-allocated work buffer is calculated by the `gesvd_bufferSize()`
/// helper functions (for this data type, [`cusolverDnSgesvd_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and corresponding the left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `gesvd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). if `bdsqr` did not converge, `devInfo` specifies how many superdiagonals of an intermediate bidiagonal form did not converge to zero.
///
/// The `rwork` is real array of dimension (min(m,n)-1). If `devInfo`>0 and `rwork` is not NULL, `rwork` contains the unconverged superdiagonal elements of an upper bidiagonal matrix. This is slightly different from LAPACK which puts unconverged superdiagonal elements in `work` if type is `real`; in `rwork` if type is `complex`. `rwork` can be a NULL pointer if the user does not want the information from superdiagonal.
///
/// Please visit [cuSOLVER Library Samples - gesvd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvd) for a code example.
///
/// Remark 1: `gesvd` only supports `m>=n`.
///
/// Remark 2: the routine returns $V^{H}$, not `V`.
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
///
/// NOTE(review): `jobu` and `jobvt` are not described above; presumably they are LAPACK-style job
/// characters selecting which singular vectors are computed — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnSgesvd(
handle: cusolverDnHandle_t,
jobu: ::core::ffi::c_schar,
jobvt: ::core::ffi::c_schar,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
S: *mut f32,
U: *mut f32,
ldu: ::core::ffi::c_int,
VT: *mut f32,
ldvt: ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
rwork: *mut f32,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` (double precision, real valued).
///
/// The size needed for the pre-allocated work buffer is calculated by the `gesvd_bufferSize()`
/// helper functions (for this data type, [`cusolverDnDgesvd_bufferSize`]).
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and corresponding the left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// The user has to provide working space which is pointed by input parameter `work`. The input parameter `lwork` is size of the working space, and it is returned by `gesvd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). if `bdsqr` did not converge, `devInfo` specifies how many superdiagonals of an intermediate bidiagonal form did not converge to zero.
///
/// The `rwork` is real array of dimension (min(m,n)-1). If `devInfo`>0 and `rwork` is not NULL, `rwork` contains the unconverged superdiagonal elements of an upper bidiagonal matrix. This is slightly different from LAPACK which puts unconverged superdiagonal elements in `work` if type is `real`; in `rwork` if type is `complex`. `rwork` can be a NULL pointer if the user does not want the information from superdiagonal.
///
/// Please visit [cuSOLVER Library Samples - gesvd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvd) for a code example.
///
/// Remark 1: `gesvd` only supports `m>=n`.
///
/// Remark 2: the routine returns $V^{H}$, not `V`.
///
/// Note: the `devInfo` output mentioned above is the parameter named `info` in this binding.
///
/// NOTE(review): `jobu` and `jobvt` are not described above; presumably they are LAPACK-style job
/// characters selecting which singular vectors are computed — confirm against the cuSOLVER
/// documentation.
pub fn cusolverDnDgesvd(
handle: cusolverDnHandle_t,
jobu: ::core::ffi::c_schar,
jobvt: ::core::ffi::c_schar,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
S: *mut f64,
U: *mut f64,
ldu: ::core::ffi::c_int,
VT: *mut f64,
ldvt: ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
rwork: *mut f64,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single-precision complex (`cuComplex`) variant of the `gesvd` singular value decomposition routine.
///
/// In this variant `A`, `U`, `VT` and `work` are `cuComplex` buffers, while `S` and `rwork` are real `f32` buffers.
///
/// The `gesvd_bufferSize()` helper functions calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `bdsqr` did not converge, `devInfo` specifies how many superdiagonals of an intermediate bidiagonal form did not converge to zero.
///
/// `rwork` is a real array of dimension (min(m,n)-1). If `devInfo`>0 and `rwork` is not NULL, `rwork` contains the unconverged superdiagonal elements of an upper bidiagonal matrix. This is slightly different from LAPACK, which puts unconverged superdiagonal elements in `work` if the type is `real` and in `rwork` if the type is `complex`. `rwork` can be a NULL pointer if the user does not want the information from the superdiagonal.
///
/// Please visit [cuSOLVER Library Samples - gesvd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvd) for a code example.
///
/// Remark 1: `gesvd` only supports `m>=n`.
///
/// Remark 2: the routine returns $V^{H}$, not `V`.
///
/// NOTE(review): the text above names the status output `devInfo`; in this declaration it is presumably the `info` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnCgesvd(
handle: cusolverDnHandle_t,
jobu: ::core::ffi::c_schar,
jobvt: ::core::ffi::c_schar,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
S: *mut f32,
U: *mut cuComplex,
ldu: ::core::ffi::c_int,
VT: *mut cuComplex,
ldvt: ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
rwork: *mut f32,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double-precision complex (`cuDoubleComplex`) variant of the `gesvd` singular value decomposition routine.
///
/// In this variant `A`, `U`, `VT` and `work` are `cuDoubleComplex` buffers, while `S` and `rwork` are real `f64` buffers.
///
/// The `gesvd_bufferSize()` helper functions calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `bdsqr` did not converge, `devInfo` specifies how many superdiagonals of an intermediate bidiagonal form did not converge to zero.
///
/// `rwork` is a real array of dimension (min(m,n)-1). If `devInfo`>0 and `rwork` is not NULL, `rwork` contains the unconverged superdiagonal elements of an upper bidiagonal matrix. This is slightly different from LAPACK, which puts unconverged superdiagonal elements in `work` if the type is `real` and in `rwork` if the type is `complex`. `rwork` can be a NULL pointer if the user does not want the information from the superdiagonal.
///
/// Please visit [cuSOLVER Library Samples - gesvd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvd) for a code example.
///
/// Remark 1: `gesvd` only supports `m>=n`.
///
/// Remark 2: the routine returns $V^{H}$, not `V`.
///
/// NOTE(review): the text above names the status output `devInfo`; in this declaration it is presumably the `info` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnZgesvd(
handle: cusolverDnHandle_t,
jobu: ::core::ffi::c_schar,
jobvt: ::core::ffi::c_schar,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
S: *mut f64,
U: *mut cuDoubleComplex,
ldu: ::core::ffi::c_int,
VT: *mut cuDoubleComplex,
ldvt: ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
rwork: *mut f64,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSsyevd`] (real single precision, `f32`).
///
/// The documentation of [`cusolverDnSsyevd`] states that its `lwork` argument is the value returned by `syevd_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `uplo`, `n`, `A`, `lda`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsyevd_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDsyevd`] (real double precision, `f64`).
///
/// The documentation of [`cusolverDnDsyevd`] states that its `lwork` argument is the value returned by `syevd_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `uplo`, `n`, `A`, `lda`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsyevd_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnCheevd`] (single-precision complex: `cuComplex` matrix `A`, real `f32` array `W`).
///
/// The shared `syevd`/`heevd` documentation on [`cusolverDnSsyevd`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `uplo`, `n`, `A`, `lda`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnCheevd_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnZheevd`] (double-precision complex: `cuDoubleComplex` matrix `A`, real `f64` array `W`).
///
/// The shared `syevd`/`heevd` documentation on [`cusolverDnDsyevd`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `uplo`, `n`, `A`, `lda`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnZheevd_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real single-precision (`f32`) variant of the `syevd` symmetric eigenvalue routine.
///
/// The `syevd_bufferSize()` helper functions (here [`cusolverDnSsyevd_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A`. The standard symmetric eigenvalue problem is:
/// $$
/// A\\*V = V\\*\Lambda
/// $$
///
/// where `Λ` is a real $n \times n$ diagonal matrix. `V` is an $n \times n$ unitary matrix. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (greater than zero), `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - syevd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevd) for a code example.
///
/// NOTE(review): the text above names the status output `devInfo`; in this declaration it is presumably the `info` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsyevd(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
W: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real double-precision (`f64`) variant of the `syevd` symmetric eigenvalue routine.
///
/// The `syevd_bufferSize()` helper functions (here [`cusolverDnDsyevd_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A`. The standard symmetric eigenvalue problem is:
/// $$
/// A\\*V = V\\*\Lambda
/// $$
///
/// where `Λ` is a real $n \times n$ diagonal matrix. `V` is an $n \times n$ unitary matrix. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (greater than zero), `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - syevd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevd) for a code example.
///
/// NOTE(review): the text above names the status output `devInfo`; in this declaration it is presumably the `info` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsyevd(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
W: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single-precision complex (`cuComplex`) Hermitian counterpart of [`cusolverDnSsyevd`]; the documentation there covers the S, D, C and Z data types.
///
/// `A` and `work` are `cuComplex` buffers and `W` is a real `f32` buffer. `lwork` is the workspace size in elements (`sizeof(<type>) * lwork` bytes), as returned by [`cusolverDnCheevd_bufferSize`].
///
/// NOTE(review): `W` presumably receives the eigenvalues, and `info` presumably corresponds to `devInfo` in the shared documentation — confirm against the cuSOLVER documentation.
pub fn cusolverDnCheevd(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
W: *mut f32,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double-precision complex (`cuDoubleComplex`) Hermitian counterpart of [`cusolverDnDsyevd`]; the documentation there covers the S, D, C and Z data types.
///
/// `A` and `work` are `cuDoubleComplex` buffers and `W` is a real `f64` buffer. `lwork` is the workspace size in elements (`sizeof(<type>) * lwork` bytes), as returned by [`cusolverDnZheevd_bufferSize`].
///
/// NOTE(review): `W` presumably receives the eigenvalues, and `info` presumably corresponds to `devInfo` in the shared documentation — confirm against the cuSOLVER documentation.
pub fn cusolverDnZheevd(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
W: *mut f64,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSsyevdx`] (real single precision, `f32`).
///
/// The documentation of [`cusolverDnSsyevdx`] states that its `lwork` argument is the value returned by `syevdx_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `range`, `uplo`, `n`, `A`, `lda`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsyevdx_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDsyevdx`] (real double precision, `f64`).
///
/// The documentation of [`cusolverDnDsyevdx`] states that its `lwork` argument is the value returned by `syevdx_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `range`, `uplo`, `n`, `A`, `lda`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsyevdx_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnCheevdx`] (single-precision complex: `cuComplex` matrix `A`, real `f32` array `W` and bounds `vl`/`vu`).
///
/// The shared `syevdx`/`heevdx` documentation on [`cusolverDnSsyevdx`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `range`, `uplo`, `n`, `A`, `lda`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnCheevdx_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnZheevdx`] (double-precision complex: `cuDoubleComplex` matrix `A`, real `f64` array `W` and bounds `vl`/`vu`).
///
/// The shared `syevdx`/`heevdx` documentation on [`cusolverDnDsyevdx`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`jobz`, `range`, `uplo`, `n`, `A`, `lda`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnZheevdx_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real single-precision (`f32`) variant of the `syevdx` symmetric eigenvalue routine with eigenvalue-range selection.
///
/// The `syevdx_bufferSize()` helper functions (here [`cusolverDnSsyevdx_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes all or a selection of the eigenvalues and optionally eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A`. The standard symmetric eigenvalue problem is:
/// $$
/// A\\*V = V\\*\Lambda
/// $$
///
/// where `Λ` is a real `n×h_meig` diagonal matrix. `V` is an `n×h_meig` unitary matrix. `h_meig` is the number of eigenvalues/eigenvectors computed by the routine; `h_meig` is equal to `n` when the whole spectrum (e.g., `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]) is requested. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevdx_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (greater than zero), `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - syevdx](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevdx) for a code example.
///
/// NOTE(review): the text above uses the names `devInfo` and `h_meig`; in this declaration they are presumably the `info` and `meig` parameters — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsyevdx(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real double-precision (`f64`) variant of the `syevdx` symmetric eigenvalue routine with eigenvalue-range selection.
///
/// The `syevdx_bufferSize()` helper functions (here [`cusolverDnDsyevdx_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes all or a selection of the eigenvalues and optionally eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A`. The standard symmetric eigenvalue problem is:
/// $$
/// A\\*V = V\\*\Lambda
/// $$
///
/// where `Λ` is a real `n×h_meig` diagonal matrix. `V` is an `n×h_meig` unitary matrix. `h_meig` is the number of eigenvalues/eigenvectors computed by the routine; `h_meig` is equal to `n` when the whole spectrum (e.g., `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]) is requested. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevdx_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (greater than zero), `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - syevdx](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevdx) for a code example.
///
/// NOTE(review): the text above uses the names `devInfo` and `h_meig`; in this declaration they are presumably the `info` and `meig` parameters — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsyevdx(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single-precision complex (`cuComplex`) Hermitian counterpart of [`cusolverDnSsyevdx`]; the documentation there covers the S, D, C and Z data types.
///
/// `A` and `work` are `cuComplex` buffers; `W` and the bounds `vl`/`vu` are real `f32`. `lwork` is the workspace size in elements (`sizeof(<type>) * lwork` bytes), as returned by [`cusolverDnCheevdx_bufferSize`].
///
/// NOTE(review): `W` presumably receives the eigenvalues, `meig` presumably corresponds to `h_meig`, and `info` to `devInfo` in the shared documentation — confirm against the cuSOLVER documentation.
pub fn cusolverDnCheevdx(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f32,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double-precision complex (`cuDoubleComplex`) Hermitian counterpart of [`cusolverDnDsyevdx`]; the documentation there covers the S, D, C and Z data types.
///
/// `A` and `work` are `cuDoubleComplex` buffers; `W` and the bounds `vl`/`vu` are real `f64`. `lwork` is the workspace size in elements (`sizeof(<type>) * lwork` bytes), as returned by [`cusolverDnZheevdx_bufferSize`].
///
/// NOTE(review): `W` presumably receives the eigenvalues, `meig` presumably corresponds to `h_meig`, and `info` to `devInfo` in the shared documentation — confirm against the cuSOLVER documentation.
pub fn cusolverDnZheevdx(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f64,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSsygvdx`] (real single precision, `f32`).
///
/// The documentation of [`cusolverDnSsygvdx`] states that its `lwork` argument is the value returned by `sygvdx_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`itype`, `jobz`, `range`, `uplo`, `n`, `A`, `lda`, `B`, `ldb`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsygvdx_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
B: *const f32,
ldb: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDsygvdx`] (real double precision, `f64`).
///
/// The documentation of [`cusolverDnDsygvdx`] states that its `lwork` argument is the value returned by `sygvdx_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`itype`, `jobz`, `range`, `uplo`, `n`, `A`, `lda`, `B`, `ldb`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsygvdx_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
B: *const f64,
ldb: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnChegvdx`] (single-precision complex: `cuComplex` matrices `A` and `B`, real `f32` array `W` and bounds `vl`/`vu`).
///
/// The shared `sygvdx`/`hegvdx` documentation on [`cusolverDnSsygvdx`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`itype`, `jobz`, `range`, `uplo`, `n`, `A`, `lda`, `B`, `ldb`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnChegvdx_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
B: *const cuComplex,
ldb: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnZhegvdx`] (double-precision complex: `cuDoubleComplex` matrices `A` and `B`, real `f64` array `W` and bounds `vl`/`vu`).
///
/// The shared `sygvdx`/`hegvdx` documentation on [`cusolverDnDsygvdx`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`itype`, `jobz`, `range`, `uplo`, `n`, `A`, `lda`, `B`, `ldb`, `vl`, `vu`, `il`, `iu`, `meig`, `W`), except that `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments. `meig` remains a `*mut` pointer.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnZhegvdx_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *const cuDoubleComplex,
ldb: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real single-precision (`f32`) variant of the `sygvdx` generalized symmetric-definite eigenvalue routine with eigenvalue-range selection.
///
/// The `sygvdx_bufferSize()` helper functions (here [`cusolverDnSsygvdx_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes all or a selection of the eigenvalues and optionally eigenvectors of a symmetric (Hermitian) $n \times n$ matrix-pair (`A`,`B`). The generalized symmetric-definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `B` is positive definite. `Λ` is a real $n \times {h_meig}$ diagonal matrix. The diagonal elements of `Λ` are the eigenvalues of (`A`, `B`) in ascending order. `V` is an $n \times {h_meig}$ orthogonal matrix. `h_meig` is the number of eigenvalues/eigenvectors computed by the routine; `h_meig` is equal to `n` when the whole spectrum (for example, `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]) is requested. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sygvdx_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (i > 0 and i<=n) and `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero. If `devInfo = n + i` (i > 0), then the leading minor of order `i` of `B` is not positive definite. The factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthogonal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - sygvdx](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/sygvdx) for a code example.
///
/// NOTE(review): the text above uses the names `devInfo` and `h_meig`; in this declaration they are presumably the `info` and `meig` parameters — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsygvdx(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
B: *mut f32,
ldb: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real double-precision (`f64`) variant of the `sygvdx` generalized symmetric-definite eigenvalue routine with eigenvalue-range selection.
///
/// The `sygvdx_bufferSize()` helper functions (here [`cusolverDnDsygvdx_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes all or a selection of the eigenvalues and optionally eigenvectors of a symmetric (Hermitian) $n \times n$ matrix-pair (`A`,`B`). The generalized symmetric-definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `B` is positive definite. `Λ` is a real $n \times {h_meig}$ diagonal matrix. The diagonal elements of `Λ` are the eigenvalues of (`A`, `B`) in ascending order. `V` is an $n \times {h_meig}$ orthogonal matrix. `h_meig` is the number of eigenvalues/eigenvectors computed by the routine; `h_meig` is equal to `n` when the whole spectrum (for example, `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]) is requested. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sygvdx_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (i > 0 and i<=n) and `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero. If `devInfo = n + i` (i > 0), then the leading minor of order `i` of `B` is not positive definite. The factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthogonal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - sygvdx](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/sygvdx) for a code example.
///
/// NOTE(review): the text above uses the names `devInfo` and `h_meig`; in this declaration they are presumably the `info` and `meig` parameters — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsygvdx(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
B: *mut f64,
ldb: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single-precision complex (`cuComplex`) Hermitian counterpart of [`cusolverDnSsygvdx`]; the documentation there covers the S, D, C and Z data types.
///
/// `A`, `B` and `work` are `cuComplex` buffers; `W` and the bounds `vl`/`vu` are real `f32`. `lwork` is the workspace size in elements (`sizeof(<type>) * lwork` bytes), as returned by [`cusolverDnChegvdx_bufferSize`].
///
/// NOTE(review): `W` presumably receives the eigenvalues, `meig` presumably corresponds to `h_meig`, and `info` to `devInfo` in the shared documentation — confirm against the cuSOLVER documentation.
pub fn cusolverDnChegvdx(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
B: *mut cuComplex,
ldb: ::core::ffi::c_int,
vl: f32,
vu: f32,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f32,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double-precision complex (`cuDoubleComplex`) Hermitian counterpart of [`cusolverDnDsygvdx`]; the documentation there covers the S, D, C and Z data types.
///
/// `A`, `B` and `work` are `cuDoubleComplex` buffers; `W` and the bounds `vl`/`vu` are real `f64`. `lwork` is the workspace size in elements (`sizeof(<type>) * lwork` bytes), as returned by [`cusolverDnZhegvdx_bufferSize`].
///
/// NOTE(review): `W` presumably receives the eigenvalues, `meig` presumably corresponds to `h_meig`, and `info` to `devInfo` in the shared documentation — confirm against the cuSOLVER documentation.
pub fn cusolverDnZhegvdx(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
vl: f64,
vu: f64,
il: ::core::ffi::c_int,
iu: ::core::ffi::c_int,
meig: *mut ::core::ffi::c_int,
W: *mut f64,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSsygvd`] (real single precision, `f32`).
///
/// The documentation of [`cusolverDnSsygvd`] states that its `lwork` argument is the value returned by `sygvd_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`itype`, `jobz`, `uplo`, `n`, `A`, `lda`, `B`, `ldb`, `W`), except that `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsygvd_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
B: *const f32,
ldb: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDsygvd`] (real double precision, `f64`).
///
/// The documentation of [`cusolverDnDsygvd`] states that its `lwork` argument is the value returned by `sygvd_bufferSize()`; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// The argument list mirrors the solver's (`itype`, `jobz`, `uplo`, `n`, `A`, `lda`, `B`, `ldb`, `W`), except that `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsygvd_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
B: *const f64,
ldb: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnChegvd`] (single-precision complex: `cuComplex` matrices `A` and `B`, real `f32` array `W`).
///
/// The shared `sygvd`/`hegvd` documentation on [`cusolverDnSsygvd`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// Compared with the real-valued solver [`cusolverDnSsygvd`], the matrix arguments `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnChegvd_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
B: *const cuComplex,
ldb: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for the double-precision complex `hegvd` routine (`cuDoubleComplex` matrices `A` and `B`, real `f64` array `W`).
///
/// The shared `sygvd`/`hegvd` documentation on [`cusolverDnDsygvd`] states that the solver's `lwork` argument is the value returned by the matching `_bufferSize()` helper; this declaration hands that value back through the `lwork` out-pointer. The value counts elements rather than bytes: the workspace occupies `sizeof(<type>) * lwork` bytes.
///
/// Compared with the real-valued solver [`cusolverDnDsygvd`], the matrix arguments `A`, `B` and `W` are `*const` here and there are no `work`/`info` arguments.
///
/// NOTE(review): presumably the query must be made with the same arguments that are later passed to the solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnZhegvd_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *const cuDoubleComplex,
ldb: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real single-precision (`f32`) variant of the `sygvd` generalized symmetric-definite eigenvalue routine.
///
/// The `sygvd_bufferSize()` helper functions (here [`cusolverDnSsygvd_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix-pair (`A`,`B`). The generalized symmetric-definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `B` is positive definite. `Λ` is a real $n \times n$ diagonal matrix. The diagonal elements of `Λ` are the eigenvalues of (`A`, `B`) in ascending order. `V` is an $n \times n$ orthogonal matrix. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sygvd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (i > 0 and i<=n) and `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero. If `devInfo = n + i` (i > 0), then the leading minor of order `i` of `B` is not positive definite. The factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthogonal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - sygvd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/sygvd) for a code example.
///
/// NOTE(review): the text above names the status output `devInfo`; in this declaration it is presumably the `info` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsygvd(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
B: *mut f32,
ldb: ::core::ffi::c_int,
W: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Real double-precision (`f64`) variant of the `sygvd` generalized symmetric-definite eigenvalue routine.
///
/// The `sygvd_bufferSize()` helper functions (here [`cusolverDnDsygvd_bufferSize`]) calculate the size needed for the pre-allocated work buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix-pair (`A`,`B`). The generalized symmetric-definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `B` is positive definite. `Λ` is a real $n \times n$ diagonal matrix. The diagonal elements of `Λ` are the eigenvalues of (`A`, `B`) in ascending order. `V` is an $n \times n$ orthogonal matrix. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sygvd_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `devInfo = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `devInfo = i` (i > 0 and i<=n) and `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero. If `devInfo = n + i` (i > 0), then the leading minor of order `i` of `B` is not positive definite. The factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthogonal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - sygvd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/sygvd) for a code example.
///
/// NOTE(review): the text above names the status output `devInfo`; in this declaration it is presumably the `info` parameter — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsygvd(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
B: *mut f64,
ldb: ::core::ffi::c_int,
W: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnChegvd`.
///
/// The argument list has the same names and order as [`cusolverDnDsygvd`]; here `A`, `B` and `work` point to `cuComplex` elements while the eigenvalue array `W` points to `f32` values.
///
/// NOTE(review): following the S/D/C/Z naming described on [`cusolverDnDsygvd`], this is presumably the single-precision complex (Hermitian) variant of the same generalized eigensolver, with identical meaning for `itype`, `jobz`, `uplo`, `lwork` and `info` — confirm against the cuSOLVER documentation.
pub fn cusolverDnChegvd(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
B: *mut cuComplex,
ldb: ::core::ffi::c_int,
W: *mut f32,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnZhegvd`.
///
/// The argument list has the same names and order as [`cusolverDnDsygvd`]; here `A`, `B` and `work` point to `cuDoubleComplex` elements while the eigenvalue array `W` points to `f64` values.
///
/// NOTE(review): following the S/D/C/Z naming described on [`cusolverDnDsygvd`], this is presumably the double-precision complex (Hermitian) variant of the same generalized eigensolver, with identical meaning for `itype`, `jobz`, `uplo`, `lwork` and `info` — confirm against the cuSOLVER documentation.
pub fn cusolverDnZhegvd(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
W: *mut f64,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnXsygvd`].
///
/// The documentation of [`cusolverDnXsygvd`] states that the sizes in bytes of its device and host working spaces are returned by this function; the two out-pointers `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are where those sizes are reported.
///
/// The remaining arguments (`handle`, `params`, `itype`, `jobz`, `uplo`, `n`, `lda`, `ldb`, the `dataType*` selectors and `computeType`) carry the same names as the corresponding arguments of [`cusolverDnXsygvd`]; see that function for their meaning and for the supported data-type combinations. The array pointers `d_A`, `d_B` and `d_W` are declared `*const` here.
pub fn cusolverDnXsygvd_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
d_A: *const ::core::ffi::c_void,
lda: i64,
dataTypeB: cudaDataType,
d_B: *const ::core::ffi::c_void,
ldb: i64,
dataTypeW: cudaDataType,
d_W: *const ::core::ffi::c_void,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Generic-API `sygvd`: computes all the eigenvalues, and optionally the eigenvectors, of a generalized symmetric (Hermitian) definite eigenproblem.
///
/// The helper functions below can calculate the sizes needed for pre-allocated buffer.
///
/// The following routine computes all the eigenvalues, and optionally, the eigenvectors of a generalized symmetric (Hermitian) definite eigenproblem.
///
/// The generalized symmetric (Hermitian) definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `A` and `B` are $n \times n$; A is symmetric/Hermitian and B is symmetric/Hermitian positive definite. The eigenvalues of (`A`, `B`) are computed and stored in the `W` vector in ascending order. `V` is an $n \times n$ orthogonal matrix. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// The user has to provide device and host working spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` (and `workspaceInBytesOnHost`) are the sizes in bytes of the device (and host) working space, and they are returned by [`cusolverDnXsygvd_bufferSize`].
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = i` (i > 0 and i<=n) and `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero. If `info = n + i` (i > 0), then the leading minor of order `i` of `B` is not positive definite. The factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Currently, [`cusolverDnXsygvd`] supports only the default algorithm.
///
/// **Algorithms supported by cusolverDnXsygvd**
///
/// | | |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// List of input arguments for [`cusolverDnXsygvd_bufferSize`] and [`cusolverDnXsygvd`]:
///
/// The generic API has four different data types, `dataTypeA` is data type of the matrix `A`, `dataTypeB` is data type of the matrix `B`, `dataTypeW` is data type of the vector `W` and `computeType` is compute type of the operation. [`cusolverDnXsygvd`] only supports the following four combinations.
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeB** | **DataTypeW** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SSYGVD` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DSYGVD` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CHEGVD` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `ZHEGVD` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `itype`: Specifies the problem type to be solved: * `itype`=[`cusolverEigType_t::CUSOLVER_EIG_TYPE_1`]: A\*x = (lambda)\*B\*x. * `itype`=[`cusolverEigType_t::CUSOLVER_EIG_TYPE_2`]: A\*B\*x = (lambda)\*x. * `itype`=[`cusolverEigType_t::CUSOLVER_EIG_TYPE_3`]: B\*A\*x = (lambda)\*x.
/// - `jobz`: Specifies options to either compute eigenvalue only or compute eigen-pair: `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Compute eigenvalues only; `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute eigenvalues and eigenvectors.
/// - `uplo`: Specifies which part of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`]: Lower triangle of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]: Upper triangle of `A` is stored.
/// - `n`: Number of rows (or columns) of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeB`: Data type of array `B`.
/// - `ldb`: Leading dimension of two-dimensional array used to store matrix `B`.
/// - `dataTypeW`: Data type of array `W`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXsygvd_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXsygvd_bufferSize`].
///
/// NOTE(review): the list above does not cover `d_A`, `d_B`, `d_W` and `d_info`. They appear to be the arrays `A`, `B`, `W` and the `info` status described above; the `d_` prefix suggests device memory — confirm against the cuSOLVER documentation.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0`, or `lda<max(1,n)`, or `ldb<max(1,n)`, or `itype` is not 1, 2 or 3, or `jobz` is not [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] or [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], or `uplo` is not [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] or [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]), or the combination of `dataType{A,B,W}` and `computeType` is not supported.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsygvd(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
d_A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeB: cudaDataType,
d_B: *mut ::core::ffi::c_void,
ldb: i64,
dataTypeW: cudaDataType,
d_W: *mut ::core::ffi::c_void,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
d_info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnXsygvdx`].
///
/// The documentation of [`cusolverDnXsygvdx`] states that the sizes in bytes of its device and host working spaces are returned by this function; the two out-pointers `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are where those sizes are reported.
///
/// The remaining arguments carry the same names as the corresponding arguments of [`cusolverDnXsygvdx`]; see that function for their meaning and for the supported data-type combinations. The array pointers `d_A`, `d_B` and `d_W` are declared `*const` here.
///
/// NOTE(review): unlike [`cusolverDnXsygvdx`], this declaration takes no `range` argument even though it accepts `vl`, `vu`, `il` and `iu` — confirm against the `cusolverDn.h` header these bindings were generated from.
pub fn cusolverDnXsygvdx_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
d_A: *const ::core::ffi::c_void,
lda: i64,
dataTypeB: cudaDataType,
d_B: *const ::core::ffi::c_void,
ldb: i64,
vl: *mut ::core::ffi::c_void,
vu: *mut ::core::ffi::c_void,
il: i64,
iu: i64,
meig: *mut i64,
dataTypeW: cudaDataType,
d_W: *const ::core::ffi::c_void,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Generic-API `sygvdx`: computes all or a selection of the eigenvalues, and optionally the eigenvectors, of a generalized symmetric (Hermitian) definite eigenproblem.
///
/// The helper function below can calculate the sizes needed for pre-allocated buffer.
///
/// The following routine computes all or a selection of the eigenvalues, and optionally, the eigenvectors of a generalized symmetric (Hermitian) definite eigenproblem.
///
/// The generalized symmetric-definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `A` and `B` are $n \times n$; A is symmetric/Hermitian and B is symmetric/Hermitian positive definite. The eigenvalues of (`A`, `B`) are computed and stored in the `W` vector in ascending order. `h_meig` (the `meig` argument of this declaration) represents the number of eigenvalues/eigenvectors computed by the routine, `h_meig` is equal to `n` when the whole spectrum (for example, `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]) is requested. `V` is an $n \times n$ orthogonal matrix. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// The user has to provide device and host working spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` (and `workspaceInBytesOnHost`) are the sizes in bytes of the device (and host) working space, and they are returned by [`cusolverDnXsygvdx_bufferSize`].
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = i` (i > 0 and i<=n) and `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero. If `info = n + i` (i > 0), then the leading minor of order `i` of `B` is not positive definite. The factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthogonal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// **Algorithms supported by cusolverDnXsygvdx**
///
/// | | |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// List of input arguments for [`cusolverDnXsygvdx_bufferSize`] and [`cusolverDnXsygvdx`]:
///
/// The generic API has four different types, `dataTypeA` is data type of the matrix `A`, `dataTypeB` is data type of the matrix `B`, `dataTypeW` is data type of the vector `W` and `computeType` is compute type of the operation. [`cusolverDnXsygvdx`] only supports the following four combinations:
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeB** | **DataTypeW** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SSYGVDX` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DSYGVDX` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CHEGVDX` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `ZHEGVDX` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `itype`: Specifies the problem type to be solved: * `itype`=[`cusolverEigType_t::CUSOLVER_EIG_TYPE_1`]: A\*x = (lambda)\*B\*x. * `itype`=[`cusolverEigType_t::CUSOLVER_EIG_TYPE_2`]: A\*B\*x = (lambda)\*x. * `itype`=[`cusolverEigType_t::CUSOLVER_EIG_TYPE_3`]: B\*A\*x = (lambda)\*x.
/// - `jobz`: Specifies options to either compute eigenvalue only or compute eigen-pair: `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Compute eigenvalues only; `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute eigenvalues and eigenvectors.
/// - `range`: Specifies which selection of eigenvalues and optionally eigenvectors needs to be computed: `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]: all eigenvalues/eigenvectors will be found, which becomes the classical sygvd/hegvd routine; `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_V`]: all eigenvalues/eigenvectors in the half-open interval (vl,vu] will be found; `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_I`]: the il-th through iu-th eigenvalues/eigenvectors will be found.
/// - `uplo`: Specifies which part of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`]: Lower triangle of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]: Upper triangle of `A` is stored.
/// - `n`: Number of rows (or columns) of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`. `lda` is not less than `max(1,n)`.
/// - `dataTypeB`: Data type of array `B`.
/// - `ldb`: Leading dimension of two-dimensional array used to store matrix `B`.
/// - `vl`, `vu`: Bounds of the half-open interval (vl,vu] searched when `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_V`]. NOTE(review): these are untyped pointers; the pointee type and memory space are not visible here — confirm.
/// - `il`, `iu`: Indices of the first and last eigenvalue/eigenvector to be found when `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_I`].
/// - `meig`: Number of eigenvalues/eigenvectors computed by the routine (described above as `h_meig`).
/// - `dataTypeW`: Data type of array `W`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXsygvdx_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXsygvdx_bufferSize`].
///
/// NOTE(review): the list above does not cover `d_A`, `d_B`, `d_W` and `d_info`. They appear to be the arrays `A`, `B`, `W` and the `info` status described above; the `d_` prefix suggests device memory — confirm against the cuSOLVER documentation.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0`, or `lda<max(1,n)`, or `ldb<max(1,n)`, or `itype` is not 1, 2 or 3, or `jobz` is not [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] or [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], or `range` is not [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`] or [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_V`] or [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_I`], or `uplo` is not [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] or [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]), or the combination of `dataType{A,B,W}` and `computeType` is not supported.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsygvdx(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
d_A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeB: cudaDataType,
d_B: *mut ::core::ffi::c_void,
ldb: i64,
vl: *mut ::core::ffi::c_void,
vu: *mut ::core::ffi::c_void,
il: i64,
iu: i64,
meig: *mut i64,
dataTypeW: cudaDataType,
d_W: *mut ::core::ffi::c_void,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
d_info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function creates the configuration structure shared by `syevj`, `syevjBatched` and `sygvj`, and initializes it to default values.
///
/// # Parameters
///
/// - `info`: The pointer to the structure of `syevj`; the newly created structure is stored through it.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: The resources could not be allocated.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The structure was initialized successfully.
pub fn cusolverDnCreateSyevjInfo(info: *mut syevjInfo_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function destroys the `syevj` structure and releases any memory required by it.
///
/// # Parameters
///
/// - `info`: The structure of `syevj` (as created by [`cusolverDnCreateSyevjInfo`]).
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The resources were released successfully.
pub fn cusolverDnDestroySyevjInfo(info: syevjInfo_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function configures the tolerance of `syevj`.
///
/// # Parameters
///
/// - `info`: The structure of `syevj` (described upstream as a pointer to it; this declaration passes the `syevjInfo_t` value itself).
/// - `tolerance`: Accuracy of numerical eigenvalues.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevjSetTolerance(
info: syevjInfo_t,
tolerance: f64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function configures the maximum number of sweeps in `syevj`. The default value is 100.
///
/// # Parameters
///
/// - `info`: The structure of `syevj` (described upstream as a pointer to it; this declaration passes the `syevjInfo_t` value itself).
/// - `max_sweeps`: Maximum number of sweeps.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevjSetMaxSweeps(
info: syevjInfo_t,
max_sweeps: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function configures whether `syevjBatched` sorts the eigenvalues.
///
/// If `sort_eig` is zero, the eigenvalues are not sorted. This function only works for `syevjBatched`. `syevj` and `sygvj` always sort eigenvalues in ascending order. By default, eigenvalues are always sorted in ascending order.
///
/// # Parameters
///
/// - `info`: The structure of `syevj` (described upstream as a pointer to it; this declaration passes the `syevjInfo_t` value itself).
/// - `sort_eig`: If `sort_eig` is zero, the eigenvalues are not sorted.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevjSetSortEig(
info: syevjInfo_t,
sort_eig: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function reports the residual of `syevj` or `sygvj`. It does not support `syevjBatched`. If the user calls this function after `syevjBatched`, the error [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`] is returned.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `info`: The structure of `syevj` (described upstream as a pointer to it; this declaration passes the `syevjInfo_t` value itself).
/// - `residual`: Residual of `syevj`, written through this pointer.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]: Does not support batched version.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevjGetResidual(
handle: cusolverDnHandle_t,
info: syevjInfo_t,
residual: *mut f64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function reports the number of executed sweeps of `syevj` or `sygvj`. It does not support `syevjBatched`. If the user calls this function after `syevjBatched`, the error [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`] is returned.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `info`: The structure of `syevj` (described upstream as a pointer to it; this declaration passes the `syevjInfo_t` value itself).
/// - `executed_sweeps`: Number of executed sweeps, written through this pointer.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]: Does not support batched version.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevjGetSweeps(
handle: cusolverDnHandle_t,
info: syevjInfo_t,
executed_sweeps: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnSsyevjBatched`].
///
/// The documentation of [`cusolverDnSsyevjBatched`] states that the `lwork` value it expects is returned by `syevjBatched_bufferSize()`; this declaration reports it through the `lwork` out-pointer. The size is a count of elements, not bytes: the working space occupies `sizeof(<type>) * lwork` bytes.
///
/// The other arguments carry the same names as the corresponding arguments of [`cusolverDnSsyevjBatched`]; `A` and `W` are declared `*const f32` here.
pub fn cusolverDnSsyevjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnDsyevjBatched`].
///
/// The documentation of [`cusolverDnDsyevjBatched`] states that the `lwork` value it expects is returned by `syevjBatched_bufferSize()`; this declaration reports it through the `lwork` out-pointer. The size is a count of elements, not bytes: the working space occupies `sizeof(<type>) * lwork` bytes.
///
/// The other arguments carry the same names as the corresponding arguments of [`cusolverDnDsyevjBatched`]; `A` and `W` are declared `*const f64` here.
pub fn cusolverDnDsyevjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnCheevjBatched`].
///
/// The argument list matches [`cusolverDnSsyevjBatched_bufferSize`], with `A` pointing to `cuComplex` elements and `W` to `f32` values.
///
/// NOTE(review): presumably `lwork` receives the workspace size, counted in `cuComplex` elements, in the same way the `syevjBatched_bufferSize()` helpers are described on [`cusolverDnSsyevjBatched`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnCheevjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusolverDnZheevjBatched`].
///
/// The argument list matches [`cusolverDnDsyevjBatched_bufferSize`], with `A` pointing to `cuDoubleComplex` elements and `W` to `f64` values.
///
/// NOTE(review): presumably `lwork` receives the workspace size, counted in `cuDoubleComplex` elements, in the same way the `syevjBatched_bufferSize()` helpers are described on [`cusolverDnDsyevjBatched`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnZheevjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Single-precision real (`f32`) batched `syevj`: computes eigenvalues and eigenvectors of a sequence of symmetric matrices.
///
/// The helper functions below can calculate the sizes needed for pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a sequence of symmetric (Hermitian) $n \times n$ matrices:
/// $$
/// A_{j}\\*Q_{j} = Q_{j}\\*\Lambda_{j}
/// $$
///
/// where $\Lambda_{j}$ is a real $n \times n$ diagonal matrix. $Q_j$ is an $n \times n$ unitary matrix. The diagonal elements of $\Lambda_j$ are the eigenvalues of $A_j$ in either ascending order or non-sorting order.
///
/// `syevjBatched` performs `syevj` on each matrix. It requires that all matrices are of the same size `n` and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ lda\\*n\\*k\rbrack}$.
///
/// The parameter `W` also contains the eigenvalues of each matrix in a contiguous way,
/// $$
/// \begin{split}W = \begin{pmatrix}
/// {W0} & {W1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// The formula for random access of `W` is $W_{k}\operatorname{(j)} = {W\lbrack\ j\ +\ n\\*k\rbrack}$.
///
/// Besides tolerance and maximum sweeps, `syevjBatched` can either sort the eigenvalues in ascending order (default) or choose as-is (without sorting) by the function [`cusolverDnXsyevjSetSortEig`]. If the user packs several tiny matrices into diagonal blocks of one matrix, the non-sorting option can separate the spectrum of those tiny matrices.
///
/// `syevjBatched` cannot report residual and executed sweeps by the functions [`cusolverDnXsyevjGetResidual`] and [`cusolverDnXsyevjGetSweeps`]. Any call of the above two returns [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]. The user needs to compute the residual explicitly.
///
/// The user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevjBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info\[0\] = -i` (less than zero) indicates `i-th` parameter is wrong (not counting handle). Otherwise, if `info\[i\] = n+1`, `syevjBatched` does not converge on `i-th` matrix under given tolerance and maximum sweeps.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], $A_j$ contains the orthonormal eigenvectors $V_j$.
///
/// Please visit [cuSOLVER Library Samples - syevjBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevjBatched) for a code example.
pub fn cusolverDnSsyevjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
W: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Double-precision real (`f64`) batched `syevj`: computes eigenvalues and eigenvectors of a sequence of symmetric matrices.
///
/// The helper functions below can calculate the sizes needed for pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a sequence of symmetric (Hermitian) $n \times n$ matrices:
/// $$
/// A_{j}\\*Q_{j} = Q_{j}\\*\Lambda_{j}
/// $$
///
/// where $\Lambda_{j}$ is a real $n \times n$ diagonal matrix. $Q_j$ is an $n \times n$ unitary matrix. The diagonal elements of $\Lambda_j$ are the eigenvalues of $A_j$ in either ascending order or non-sorting order.
///
/// `syevjBatched` performs `syevj` on each matrix. It requires that all matrices are of the same size `n` and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ lda\\*n\\*k\rbrack}$.
///
/// The parameter `W` also contains the eigenvalues of each matrix in a contiguous way,
/// $$
/// \begin{split}W = \begin{pmatrix}
/// {W0} & {W1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// The formula for random access of `W` is $W_{k}\operatorname{(j)} = {W\lbrack\ j\ +\ n\\*k\rbrack}$.
///
/// Besides tolerance and maximum sweeps, `syevjBatched` can either sort the eigenvalues in ascending order (default) or choose as-is (without sorting) by the function [`cusolverDnXsyevjSetSortEig`]. If the user packs several tiny matrices into diagonal blocks of one matrix, the non-sorting option can separate the spectrum of those tiny matrices.
///
/// `syevjBatched` cannot report residual and executed sweeps by the functions [`cusolverDnXsyevjGetResidual`] and [`cusolverDnXsyevjGetSweeps`]. Any call of the above two returns [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]. The user needs to compute the residual explicitly.
///
/// The user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevjBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info\[0\] = -i` (less than zero) indicates `i-th` parameter is wrong (not counting handle). Otherwise, if `info\[i\] = n+1`, `syevjBatched` does not converge on `i-th` matrix under given tolerance and maximum sweeps.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], $A_j$ contains the orthonormal eigenvectors $V_j$.
///
/// Please visit [cuSOLVER Library Samples - syevjBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevjBatched) for a code example.
pub fn cusolverDnDsyevjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
W: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnCheevjBatched`.
///
/// The argument list has the same names and order as [`cusolverDnSsyevjBatched`]; here `A` and `work` point to `cuComplex` elements while the eigenvalue array `W` points to `f32` values.
///
/// NOTE(review): following the S/D/C/Z naming described on [`cusolverDnSsyevjBatched`], this is presumably the single-precision complex (Hermitian) variant of the same batched Jacobi eigensolver, with the same packing of `A`/`W` and the same meaning of `lwork`, `info`, `params` and `batchSize` — confirm against the cuSOLVER documentation.
pub fn cusolverDnCheevjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
W: *mut f32,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnZheevjBatched`.
///
/// The argument list has the same names and order as [`cusolverDnDsyevjBatched`]; here `A` and `work` point to `cuDoubleComplex` elements while the eigenvalue array `W` points to `f64` values.
///
/// NOTE(review): following the S/D/C/Z naming described on [`cusolverDnDsyevjBatched`], this is presumably the double-precision complex (Hermitian) variant of the same batched Jacobi eigensolver, with the same packing of `A`/`W` and the same meaning of `lwork`, `info`, `params` and `batchSize` — confirm against the cuSOLVER documentation.
pub fn cusolverDnZheevjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
W: *mut f64,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnSsyevj_bufferSize`.
///
/// Takes the solver options (`jobz`, `uplo`), the matrix description (`n`, `A`, `lda`), the eigenvalue array `W` (both arrays as `*const f32`), an `lwork` out-pointer and the `syevj` structure `params` (see [`cusolverDnCreateSyevjInfo`]).
///
/// NOTE(review): presumably reports through `lwork` the workspace size, counted in `f32` elements, required by the single-precision `syevj` solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnSsyevj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnDsyevj_bufferSize`.
///
/// Takes the solver options (`jobz`, `uplo`), the matrix description (`n`, `A`, `lda`), the eigenvalue array `W` (both arrays as `*const f64`), an `lwork` out-pointer and the `syevj` structure `params` (see [`cusolverDnCreateSyevjInfo`]).
///
/// NOTE(review): presumably reports through `lwork` the workspace size, counted in `f64` elements, required by the double-precision `syevj` solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnDsyevj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnCheevj_bufferSize`.
///
/// Takes the solver options (`jobz`, `uplo`), the matrix description (`n`, `A` as `*const cuComplex`, `lda`), the eigenvalue array `W` (as `*const f32`), an `lwork` out-pointer and the `syevj` structure `params` (see [`cusolverDnCreateSyevjInfo`]).
///
/// NOTE(review): presumably reports through `lwork` the workspace size, counted in `cuComplex` elements, required by the single-precision complex `heevj` solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnCheevj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSOLVER's `cusolverDnZheevj_bufferSize`.
///
/// Takes the solver options (`jobz`, `uplo`), the matrix description (`n`, `A` as `*const cuDoubleComplex`, `lda`), the eigenvalue array `W` (as `*const f64`), an `lwork` out-pointer and the `syevj` structure `params` (see [`cusolverDnCreateSyevjInfo`]).
///
/// NOTE(review): presumably reports through `lwork` the workspace size, counted in `cuDoubleComplex` elements, required by the double-precision complex `heevj` solver — confirm against the cuSOLVER documentation.
pub fn cusolverDnZheevj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the eigenvalues and eigenvectors of a real single-precision symmetric matrix using the Jacobi method (`syevj`).
///
/// The `_bufferSize` helper functions can calculate the sizes needed for the pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A`. The standard symmetric eigenvalue problem is:
/// $$
/// A\\*Q = Q\\*\Lambda
/// $$
///
/// where `Λ` is a real $n \times n$ diagonal matrix. `Q` is an $n \times n$ unitary matrix. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// `syevj` has the same functionality as `syevd`. The difference is that `syevd` uses the QR algorithm and `syevj` uses the Jacobi method. The parallelism of the Jacobi method gives the GPU better performance on small and medium size matrices. Moreover, the user can configure `syevj` to perform approximation up to a certain accuracy.
///
/// How does it work?
///
/// `syevj` iteratively generates a sequence of unitary matrices to transform matrix `A` to the following form:
/// $$
/// V^{H}\\*A\\*V = W + E
/// $$
///
/// where `W` is diagonal and `E` is symmetric without diagonal.
///
/// During the iterations, the Frobenius norm of `E` decreases monotonically. As `E` goes down to zero, `W` is the set of eigenvalues. In practice, the Jacobi method stops if:
/// $$
/// {\\|E\\|}_{F}\leq\operatorname{eps}\\*{\\|A\\|}_{F}
/// $$
///
/// where `eps` is the given tolerance.
///
/// `syevj` has two parameters to control the accuracy. The first parameter is the tolerance (`eps`). The default value is machine accuracy, but the user can use function [`cusolverDnXsyevjSetTolerance`] to set an a priori tolerance. The second parameter is the maximum number of sweeps, which controls the number of iterations of the Jacobi method. The default value is 100, but the user can use function [`cusolverDnXsyevjSetMaxSweeps`] to set a proper bound. The experiments show 15 sweeps are good enough to converge to machine accuracy. `syevj` stops when either the tolerance is met or the maximum number of sweeps is reached.
///
/// The Jacobi method has quadratic convergence, so the accuracy is not proportional to the number of sweeps. To guarantee a certain accuracy, the user should configure the tolerance only.
///
/// After `syevj`, the user can query the residual by function [`cusolverDnXsyevjGetResidual`] and the number of executed sweeps by function [`cusolverDnXsyevjGetSweeps`]. However, the user needs to be aware that the residual is the Frobenius norm of `E`, not the accuracy of an individual eigenvalue, i.e.
/// $$
/// {residual}={\\|E\\|}_{F} = {{\\|}\Lambda - W{\\|}}_{F}
/// $$
///
/// The same as `syevd`, the user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevj_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = n+1`, `syevj` does not converge under the given tolerance and maximum sweeps.
///
/// If the user sets an improper tolerance, `syevj` may not converge. For example, the tolerance should not be smaller than machine accuracy.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors `V`.
///
/// Please visit [cuSOLVER Library Samples - syevj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevj) for a code example.
pub fn cusolverDnSsyevj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
W: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the eigenvalues and eigenvectors of a real double-precision symmetric matrix using the Jacobi method (`syevj`).
///
/// The `_bufferSize` helper functions can calculate the sizes needed for the pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A`. The standard symmetric eigenvalue problem is:
/// $$
/// A\\*Q = Q\\*\Lambda
/// $$
///
/// where `Λ` is a real $n \times n$ diagonal matrix. `Q` is an $n \times n$ unitary matrix. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// `syevj` has the same functionality as `syevd`. The difference is that `syevd` uses the QR algorithm and `syevj` uses the Jacobi method. The parallelism of the Jacobi method gives the GPU better performance on small and medium size matrices. Moreover, the user can configure `syevj` to perform approximation up to a certain accuracy.
///
/// How does it work?
///
/// `syevj` iteratively generates a sequence of unitary matrices to transform matrix `A` to the following form:
/// $$
/// V^{H}\\*A\\*V = W + E
/// $$
///
/// where `W` is diagonal and `E` is symmetric without diagonal.
///
/// During the iterations, the Frobenius norm of `E` decreases monotonically. As `E` goes down to zero, `W` is the set of eigenvalues. In practice, the Jacobi method stops if:
/// $$
/// {\\|E\\|}_{F}\leq\operatorname{eps}\\*{\\|A\\|}_{F}
/// $$
///
/// where `eps` is the given tolerance.
///
/// `syevj` has two parameters to control the accuracy. The first parameter is the tolerance (`eps`). The default value is machine accuracy, but the user can use function [`cusolverDnXsyevjSetTolerance`] to set an a priori tolerance. The second parameter is the maximum number of sweeps, which controls the number of iterations of the Jacobi method. The default value is 100, but the user can use function [`cusolverDnXsyevjSetMaxSweeps`] to set a proper bound. The experiments show 15 sweeps are good enough to converge to machine accuracy. `syevj` stops when either the tolerance is met or the maximum number of sweeps is reached.
///
/// The Jacobi method has quadratic convergence, so the accuracy is not proportional to the number of sweeps. To guarantee a certain accuracy, the user should configure the tolerance only.
///
/// After `syevj`, the user can query the residual by function [`cusolverDnXsyevjGetResidual`] and the number of executed sweeps by function [`cusolverDnXsyevjGetSweeps`]. However, the user needs to be aware that the residual is the Frobenius norm of `E`, not the accuracy of an individual eigenvalue, i.e.
/// $$
/// {residual}={\\|E\\|}_{F} = {{\\|}\Lambda - W{\\|}}_{F}
/// $$
///
/// The same as `syevd`, the user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `syevj_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = n+1`, `syevj` does not converge under the given tolerance and maximum sweeps.
///
/// If the user sets an improper tolerance, `syevj` may not converge. For example, the tolerance should not be smaller than machine accuracy.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors `V`.
///
/// Please visit [cuSOLVER Library Samples - syevj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/syevj) for a code example.
pub fn cusolverDnDsyevj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
W: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Complex single-precision (Hermitian) counterpart of [`cusolverDnSsyevj`].
///
/// `A` and `work` hold `cuComplex` elements, while `W` is a real `f32` array.
/// The parameter list otherwise mirrors [`cusolverDnSsyevj`]; see that function for the
/// description of the Jacobi method, the tolerance / maximum-sweeps settings carried by
/// `params`, the sizing of `work` via `lwork`, and the meaning of `info`.
pub fn cusolverDnCheevj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
W: *mut f32,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Complex double-precision (Hermitian) counterpart of [`cusolverDnSsyevj`].
///
/// `A` and `work` hold `cuDoubleComplex` elements, while `W` is a real `f64` array.
/// The parameter list otherwise mirrors [`cusolverDnSsyevj`]; see that function for the
/// description of the Jacobi method, the tolerance / maximum-sweeps settings carried by
/// `params`, the sizing of `work` via `lwork`, and the meaning of `info`.
pub fn cusolverDnZheevj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
W: *mut f64,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by [`cusolverDnSsygvj`] (real single precision).
///
/// The size is written through `lwork`. According to the `sygvj` documentation, `lwork` counts
/// elements rather than bytes: the working space occupies `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// [`cusolverDnSsygvj`] — confirm against the cuSOLVER reference.
pub fn cusolverDnSsygvj_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
B: *const f32,
ldb: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by [`cusolverDnDsygvj`] (real double precision).
///
/// The size is written through `lwork`. According to the `sygvj` documentation, `lwork` counts
/// elements rather than bytes: the working space occupies `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// [`cusolverDnDsygvj`] — confirm against the cuSOLVER reference.
pub fn cusolverDnDsygvj_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
B: *const f64,
ldb: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by [`cusolverDnChegvj`] (complex single precision).
///
/// The size is written through `lwork`. According to the `sygvj` documentation on
/// [`cusolverDnSsygvj`], `lwork` counts elements rather than bytes: the working space occupies
/// `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// [`cusolverDnChegvj`] — confirm against the cuSOLVER reference.
pub fn cusolverDnChegvj_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
B: *const cuComplex,
ldb: ::core::ffi::c_int,
W: *const f32,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by [`cusolverDnZhegvj`] (complex double precision).
///
/// The size is written through `lwork`. According to the `sygvj` documentation on
/// [`cusolverDnSsygvj`], `lwork` counts elements rather than bytes: the working space occupies
/// `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// [`cusolverDnZhegvj`] — confirm against the cuSOLVER reference.
pub fn cusolverDnZhegvj_bufferSize(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *const cuDoubleComplex,
ldb: ::core::ffi::c_int,
W: *const f64,
lwork: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the eigenvalues and eigenvectors of a real single-precision symmetric-definite matrix pair (`A`, `B`) using the Jacobi method (`sygvj`).
///
/// The `_bufferSize` helper functions can calculate the sizes needed for the pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix-pair (`A`,`B`). The generalized symmetric-definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `B` is positive definite. `Λ` is a real $n \times n$ diagonal matrix. The diagonal elements of `Λ` are the eigenvalues of (`A`, `B`) in ascending order. `V` is an $n \times n$ orthogonal matrix. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// This function has the same functionality as `sygvd` except that `syevd` in `sygvd` is replaced by `syevj` in `sygvj`. Therefore, `sygvj` inherits the properties of `syevj`; the user can use [`cusolverDnXsyevjSetTolerance`] and [`cusolverDnXsyevjSetMaxSweeps`] to configure the tolerance and maximum sweeps.
///
/// However, the meaning of the residual is different from `syevj`. `sygvj` first computes the Cholesky factorization of matrix `B`,
/// $$
/// B = L\\*L^{H}
/// $$
///
/// transforms the problem to a standard eigenvalue problem, then calls `syevj`.
///
/// For example, the standard eigenvalue problem of type I is:
/// $$
/// M\\*Q = Q\\*\Lambda
/// $$
///
/// where matrix `M` is symmetric:
/// $$
/// M = L^{-1}\\*A\\*L^{-H}
/// $$
///
/// The residual is the result of `syevj` on matrix `M`, not `A`.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sygvj_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = i` (i > 0 and i<=n), `B` is not positive definite, the factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed. If `info = n+1`, `syevj` does not converge under the given tolerance and maximum sweeps. In this case, the eigenvalues and eigenvectors are still computed because non-convergence comes from an improper tolerance or maximum sweeps.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthogonal eigenvectors `V`.
///
/// Please visit [cuSOLVER Library Samples - sygvj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/sygvj) for a code example.
pub fn cusolverDnSsygvj(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
B: *mut f32,
ldb: ::core::ffi::c_int,
W: *mut f32,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the eigenvalues and eigenvectors of a real double-precision symmetric-definite matrix pair (`A`, `B`) using the Jacobi method (`sygvj`).
///
/// The `_bufferSize` helper functions can calculate the sizes needed for the pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix-pair (`A`,`B`). The generalized symmetric-definite eigenvalue problem is:
/// $$
/// \operatorname{eig}(A,B) =
/// \begin{cases}
/// A * V = B * V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1} \\
/// A * B * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_2} \\
/// B * A * V = V * \Lambda & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// where the matrix `B` is positive definite. `Λ` is a real $n \times n$ diagonal matrix. The diagonal elements of `Λ` are the eigenvalues of (`A`, `B`) in ascending order. `V` is an $n \times n$ orthogonal matrix. The eigenvectors are normalized as follows:
/// $$
/// \begin{cases}
/// V^H * B * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_1 or CUSOLVER_EIG_TYPE_2} \\
/// V^H * \operatorname{inv}(B) * V = I & \text{if } itype = \text{CUSOLVER_EIG_TYPE_3}
/// \end{cases}
/// $$
///
/// This function has the same functionality as `sygvd` except that `syevd` in `sygvd` is replaced by `syevj` in `sygvj`. Therefore, `sygvj` inherits the properties of `syevj`; the user can use [`cusolverDnXsyevjSetTolerance`] and [`cusolverDnXsyevjSetMaxSweeps`] to configure the tolerance and maximum sweeps.
///
/// However, the meaning of the residual is different from `syevj`. `sygvj` first computes the Cholesky factorization of matrix `B`,
/// $$
/// B = L\\*L^{H}
/// $$
///
/// transforms the problem to a standard eigenvalue problem, then calls `syevj`.
///
/// For example, the standard eigenvalue problem of type I is:
/// $$
/// M\\*Q = Q\\*\Lambda
/// $$
///
/// where matrix `M` is symmetric:
/// $$
/// M = L^{-1}\\*A\\*L^{-H}
/// $$
///
/// The residual is the result of `syevj` on matrix `M`, not `A`.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `sygvj_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = i` (i > 0 and i<=n), `B` is not positive definite, the factorization of `B` could not be completed and no eigenvalues or eigenvectors were computed. If `info = n+1`, `syevj` does not converge under the given tolerance and maximum sweeps. In this case, the eigenvalues and eigenvectors are still computed because non-convergence comes from an improper tolerance or maximum sweeps.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthogonal eigenvectors `V`.
///
/// Please visit [cuSOLVER Library Samples - sygvj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/sygvj) for a code example.
pub fn cusolverDnDsygvj(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
B: *mut f64,
ldb: ::core::ffi::c_int,
W: *mut f64,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Complex single-precision (Hermitian) counterpart of [`cusolverDnSsygvj`].
///
/// `A`, `B` and `work` hold `cuComplex` elements, while `W` is a real `f32` array.
/// The parameter list otherwise mirrors [`cusolverDnSsygvj`]; see that function for the
/// description of the generalized eigenvalue problem selected by `itype`, the
/// tolerance / maximum-sweeps settings carried by `params`, the sizing of `work` via
/// `lwork`, and the meaning of `info`.
pub fn cusolverDnChegvj(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
B: *mut cuComplex,
ldb: ::core::ffi::c_int,
W: *mut f32,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Complex double-precision (Hermitian) counterpart of [`cusolverDnSsygvj`].
///
/// `A`, `B` and `work` hold `cuDoubleComplex` elements, while `W` is a real `f64` array.
/// The parameter list otherwise mirrors [`cusolverDnSsygvj`]; see that function for the
/// description of the generalized eigenvalue problem selected by `itype`, the
/// tolerance / maximum-sweeps settings carried by `params`, the sizing of `work` via
/// `lwork`, and the meaning of `info`.
pub fn cusolverDnZhegvj(
handle: cusolverDnHandle_t,
itype: cusolverEigType_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
B: *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
W: *mut f64,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: syevjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the structure of `gesvdj` and `gesvdjBatched` to default values.
///
/// The structure is released again with [`cusolverDnDestroyGesvdjInfo`].
///
/// # Parameters
///
/// - `info`: The pointer to the structure of `gesvdj`; the created structure is returned through it.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: The resources could not be allocated.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The structure was initialized successfully.
pub fn cusolverDnCreateGesvdjInfo(info: *mut gesvdjInfo_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function destroys the structure of `gesvdj` and releases any memory required by it.
///
/// It is the counterpart of [`cusolverDnCreateGesvdjInfo`].
///
/// # Parameters
///
/// - `info`: The structure of `gesvdj`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The resources were released successfully.
pub fn cusolverDnDestroyGesvdjInfo(info: gesvdjInfo_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function configures the tolerance of `gesvdj`.
///
/// The maximum number of sweeps is configured separately with [`cusolverDnXgesvdjSetMaxSweeps`].
///
/// # Parameters
///
/// - `info`: The pointer to the structure of `gesvdj`.
/// - `tolerance`: Accuracy of numerical singular values.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgesvdjSetTolerance(
info: gesvdjInfo_t,
tolerance: f64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function configures the maximum number of sweeps in `gesvdj`. The default value is 100.
///
/// The tolerance is configured separately with [`cusolverDnXgesvdjSetTolerance`].
///
/// # Parameters
///
/// - `info`: The pointer to the structure of `gesvdj`.
/// - `max_sweeps`: Maximum number of sweeps.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgesvdjSetMaxSweeps(
info: gesvdjInfo_t,
max_sweeps: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function configures whether `gesvdjBatched` sorts the singular values.
///
/// If `sort_svd` is zero, the singular values are not sorted. This function only works for `gesvdjBatched`. `gesvdj` always sorts singular values in descending order. By default, singular values are always sorted in descending order.
///
/// # Parameters
///
/// - `info`: The pointer to the structure of `gesvdj`.
/// - `sort_svd`: If `sort_svd` is zero, the singular values are not sorted.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgesvdjSetSortEig(
info: gesvdjInfo_t,
sort_svd: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function reports the Frobenius norm of the internal residual returned by `gesvdj`. Note that this is *not* the Frobenius norm of the exact residual calculated as:
/// $$
/// {\\|{S} - {U}^{H}\\*{A}\\*{V}\\|}_{F}
/// $$
///
/// This function does not support `gesvdjBatched`. If the user calls this function after `gesvdjBatched`, the error [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`] is returned.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `info`: The pointer to the structure of `gesvdj`.
/// - `residual`: Residual of `gesvdj`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]: Does not support batched version.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgesvdjGetResidual(
handle: cusolverDnHandle_t,
info: gesvdjInfo_t,
residual: *mut f64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// This function reports the number of executed sweeps of `gesvdj`.
///
/// It does not support `gesvdjBatched`. If the user calls this function after `gesvdjBatched`, the error [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`] is returned.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `info`: The pointer to the structure of `gesvdj`.
/// - `executed_sweeps`: Number of executed sweeps.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]: Does not support batched version.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgesvdjGetSweeps(
handle: cusolverDnHandle_t,
info: gesvdjInfo_t,
executed_sweeps: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by [`cusolverDnSgesvdjBatched`] (real single precision).
///
/// The size is written through `lwork`. According to the `gesvdjBatched` documentation, `lwork`
/// counts elements rather than bytes: the working space occupies `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// [`cusolverDnSgesvdjBatched`] — confirm against the cuSOLVER reference.
pub fn cusolverDnSgesvdjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
S: *const f32,
U: *const f32,
ldu: ::core::ffi::c_int,
V: *const f32,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by [`cusolverDnDgesvdjBatched`] (real double precision).
///
/// The size is written through `lwork`. According to the `gesvdjBatched` documentation, `lwork`
/// counts elements rather than bytes: the working space occupies `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// [`cusolverDnDgesvdjBatched`] — confirm against the cuSOLVER reference.
pub fn cusolverDnDgesvdjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
S: *const f64,
U: *const f64,
ldu: ::core::ffi::c_int,
V: *const f64,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by [`cusolverDnCgesvdjBatched`] (complex single precision).
///
/// The size is written through `lwork`. According to the `gesvdjBatched` documentation, `lwork`
/// counts elements rather than bytes: the working space occupies `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// [`cusolverDnCgesvdjBatched`] — confirm against the cuSOLVER reference.
pub fn cusolverDnCgesvdjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
S: *const f32,
U: *const cuComplex,
ldu: ::core::ffi::c_int,
V: *const cuComplex,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the size of the working space needed by the complex double-precision `gesvdjBatched` routine (`cusolverDnZgesvdjBatched`).
///
/// The size is written through `lwork`. According to the `gesvdjBatched` documentation on
/// [`cusolverDnSgesvdjBatched`], `lwork` counts elements rather than bytes: the working space
/// occupies `sizeof(<type>) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to
/// the solve call — confirm against the cuSOLVER reference.
pub fn cusolverDnZgesvdjBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
S: *const f64,
U: *const cuDoubleComplex,
ldu: ::core::ffi::c_int,
V: *const cuDoubleComplex,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the singular values and singular vectors of a batch of real single-precision matrices (`gesvdjBatched`).
///
/// The `_bufferSize` helper functions can calculate the sizes needed for the pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes singular values and singular vectors of a sequence of general $m \times n$ matrices:
/// $$
/// A_{j} = U_{j}\\*\Sigma_{j}\\*V_{j}^{H}
/// $$
///
/// where $\Sigma_{j}$ is a real $m \times n$ diagonal matrix which is zero except for its `min(m,n)` diagonal elements. $U_{j}$ (left singular vectors) is an $m \times m$ unitary matrix and $V_{j}$ (right singular vectors) is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma_{j}$ are the singular values of $A_{j}$ in either descending order or non-sorting order.
///
/// `gesvdjBatched` performs `gesvdj` on each matrix. It requires that all matrices are of the same size `m,n` no greater than 32 and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ lda\\*n\\*k\rbrack}$.
///
/// The parameter `S` also contains the singular values of each matrix in a contiguous way,
/// $$
/// \begin{split}S = \begin{pmatrix}
/// {S0} & {S1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// The formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ min(m,n)\\*k\rbrack}$.
///
/// Besides tolerance and maximum sweeps, `gesvdjBatched` can either sort the singular values in descending order (default) or keep them as-is (without sorting), as selected by the function [`cusolverDnXgesvdjSetSortEig`]. If the user packs several tiny matrices into diagonal blocks of one matrix, the non-sorting option can separate the singular values of those tiny matrices.
///
/// `gesvdjBatched` cannot report residual and executed sweeps by function [`cusolverDnXgesvdjGetResidual`] and [`cusolverDnXgesvdjGetSweeps`]. Any call of the above two returns [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]. The user needs to compute the residual explicitly.
///
/// The user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvdjBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info[0] = -i` (less than zero) indicates the `i-th` parameter is wrong (not counting handle). Otherwise, if `info[i] = min(m,n)+1`, `gesvdjBatched` does not converge on the `i-th` matrix under the given tolerance and maximum sweeps.
///
/// Please visit [cuSOLVER Library Samples - gesvdjBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdjBatched) for a code example.
pub fn cusolverDnSgesvdjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
S: *mut f32,
U: *mut f32,
ldu: ::core::ffi::c_int,
V: *mut f32,
ldv: ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the singular values and singular vectors of a batch of real double-precision matrices (`gesvdjBatched`).
///
/// The `_bufferSize` helper functions can calculate the sizes needed for the pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes singular values and singular vectors of a sequence of general $m \times n$ matrices:
/// $$
/// A_{j} = U_{j}\\*\Sigma_{j}\\*V_{j}^{H}
/// $$
///
/// where $\Sigma_{j}$ is a real $m \times n$ diagonal matrix which is zero except for its `min(m,n)` diagonal elements. $U_{j}$ (left singular vectors) is an $m \times m$ unitary matrix and $V_{j}$ (right singular vectors) is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma_{j}$ are the singular values of $A_{j}$ in either descending order or non-sorting order.
///
/// `gesvdjBatched` performs `gesvdj` on each matrix. It requires that all matrices are of the same size `m,n` no greater than 32 and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ lda\\*n\\*k\rbrack}$.
///
/// The parameter `S` also contains the singular values of each matrix in a contiguous way,
/// $$
/// \begin{split}S = \begin{pmatrix}
/// {S0} & {S1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// The formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ min(m,n)\\*k\rbrack}$.
///
/// Besides tolerance and maximum sweeps, `gesvdjBatched` can either sort the singular values in descending order (default) or keep them as-is (without sorting), as selected by the function [`cusolverDnXgesvdjSetSortEig`]. If the user packs several tiny matrices into diagonal blocks of one matrix, the non-sorting option can separate the singular values of those tiny matrices.
///
/// `gesvdjBatched` cannot report residual and executed sweeps by function [`cusolverDnXgesvdjGetResidual`] and [`cusolverDnXgesvdjGetSweeps`]. Any call of the above two returns [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]. The user needs to compute the residual explicitly.
///
/// The user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvdjBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info[0] = -i` (less than zero) indicates the `i-th` parameter is wrong (not counting handle). Otherwise, if `info[i] = min(m,n)+1`, `gesvdjBatched` does not converge on the `i-th` matrix under the given tolerance and maximum sweeps.
///
/// Please visit [cuSOLVER Library Samples - gesvdjBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdjBatched) for a code example.
pub fn cusolverDnDgesvdjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
S: *mut f64,
U: *mut f64,
ldu: ::core::ffi::c_int,
V: *mut f64,
ldv: ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the singular values and singular vectors of a batch of complex single-precision matrices (`gesvdjBatched`).
///
/// The `_bufferSize` helper functions can calculate the sizes needed for the pre-allocated buffer.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes singular values and singular vectors of a sequence of general $m \times n$ matrices:
/// $$
/// A_{j} = U_{j}\\*\Sigma_{j}\\*V_{j}^{H}
/// $$
///
/// where $\Sigma_{j}$ is a real $m \times n$ diagonal matrix which is zero except for its `min(m,n)` diagonal elements. $U_{j}$ (left singular vectors) is an $m \times m$ unitary matrix and $V_{j}$ (right singular vectors) is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma_{j}$ are the singular values of $A_{j}$ in either descending order or non-sorting order.
///
/// `gesvdjBatched` performs `gesvdj` on each matrix. It requires that all matrices are of the same size `m,n` no greater than 32 and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ lda\\*n\\*k\rbrack}$.
///
/// The parameter `S` also contains the singular values of each matrix in a contiguous way,
/// $$
/// \begin{split}S = \begin{pmatrix}
/// {S0} & {S1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// The formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ min(m,n)\\*k\rbrack}$.
///
/// Besides tolerance and maximum sweeps, `gesvdjBatched` can either sort the singular values in descending order (default) or keep them as-is (without sorting), as selected by the function [`cusolverDnXgesvdjSetSortEig`]. If the user packs several tiny matrices into diagonal blocks of one matrix, the non-sorting option can separate the singular values of those tiny matrices.
///
/// `gesvdjBatched` cannot report residual and executed sweeps by function [`cusolverDnXgesvdjGetResidual`] and [`cusolverDnXgesvdjGetSweeps`]. Any call of the above two returns [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]. The user needs to compute the residual explicitly.
///
/// The user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvdjBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info[0] = -i` (less than zero) indicates the `i-th` parameter is wrong (not counting handle). Otherwise, if `info[i] = min(m,n)+1`, `gesvdjBatched` does not converge on the `i-th` matrix under the given tolerance and maximum sweeps.
///
/// Please visit [cuSOLVER Library Samples - gesvdjBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdjBatched) for a code example.
pub fn cusolverDnCgesvdjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
S: *mut f32,
U: *mut cuComplex,
ldu: ::core::ffi::c_int,
V: *mut cuComplex,
ldv: ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the SVD of every matrix in a batch of small complex double-precision matrices with the Jacobi method (`gesvdjBatched`, `Z` variant).
///
/// The helper function `gesvdjBatched_bufferSize()` calculates the size needed for the pre-allocated buffer `work`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes singular values and singular vectors of a sequence of general $m \times n$ matrices:
/// $$
/// A_{j} = U_{j}\\*\Sigma_{j}\\*V_{j}^{H}
/// $$
///
/// where $\Sigma_{j}$ is a real $m \times n$ diagonal matrix which is zero except for its `min(m,n)` diagonal elements. $U_{j}$ (left singular vectors) is an $m \times m$ unitary matrix and $V_{j}$ (right singular vectors) is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma_{j}$ are the singular values of $A_{j}$ in either descending order or non-sorting order.
///
/// `gesvdjBatched` performs `gesvdj` on each matrix. It requires that all matrices are of the same size `m,n` no greater than 32 and are packed contiguously,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ lda\\*n\\*k\rbrack}$.
///
/// The parameter `S` also contains the singular values of each matrix, packed contiguously,
/// $$
/// \begin{split}S = \begin{pmatrix}
/// {S0} & {S1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// The formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ min(m,n)\\*k\rbrack}$.
///
/// Besides tolerance and maximum sweeps, `gesvdjBatched` can either sort the singular values in descending order (default) or leave them as-is (without sorting); the choice is made with the function [`cusolverDnXgesvdjSetSortEig`]. If the user packs several tiny matrices into diagonal blocks of one matrix, the non-sorting option can separate the singular values of those tiny matrices.
///
/// `gesvdjBatched` cannot report the residual or the executed sweeps through the functions [`cusolverDnXgesvdjGetResidual`] and [`cusolverDnXgesvdjGetSweeps`]. Any call of these two returns [`cusolverStatus_t::CUSOLVER_STATUS_NOT_SUPPORTED`]. The user needs to compute the residual explicitly.
///
/// The user has to provide working space pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvdjBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info[0] = -i` (less than zero) indicates that the `i-th` parameter is wrong (not counting handle). Otherwise, if `info[i] = min(m,n)+1`, `gesvdjBatched` does not converge on the `i-th` matrix under the given tolerance and maximum sweeps.
///
/// Please visit [cuSOLVER Library Samples - gesvdjBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdjBatched) for a code example.
///
/// # Safety
///
/// This is a raw FFI declaration. `work` must provide at least `lwork` elements of working space and `info` must be valid for `batchSize` integers. NOTE(review): whether each pointer must refer to device or host memory is not stated here — confirm against the cuSOLVER documentation.
pub fn cusolverDnZgesvdjBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
S: *mut f64,
U: *mut cuDoubleComplex,
ldu: ::core::ffi::c_int,
V: *mut cuDoubleComplex,
ldv: ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by [`cusolverDnSgesvdj`] (`gesvdj_bufferSize`, `S` variant: real single precision).
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to [`cusolverDnSgesvdj`] spans `sizeof(f32) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to [`cusolverDnSgesvdj`] — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnSgesvdj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
S: *const f32,
U: *const f32,
ldu: ::core::ffi::c_int,
V: *const f32,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by [`cusolverDnDgesvdj`] (`gesvdj_bufferSize`, `D` variant: real double precision).
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to [`cusolverDnDgesvdj`] spans `sizeof(f64) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to [`cusolverDnDgesvdj`] — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnDgesvdj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
S: *const f64,
U: *const f64,
ldu: ::core::ffi::c_int,
V: *const f64,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by [`cusolverDnCgesvdj`] (`gesvdj_bufferSize`, `C` variant: complex single precision).
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to [`cusolverDnCgesvdj`] spans `sizeof(cuComplex) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to [`cusolverDnCgesvdj`] — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnCgesvdj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const cuComplex,
lda: ::core::ffi::c_int,
S: *const f32,
U: *const cuComplex,
ldu: ::core::ffi::c_int,
V: *const cuComplex,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by [`cusolverDnZgesvdj`] (`gesvdj_bufferSize`, `Z` variant: complex double precision).
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to [`cusolverDnZgesvdj`] spans `sizeof(cuDoubleComplex) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to [`cusolverDnZgesvdj`] — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnZgesvdj_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
S: *const f64,
U: *const cuDoubleComplex,
ldu: ::core::ffi::c_int,
V: *const cuDoubleComplex,
ldv: ::core::ffi::c_int,
lwork: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the SVD of a real single-precision $m \times n$ matrix with the Jacobi method (`gesvdj`, `S` variant).
///
/// The helper function [`cusolverDnSgesvdj_bufferSize`] calculates the size needed for the pre-allocated buffer `work`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvdj` has the same functionality as `gesvd`. The difference is that `gesvd` uses the QR algorithm and `gesvdj` uses the Jacobi method. The parallelism of the Jacobi method gives the GPU better performance on small and medium size matrices. Moreover, the user can configure `gesvdj` to perform approximation up to a certain accuracy.
///
/// `gesvdj` iteratively generates a sequence of unitary matrices to transform matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and the diagonal of `E` is zero.
///
/// During the iterations, the Frobenius norm of `E` decreases monotonically. As `E` goes down to zero, `S` is the set of singular values. In practice, the Jacobi method stops if:
/// $$
/// {\\|E\\|}_{F}\leq\operatorname{eps}\\*{\\|A\\|}_{F}
/// $$
///
/// where `eps` is the given tolerance. Note that if the real residual norm:
/// $$
/// {\\|{S} - {U}^{H}\\*{A}\\*{V}\\|}_{F}
/// $$
///
/// is computed, it will differ from ${\\|{E}\\|}_{F}$ up to roundoff errors of order $N = max(m, n)$, to still have the standard SVD accuracy expectation:
/// $$
/// \frac{\\|S - U^{H} \\* A \\* V\\|_F}{O(N) \\* \\|A\\|_F} \leq \frac{\\|E\\|_F}{\\|A\\|_F} \leq \operatorname{eps}
/// $$
///
/// $O(N)$ is typically $N$, but the constant depends on the number of sweeps, which gives an upper roundoff error bound of $sweeps \\* N$.
///
/// `gesvdj` has two parameters to control the accuracy. The first parameter is the tolerance (`eps`). The default value is machine accuracy, but the user can use function [`cusolverDnXgesvdjSetTolerance`] to set an a priori tolerance. The second parameter is the maximum number of sweeps, which controls the number of iterations of the Jacobi method. The default value is 100, but the user can use function [`cusolverDnXgesvdjSetMaxSweeps`] to set a proper bound. The experiments show 15 sweeps are good enough to converge to machine accuracy. `gesvdj` stops when either the tolerance is met or the maximum number of sweeps is reached.
///
/// The Jacobi method has quadratic convergence, so the accuracy is not proportional to the number of sweeps. To guarantee a certain accuracy, the user should configure the tolerance only.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by [`cusolverDnSgesvdj_bufferSize`]. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = min(m,n)+1`, `gesvdj` does not converge under the given tolerance and maximum sweeps.
///
/// If the user sets an improper tolerance, `gesvdj` may not converge. For example, the tolerance should not be smaller than machine accuracy.
///
/// Please visit [cuSOLVER Library Samples - gesvdj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdj) for a code example.
///
/// Remark 1: `gesvdj` supports any combination of `m` and `n`.
///
/// Remark 2: the routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// # Safety
///
/// This is a raw FFI declaration. `work` must provide at least `lwork` elements of working space and `info` must be valid for writing the status value. NOTE(review): whether each pointer must refer to device or host memory is not stated here — confirm against the cuSOLVER documentation.
pub fn cusolverDnSgesvdj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f32,
lda: ::core::ffi::c_int,
S: *mut f32,
U: *mut f32,
ldu: ::core::ffi::c_int,
V: *mut f32,
ldv: ::core::ffi::c_int,
work: *mut f32,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the SVD of a real double-precision $m \times n$ matrix with the Jacobi method (`gesvdj`, `D` variant).
///
/// The helper function [`cusolverDnDgesvdj_bufferSize`] calculates the size needed for the pre-allocated buffer `work`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvdj` has the same functionality as `gesvd`. The difference is that `gesvd` uses the QR algorithm and `gesvdj` uses the Jacobi method. The parallelism of the Jacobi method gives the GPU better performance on small and medium size matrices. Moreover, the user can configure `gesvdj` to perform approximation up to a certain accuracy.
///
/// `gesvdj` iteratively generates a sequence of unitary matrices to transform matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and the diagonal of `E` is zero.
///
/// During the iterations, the Frobenius norm of `E` decreases monotonically. As `E` goes down to zero, `S` is the set of singular values. In practice, the Jacobi method stops if:
/// $$
/// {\\|E\\|}_{F}\leq\operatorname{eps}\\*{\\|A\\|}_{F}
/// $$
///
/// where `eps` is the given tolerance. Note that if the real residual norm:
/// $$
/// {\\|{S} - {U}^{H}\\*{A}\\*{V}\\|}_{F}
/// $$
///
/// is computed, it will differ from ${\\|{E}\\|}_{F}$ up to roundoff errors of order $N = max(m, n)$, to still have the standard SVD accuracy expectation:
/// $$
/// \frac{\\|S - U^{H} \\* A \\* V\\|_F}{O(N) \\* \\|A\\|_F} \leq \frac{\\|E\\|_F}{\\|A\\|_F} \leq \operatorname{eps}
/// $$
///
/// $O(N)$ is typically $N$, but the constant depends on the number of sweeps, which gives an upper roundoff error bound of $sweeps \\* N$.
///
/// `gesvdj` has two parameters to control the accuracy. The first parameter is the tolerance (`eps`). The default value is machine accuracy, but the user can use function [`cusolverDnXgesvdjSetTolerance`] to set an a priori tolerance. The second parameter is the maximum number of sweeps, which controls the number of iterations of the Jacobi method. The default value is 100, but the user can use function [`cusolverDnXgesvdjSetMaxSweeps`] to set a proper bound. The experiments show 15 sweeps are good enough to converge to machine accuracy. `gesvdj` stops when either the tolerance is met or the maximum number of sweeps is reached.
///
/// The Jacobi method has quadratic convergence, so the accuracy is not proportional to the number of sweeps. To guarantee a certain accuracy, the user should configure the tolerance only.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by [`cusolverDnDgesvdj_bufferSize`]. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = min(m,n)+1`, `gesvdj` does not converge under the given tolerance and maximum sweeps.
///
/// If the user sets an improper tolerance, `gesvdj` may not converge. For example, the tolerance should not be smaller than machine accuracy.
///
/// Please visit [cuSOLVER Library Samples - gesvdj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdj) for a code example.
///
/// Remark 1: `gesvdj` supports any combination of `m` and `n`.
///
/// Remark 2: the routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// # Safety
///
/// This is a raw FFI declaration. `work` must provide at least `lwork` elements of working space and `info` must be valid for writing the status value. NOTE(review): whether each pointer must refer to device or host memory is not stated here — confirm against the cuSOLVER documentation.
pub fn cusolverDnDgesvdj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut f64,
lda: ::core::ffi::c_int,
S: *mut f64,
U: *mut f64,
ldu: ::core::ffi::c_int,
V: *mut f64,
ldv: ::core::ffi::c_int,
work: *mut f64,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the SVD of a complex single-precision $m \times n$ matrix with the Jacobi method (`gesvdj`, `C` variant).
///
/// The helper function [`cusolverDnCgesvdj_bufferSize`] calculates the size needed for the pre-allocated buffer `work`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvdj` has the same functionality as `gesvd`. The difference is that `gesvd` uses the QR algorithm and `gesvdj` uses the Jacobi method. The parallelism of the Jacobi method gives the GPU better performance on small and medium size matrices. Moreover, the user can configure `gesvdj` to perform approximation up to a certain accuracy.
///
/// `gesvdj` iteratively generates a sequence of unitary matrices to transform matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and the diagonal of `E` is zero.
///
/// During the iterations, the Frobenius norm of `E` decreases monotonically. As `E` goes down to zero, `S` is the set of singular values. In practice, the Jacobi method stops if:
/// $$
/// {\\|E\\|}_{F}\leq\operatorname{eps}\\*{\\|A\\|}_{F}
/// $$
///
/// where `eps` is the given tolerance. Note that if the real residual norm:
/// $$
/// {\\|{S} - {U}^{H}\\*{A}\\*{V}\\|}_{F}
/// $$
///
/// is computed, it will differ from ${\\|{E}\\|}_{F}$ up to roundoff errors of order $N = max(m, n)$, to still have the standard SVD accuracy expectation:
/// $$
/// \frac{\\|S - U^{H} \\* A \\* V\\|_F}{O(N) \\* \\|A\\|_F} \leq \frac{\\|E\\|_F}{\\|A\\|_F} \leq \operatorname{eps}
/// $$
///
/// $O(N)$ is typically $N$, but the constant depends on the number of sweeps, which gives an upper roundoff error bound of $sweeps \\* N$.
///
/// `gesvdj` has two parameters to control the accuracy. The first parameter is the tolerance (`eps`). The default value is machine accuracy, but the user can use function [`cusolverDnXgesvdjSetTolerance`] to set an a priori tolerance. The second parameter is the maximum number of sweeps, which controls the number of iterations of the Jacobi method. The default value is 100, but the user can use function [`cusolverDnXgesvdjSetMaxSweeps`] to set a proper bound. The experiments show 15 sweeps are good enough to converge to machine accuracy. `gesvdj` stops when either the tolerance is met or the maximum number of sweeps is reached.
///
/// The Jacobi method has quadratic convergence, so the accuracy is not proportional to the number of sweeps. To guarantee a certain accuracy, the user should configure the tolerance only.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by [`cusolverDnCgesvdj_bufferSize`]. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = min(m,n)+1`, `gesvdj` does not converge under the given tolerance and maximum sweeps.
///
/// If the user sets an improper tolerance, `gesvdj` may not converge. For example, the tolerance should not be smaller than machine accuracy.
///
/// Please visit [cuSOLVER Library Samples - gesvdj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdj) for a code example.
///
/// Remark 1: `gesvdj` supports any combination of `m` and `n`.
///
/// Remark 2: the routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// # Safety
///
/// This is a raw FFI declaration. `work` must provide at least `lwork` elements of working space and `info` must be valid for writing the status value. NOTE(review): whether each pointer must refer to device or host memory is not stated here — confirm against the cuSOLVER documentation.
pub fn cusolverDnCgesvdj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuComplex,
lda: ::core::ffi::c_int,
S: *mut f32,
U: *mut cuComplex,
ldu: ::core::ffi::c_int,
V: *mut cuComplex,
ldv: ::core::ffi::c_int,
work: *mut cuComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the SVD of a complex double-precision $m \times n$ matrix with the Jacobi method (`gesvdj`, `Z` variant).
///
/// The helper function [`cusolverDnZgesvdj_bufferSize`] calculates the size needed for the pre-allocated buffer `work`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors. The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvdj` has the same functionality as `gesvd`. The difference is that `gesvd` uses the QR algorithm and `gesvdj` uses the Jacobi method. The parallelism of the Jacobi method gives the GPU better performance on small and medium size matrices. Moreover, the user can configure `gesvdj` to perform approximation up to a certain accuracy.
///
/// `gesvdj` iteratively generates a sequence of unitary matrices to transform matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and the diagonal of `E` is zero.
///
/// During the iterations, the Frobenius norm of `E` decreases monotonically. As `E` goes down to zero, `S` is the set of singular values. In practice, the Jacobi method stops if:
/// $$
/// {\\|E\\|}_{F}\leq\operatorname{eps}\\*{\\|A\\|}_{F}
/// $$
///
/// where `eps` is the given tolerance. Note that if the real residual norm:
/// $$
/// {\\|{S} - {U}^{H}\\*{A}\\*{V}\\|}_{F}
/// $$
///
/// is computed, it will differ from ${\\|{E}\\|}_{F}$ up to roundoff errors of order $N = max(m, n)$, to still have the standard SVD accuracy expectation:
/// $$
/// \frac{\\|S - U^{H} \\* A \\* V\\|_F}{O(N) \\* \\|A\\|_F} \leq \frac{\\|E\\|_F}{\\|A\\|_F} \leq \operatorname{eps}
/// $$
///
/// $O(N)$ is typically $N$, but the constant depends on the number of sweeps, which gives an upper roundoff error bound of $sweeps \\* N$.
///
/// `gesvdj` has two parameters to control the accuracy. The first parameter is the tolerance (`eps`). The default value is machine accuracy, but the user can use function [`cusolverDnXgesvdjSetTolerance`] to set an a priori tolerance. The second parameter is the maximum number of sweeps, which controls the number of iterations of the Jacobi method. The default value is 100, but the user can use function [`cusolverDnXgesvdjSetMaxSweeps`] to set a proper bound. The experiments show 15 sweeps are good enough to converge to machine accuracy. `gesvdj` stops when either the tolerance is met or the maximum number of sweeps is reached.
///
/// The Jacobi method has quadratic convergence, so the accuracy is not proportional to the number of sweeps. To guarantee a certain accuracy, the user should configure the tolerance only.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by [`cusolverDnZgesvdj_bufferSize`]. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = min(m,n)+1`, `gesvdj` does not converge under the given tolerance and maximum sweeps.
///
/// If the user sets an improper tolerance, `gesvdj` may not converge. For example, the tolerance should not be smaller than machine accuracy.
///
/// Please visit [cuSOLVER Library Samples - gesvdj](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdj) for a code example.
///
/// Remark 1: `gesvdj` supports any combination of `m` and `n`.
///
/// Remark 2: the routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// # Safety
///
/// This is a raw FFI declaration. `work` must provide at least `lwork` elements of working space and `info` must be valid for writing the status value. NOTE(review): whether each pointer must refer to device or host memory is not stated here — confirm against the cuSOLVER documentation.
pub fn cusolverDnZgesvdj(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *mut cuDoubleComplex,
lda: ::core::ffi::c_int,
S: *mut f64,
U: *mut cuDoubleComplex,
ldu: ::core::ffi::c_int,
V: *mut cuDoubleComplex,
ldv: ::core::ffi::c_int,
work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
info: *mut ::core::ffi::c_int,
params: gesvdjInfo_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by [`cusolverDnSgesvdaStridedBatched`] (`gesvdaStridedBatched_bufferSize`, `S` variant: real single precision).
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to [`cusolverDnSgesvdaStridedBatched`] spans `sizeof(f32) * lwork` bytes.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to [`cusolverDnSgesvdaStridedBatched`] — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnSgesvdaStridedBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const f32,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *const f32,
strideS: ::core::ffi::c_longlong,
d_U: *const f32,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *const f32,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
lwork: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by the `D` (real double-precision) variant of `gesvdaStridedBatched`.
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to the `gesvdaStridedBatched` routine spans `sizeof(<type>) * lwork` bytes, where `<type>` is the element type of that routine's working space.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to the `gesvdaStridedBatched` routine — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnDgesvdaStridedBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const f64,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *const f64,
strideS: ::core::ffi::c_longlong,
d_U: *const f64,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *const f64,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
lwork: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by the `C` (complex single-precision) variant of `gesvdaStridedBatched`.
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to the `gesvdaStridedBatched` routine spans `sizeof(<type>) * lwork` bytes, where `<type>` is the element type of that routine's working space.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to the `gesvdaStridedBatched` routine — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnCgesvdaStridedBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const cuComplex,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *const f32,
strideS: ::core::ffi::c_longlong,
d_U: *const cuComplex,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *const cuComplex,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
lwork: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the working-space size required by the `Z` (complex double-precision) variant of `gesvdaStridedBatched`.
///
/// The required size is returned through `lwork`. It is an element count, not a byte count: the working space handed to the `gesvdaStridedBatched` routine spans `sizeof(<type>) * lwork` bytes, where `<type>` is the element type of that routine's working space.
///
/// NOTE(review): the remaining arguments presumably have to match the ones later passed to the `gesvdaStridedBatched` routine — confirm against the cuSOLVER documentation.
///
/// # Safety
///
/// This is a raw FFI declaration; `lwork` must point to writable storage for one `c_int`.
pub fn cusolverDnZgesvdaStridedBatched_bufferSize(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *const f64,
strideS: ::core::ffi::c_longlong,
d_U: *const cuDoubleComplex,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *const cuDoubleComplex,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
lwork: *mut ::core::ffi::c_int,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Approximates the SVD of every matrix in a strided batch of tall skinny real single-precision matrices (`gesvdaStridedBatched`, `S` variant).
///
/// The helper function [`cusolverDnSgesvdaStridedBatched_bufferSize`] calculates the size needed for the pre-allocated buffer `d_work`.
///
/// In this binding the arguments are named `d_A`, `d_S`, `d_U`, `d_V`, `d_work`, `d_info` and `h_R_nrmF`; the description below uses the shorter names `A`, `S`, `U`, `V`, `work`, `info` and `h_RnrmF` from the upstream documentation.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function `gesvda` (`a` stands for approximate) approximates the singular value decomposition of a tall skinny $m \times n$ matrix `A` and the corresponding left and right singular vectors. The economy form of SVD is written as:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $n \times n$ matrix. `U` is an $m \times n$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvda` computes eigenvalues of `A**T*A`, or `A**H*A` (if `A` is complex), to approximate singular values and singular vectors. It generates matrices `U` and `V` and transforms the matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and `E` depends on rounding errors. Under certain conditions, `U`, `V` and `S` approximate singular values and singular vectors up to machine zero of single precision. In general, `V` is unitary, `S` is more accurate than `U`. If a singular value is far from zero, then the left singular vector `U` is accurate. In other words, the accuracy of singular values and left singular vectors depends on the distance between the singular value and zero. Since the computation of `A**T*A`, or `A**H*A` can greatly amplify errors, it is recommended to use `gesvda` only with well-conditioned data.
///
/// The input parameter `rank` decides the number of singular values and singular vectors that are computed in parameters `S`, `U` and `V`.
///
/// The output parameter `h_RnrmF` reports the Frobenius norm of the residual (`info != NULL` is required to compute `h_RnrmF`):
/// $$
/// A - U\\*S\\*V^{H}
/// $$
///
/// if the parameter `rank` is equal to `n`. Otherwise, `h_RnrmF` reports:
/// $$
/// {\\|}U\\*S\\*V^{H}{\\|} - {\\|S\\|}
/// $$
///
/// in Frobenius norm sense, that is, how far `U` is from unitary.
///
/// `gesvdaStridedBatched` performs `gesvda` on each matrix. It requires that all matrices are of the same size `m,n` and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ strideA\\*k\rbrack}$. Similarly, the formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ strideS\\*k\rbrack}$, the formula for random access of `U` is $U_{k}\operatorname{(i,j)} = {U\lbrack\ i\ +\ ldu\\*j\ +\ strideU\\*k\rbrack}$ and the formula for random access of `V` is $V_{k}\operatorname{(i,j)} = {V\lbrack\ i\ +\ ldv\\*j\ +\ strideV\\*k\rbrack}$.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by [`cusolverDnSgesvdaStridedBatched_bufferSize`]. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info[0] = -i` (less than zero) indicates that the `i-th` parameter is wrong (not counting handle). Otherwise, if `info[i] = min(m,n)+1`, `gesvdaStridedBatched` did not converge on the `i-th` matrix. If `0 < info[i] < min(m,n)+1`, `gesvdaStridedBatched` could not compute an SVD of the `i-th` matrix fully; the leading singular values `Si[k]`, `0 <= k <= info[i]-1`, and corresponding singular vectors may still be useful. In this case, if `h_RnrmF` is requested, `h_RnrmF` reports the residual as if `rank` was set to `info[i]-1`.
///
/// Note that the problem size is limited by the condition `batchSize*stride{A/S/U/V}<=INT32_MAX` primarily due to the current implementation constraints.
///
/// Please visit [cuSOLVER Library Samples - gesvdaStridedBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdaStridedBatched) for a code example.
///
/// Remark 1: The routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// Remark 2: The routine only supports `m >= n`.
///
/// Remark 3: It is recommended to use an FP64 data type, that is `DgesvdaStridedBatched` or `ZgesvdaStridedBatched`.
///
/// Remark 4: If the user is confident on the accuracy of singular values and singular vectors, for example, certain conditions hold (required singular value is far from zero), then the performance can be improved by passing a null pointer to `h_RnrmF`, i.e. no computation of the residual norm.
///
/// # Safety
///
/// This is a raw FFI declaration. `d_work` must provide at least `lwork` elements of working space and, when non-null, `d_info` must be valid for `batchSize` integers. NOTE(review): the `d_`/`h_` prefixes presumably distinguish device from host pointers — confirm against the cuSOLVER documentation.
pub fn cusolverDnSgesvdaStridedBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const f32,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *mut f32,
strideS: ::core::ffi::c_longlong,
d_U: *mut f32,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *mut f32,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
d_work: *mut f32,
lwork: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
h_R_nrmF: *mut f64,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Approximates the singular value decomposition (`gesvda`) of every matrix in a strided batch, for real double-precision (`D`) data.
///
/// The size of the working space this routine needs is calculated by the matching `gesvdaStridedBatched_bufferSize()` helper.
///
/// Naming note: the description below uses the argument names `work`, `info` and `h_RnrmF`; in this binding they correspond to the parameters `d_work`, `d_info` and `h_R_nrmF`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function `gesvda` (`a` stands for approximate) approximates the singular value decomposition of a tall skinny $m \times n$ matrix `A` and the corresponding left and right singular vectors. The economy form of SVD is written by:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $n \times n$ matrix. `U` is an $m \times n$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvda` computes eigenvalues of `A**T*A`, or `A**H*A` (if `A` is complex), to approximate singular values and singular vectors. It generates matrices `U` and `V` and transforms the matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and `E` depends on rounding errors. Under certain conditions, `U`, `V` and `S` approximate singular values and singular vectors up to machine zero of single precision. In general, `V` is unitary, `S` is more accurate than `U`. If a singular value is far from zero, then the left singular vector `U` is accurate. In other words, the accuracy of singular values and left singular vectors depends on the distance between the singular value and zero. Since the computation of `A**T*A`, or `A**H*A` can greatly amplify errors, it is recommended to use `gesvda` only with well-conditioned data.
///
/// The input parameter `rank` decides the number of singular values and singular vectors that are computed in parameters `S`, `U` and `V`.
///
/// The output parameter `h_RnrmF` (which requires `info != NULL`) reports the Frobenius norm of the residual:
/// $$
/// A - U\\*S\\*V^{H}
/// $$
///
/// if the parameter `rank` is equal to `n`. Otherwise, `h_RnrmF` reports:
/// $$
/// {\\|}U\\*S\\*V^{H}{\\|} - {\\|S\\|}
/// $$
///
/// in Frobenius norm sense, that is, how far `U` is from unitary.
///
/// `gesvdaStridedBatched` performs `gesvda` on each matrix. It requires that all matrices are of the same size `m,n` and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ strideA\\*k\rbrack}$. Similarly, the formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ StrideS\\*k\rbrack}$, the formula for random access of `U` is $U_{k}\operatorname{(i,j)} = {U\lbrack\ i\ +\ ldu\\*j\ +\ strideU\\*k\rbrack}$ and the formula for random access of `V` is $V_{k}\operatorname{(i,j)} = {V\lbrack\ i\ +\ ldv\\*j\ +\ strideV\\*k\rbrack}$.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvdaStridedBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info\[0\] = -i` (less than zero) indicates `i-th` parameter is wrong (not counting handle). Otherwise, if `info\[i\] = min(m,n)+1`, `gesvdaStridedBatched` did not converge on the `i-th` matrix. If `0 < info\[i\] < min(m,n)+1`, `gesvdaStridedBatched` could not compute an SVD of the `i-th` matrix fully; the leading singular values `Si\[k\]`, `0 <= k <= info\[i\]-1`, and corresponding singular vectors may still be useful. In this case, if `h_RnrmF` is requested, `h_RnrmF` reports the residual as if `rank` was set to `info\[i\]-1`.
///
/// Note that the problem size is limited by the condition `batchSize*stride{A/S/U/V}<=INT32_MAX` primarily due to the current implementation constraints.
///
/// Please visit [cuSOLVER Library Samples - gesvdaStridedBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdaStridedBatched) for a code example.
///
/// Remark 1: The routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// Remark 2: The routine only supports `m >= n`.
///
/// Remark 3: It is recommended to use an FP64 data type, that is `DgesvdaStridedBatched` or `ZgesvdaStridedBatched`.
///
/// Remark 4: If the user is confident in the accuracy of singular values and singular vectors, for example, certain conditions hold (required singular value is far from zero), then the performance can be improved by passing a null pointer to `h_RnrmF`, i.e. no computation of the residual norm.
pub fn cusolverDnDgesvdaStridedBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const f64,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *mut f64,
strideS: ::core::ffi::c_longlong,
d_U: *mut f64,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *mut f64,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
d_work: *mut f64,
lwork: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
h_R_nrmF: *mut f64,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Approximates the singular value decomposition (`gesvda`) of every matrix in a strided batch, for complex single-precision (`C`) data.
///
/// The size of the working space this routine needs is calculated by the matching `gesvdaStridedBatched_bufferSize()` helper.
///
/// Naming note: the description below uses the argument names `work`, `info` and `h_RnrmF`; in this binding they correspond to the parameters `d_work`, `d_info` and `h_R_nrmF`. The matrices are `cuComplex`, while the singular values `d_S` are `f32` and the residual norm `h_R_nrmF` is `f64`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function `gesvda` (`a` stands for approximate) approximates the singular value decomposition of a tall skinny $m \times n$ matrix `A` and the corresponding left and right singular vectors. The economy form of SVD is written by:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $n \times n$ matrix. `U` is an $m \times n$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvda` computes eigenvalues of `A**T*A`, or `A**H*A` (if `A` is complex), to approximate singular values and singular vectors. It generates matrices `U` and `V` and transforms the matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and `E` depends on rounding errors. Under certain conditions, `U`, `V` and `S` approximate singular values and singular vectors up to machine zero of single precision. In general, `V` is unitary, `S` is more accurate than `U`. If a singular value is far from zero, then the left singular vector `U` is accurate. In other words, the accuracy of singular values and left singular vectors depends on the distance between the singular value and zero. Since the computation of `A**T*A`, or `A**H*A` can greatly amplify errors, it is recommended to use `gesvda` only with well-conditioned data.
///
/// The input parameter `rank` decides the number of singular values and singular vectors that are computed in parameters `S`, `U` and `V`.
///
/// The output parameter `h_RnrmF` (which requires `info != NULL`) reports the Frobenius norm of the residual:
/// $$
/// A - U\\*S\\*V^{H}
/// $$
///
/// if the parameter `rank` is equal to `n`. Otherwise, `h_RnrmF` reports:
/// $$
/// {\\|}U\\*S\\*V^{H}{\\|} - {\\|S\\|}
/// $$
///
/// in Frobenius norm sense, that is, how far `U` is from unitary.
///
/// `gesvdaStridedBatched` performs `gesvda` on each matrix. It requires that all matrices are of the same size `m,n` and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ strideA\\*k\rbrack}$. Similarly, the formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ StrideS\\*k\rbrack}$, the formula for random access of `U` is $U_{k}\operatorname{(i,j)} = {U\lbrack\ i\ +\ ldu\\*j\ +\ strideU\\*k\rbrack}$ and the formula for random access of `V` is $V_{k}\operatorname{(i,j)} = {V\lbrack\ i\ +\ ldv\\*j\ +\ strideV\\*k\rbrack}$.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvdaStridedBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info\[0\] = -i` (less than zero) indicates `i-th` parameter is wrong (not counting handle). Otherwise, if `info\[i\] = min(m,n)+1`, `gesvdaStridedBatched` did not converge on the `i-th` matrix. If `0 < info\[i\] < min(m,n)+1`, `gesvdaStridedBatched` could not compute an SVD of the `i-th` matrix fully; the leading singular values `Si\[k\]`, `0 <= k <= info\[i\]-1`, and corresponding singular vectors may still be useful. In this case, if `h_RnrmF` is requested, `h_RnrmF` reports the residual as if `rank` was set to `info\[i\]-1`.
///
/// Note that the problem size is limited by the condition `batchSize*stride{A/S/U/V}<=INT32_MAX` primarily due to the current implementation constraints.
///
/// Please visit [cuSOLVER Library Samples - gesvdaStridedBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdaStridedBatched) for a code example.
///
/// Remark 1: The routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// Remark 2: The routine only supports `m >= n`.
///
/// Remark 3: It is recommended to use an FP64 data type, that is `DgesvdaStridedBatched` or `ZgesvdaStridedBatched`.
///
/// Remark 4: If the user is confident in the accuracy of singular values and singular vectors, for example, certain conditions hold (required singular value is far from zero), then the performance can be improved by passing a null pointer to `h_RnrmF`, i.e. no computation of the residual norm.
pub fn cusolverDnCgesvdaStridedBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const cuComplex,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *mut f32,
strideS: ::core::ffi::c_longlong,
d_U: *mut cuComplex,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *mut cuComplex,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
d_work: *mut cuComplex,
lwork: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
h_R_nrmF: *mut f64,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Approximates the singular value decomposition (`gesvda`) of every matrix in a strided batch, for complex double-precision (`Z`) data.
///
/// The size of the working space this routine needs is calculated by the matching `gesvdaStridedBatched_bufferSize()` helper.
///
/// Naming note: the description below uses the argument names `work`, `info` and `h_RnrmF`; in this binding they correspond to the parameters `d_work`, `d_info` and `h_R_nrmF`. The matrices are `cuDoubleComplex`, while the singular values `d_S` and the residual norm `h_R_nrmF` are `f64`.
///
/// The S and D data types are real valued single and double precision, respectively.
///
/// The C and Z data types are complex valued single and double precision, respectively.
///
/// This function `gesvda` (`a` stands for approximate) approximates the singular value decomposition of a tall skinny $m \times n$ matrix `A` and the corresponding left and right singular vectors. The economy form of SVD is written by:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $n \times n$ matrix. `U` is an $m \times n$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. `U` and `V` are the left and right singular vectors of `A`.
///
/// `gesvda` computes eigenvalues of `A**T*A`, or `A**H*A` (if `A` is complex), to approximate singular values and singular vectors. It generates matrices `U` and `V` and transforms the matrix `A` to the following form:
/// $$
/// U^{H}\\*A\\*V = S + E
/// $$
///
/// where `S` is diagonal and `E` depends on rounding errors. Under certain conditions, `U`, `V` and `S` approximate singular values and singular vectors up to machine zero of single precision. In general, `V` is unitary, `S` is more accurate than `U`. If a singular value is far from zero, then the left singular vector `U` is accurate. In other words, the accuracy of singular values and left singular vectors depends on the distance between the singular value and zero. Since the computation of `A**T*A`, or `A**H*A` can greatly amplify errors, it is recommended to use `gesvda` only with well-conditioned data.
///
/// The input parameter `rank` decides the number of singular values and singular vectors that are computed in parameters `S`, `U` and `V`.
///
/// The output parameter `h_RnrmF` (which requires `info != NULL`) reports the Frobenius norm of the residual:
/// $$
/// A - U\\*S\\*V^{H}
/// $$
///
/// if the parameter `rank` is equal to `n`. Otherwise, `h_RnrmF` reports:
/// $$
/// {\\|}U\\*S\\*V^{H}{\\|} - {\\|S\\|}
/// $$
///
/// in Frobenius norm sense, that is, how far `U` is from unitary.
///
/// `gesvdaStridedBatched` performs `gesvda` on each matrix. It requires that all matrices are of the same size `m,n` and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ strideA\\*k\rbrack}$. Similarly, the formula for random access of `S` is $S_{k}\operatorname{(j)} = {S\lbrack\ j\ +\ StrideS\\*k\rbrack}$, the formula for random access of `U` is $U_{k}\operatorname{(i,j)} = {U\lbrack\ i\ +\ ldu\\*j\ +\ strideU\\*k\rbrack}$ and the formula for random access of `V` is $V_{k}\operatorname{(i,j)} = {V\lbrack\ i\ +\ ldv\\*j\ +\ strideV\\*k\rbrack}$.
///
/// The user has to provide working space which is pointed to by input parameter `work`. The input parameter `lwork` is the size of the working space, and it is returned by `gesvdaStridedBatched_bufferSize()`. Please note that the size in bytes of the working space is equal to `sizeof(<type>) * lwork`.
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info\[0\] = -i` (less than zero) indicates `i-th` parameter is wrong (not counting handle). Otherwise, if `info\[i\] = min(m,n)+1`, `gesvdaStridedBatched` did not converge on the `i-th` matrix. If `0 < info\[i\] < min(m,n)+1`, `gesvdaStridedBatched` could not compute an SVD of the `i-th` matrix fully; the leading singular values `Si\[k\]`, `0 <= k <= info\[i\]-1`, and corresponding singular vectors may still be useful. In this case, if `h_RnrmF` is requested, `h_RnrmF` reports the residual as if `rank` was set to `info\[i\]-1`.
///
/// Note that the problem size is limited by the condition `batchSize*stride{A/S/U/V}<=INT32_MAX` primarily due to the current implementation constraints.
///
/// Please visit [cuSOLVER Library Samples - gesvdaStridedBatched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/gesvdaStridedBatched) for a code example.
///
/// Remark 1: The routine returns `V`, not $V^{H}$. This is different from `gesvd`.
///
/// Remark 2: The routine only supports `m >= n`.
///
/// Remark 3: It is recommended to use an FP64 data type, that is `DgesvdaStridedBatched` or `ZgesvdaStridedBatched`.
///
/// Remark 4: If the user is confident in the accuracy of singular values and singular vectors, for example, certain conditions hold (required singular value is far from zero), then the performance can be improved by passing a null pointer to `h_RnrmF`, i.e. no computation of the residual norm.
pub fn cusolverDnZgesvdaStridedBatched(
handle: cusolverDnHandle_t,
jobz: cusolverEigMode_t,
rank: ::core::ffi::c_int,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
d_A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
strideA: ::core::ffi::c_longlong,
d_S: *mut f64,
strideS: ::core::ffi::c_longlong,
d_U: *mut cuDoubleComplex,
ldu: ::core::ffi::c_int,
strideU: ::core::ffi::c_longlong,
d_V: *mut cuDoubleComplex,
ldv: ::core::ffi::c_int,
strideV: ::core::ffi::c_longlong,
d_work: *mut cuDoubleComplex,
lwork: ::core::ffi::c_int,
d_info: *mut ::core::ffi::c_int,
h_R_nrmF: *mut f64,
batchSize: ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Creates the `64-bit API` parameter structure and initializes it to default values.
///
/// The created structure is written through the `params` pointer. It is released again with [`cusolverDnDestroyParams`].
///
/// # Parameters
///
/// - `params`: The pointer to the structure of `64-bit API`; receives the newly created structure.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_ALLOC_FAILED`]: The resources could not be allocated.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The structure was initialized successfully.
pub fn cusolverDnCreateParams(params: *mut cusolverDnParams_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Destroys the `64-bit API` parameter structure and releases any memory it required.
///
/// Unlike [`cusolverDnCreateParams`], which takes a pointer, this function takes the `cusolverDnParams_t` value itself.
///
/// # Parameters
///
/// - `params`: The structure of `64-bit API` to destroy.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The resources were released successfully.
pub fn cusolverDnDestroyParams(params: cusolverDnParams_t) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Configures algorithm `algo` for `function`, a `64-bit API` routine.
///
/// The selection is recorded in `params`, which is then passed to the `64-bit API` routine being configured.
///
/// # Parameters
///
/// - `params`: The structure of `64-bit API` in which the configuration is stored (passed as a `cusolverDnParams_t` value, not as a pointer to one).
/// - `function`: The routine to be configured.
/// - `algo`: The algorithm to be configured.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Wrong combination of `function` and `algo`.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnSetAdvOptions(
params: cusolverDnParams_t,
function: cusolverDnFunction_t,
algo: cusolverAlgMode_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes, in bytes, of the device and host working spaces needed by [`cusolverDnXpotrf`].
///
/// The two sizes are written through `workspaceInBytesOnDevice` and `workspaceInBytesOnHost`. The caller then passes buffers of those sizes to [`cusolverDnXpotrf`] as `bufferOnDevice` and `bufferOnHost`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `uplo`: Indicates if matrix `A` lower or upper part is stored, the other part is not referenced.
/// - `n`: Number of rows and columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`. Taken as a `*const` pointer by this function.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `computeType`: Data type of computation.
/// - `workspaceInBytesOnDevice`: Output. Receives the size in bytes of the device working space.
/// - `workspaceInBytesOnHost`: Output. Receives the size in bytes of the host working space.
///
/// # Return value
///
/// A [`cusolverStatus_t`] status code. NOTE(review): the individual codes are not documented in this binding; presumably they match those listed for [`cusolverDnXpotrf`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnXpotrf_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the Cholesky factorization of a Hermitian positive-definite matrix using the generic API interface.
///
/// The sizes needed for the pre-allocated working buffers are calculated by the helper [`cusolverDnXpotrf_bufferSize`].
///
/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
/// $$
/// A = L\\*L^{H}
/// $$
///
/// If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], only the upper triangular part of `A` is processed, and replaced by the upper triangular Cholesky factor `U`.
/// $$
/// A = U^{H}\\*U
/// $$
///
/// The user has to provide device and host working spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameter `workspaceInBytesOnDevice` (and `workspaceInBytesOnHost`) is the size in bytes of the device (and host) working space, and it is returned by [`cusolverDnXpotrf_bufferSize`].
///
/// If the Cholesky factorization fails, i.e. some leading minor of `A` is not positive definite, or equivalently some diagonal element of `L` or `U` is not a real number, the output parameter `info` indicates the smallest leading minor of `A` which is not positive definite.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Currently, [`cusolverDnXpotrf`] supports only the default algorithm.
///
/// Please visit [cuSOLVER Library Samples - Xpotrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xpotrf) for a code example.
///
/// **Algorithms supported by cusolverDnXpotrf**
///
/// | Algorithm | Description |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// The input arguments of [`cusolverDnXpotrf_bufferSize`] and [`cusolverDnXpotrf`] are listed under *Parameters* below.
///
/// The generic API has two different types, `dataTypeA` is data type of the matrix `A`, `computeType` is compute type of the operation. [`cusolverDnXpotrf`] only supports the following four combinations.
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **ComputeType** | **Meaning** |
/// | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `SPOTRF` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `DPOTRF` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CPOTRF` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `ZPOTRF` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `uplo`: Indicates if matrix `A` lower or upper part is stored, the other part is not referenced.
/// - `n`: Number of rows and columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXpotrf_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXpotrf_bufferSize`].
/// - `info`: If `info = 0`, the Cholesky factorization is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle). If `info = i`, the leading minor of order `i` is not positive definite.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0` or `lda<max(1,n)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXpotrf(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a system of linear equations using a Cholesky factor previously computed by [`cusolverDnXpotrf`]:
/// $$
/// A\\*X = B
/// $$
///
/// where `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful using the generic API interface. The input parameter `uplo` indicates which part of the matrix is used. The function will leave the other part untouched.
///
/// The user has to call [`cusolverDnXpotrf`] first to factorize matrix `A`. If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], `A` is the lower triangular Cholesky factor `L` corresponding to $A = L\\*L^{H}$. If input parameter `uplo` is [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], `A` is the upper triangular Cholesky factor `U` corresponding to $A = U^{H}\\*U$.
///
/// The operation is in-place, i.e. matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Currently, [`cusolverDnXpotrs`] supports only the default algorithm.
///
/// Please visit [cuSOLVER Library Samples - Xpotrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xpotrf) for a code example.
///
/// **Algorithms supported by cusolverDnXpotrs**
///
/// | Algorithm | Description |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// The input arguments of [`cusolverDnXpotrs`] are listed under *Parameters* below.
///
/// The generic API has two different types, `dataTypeA` is data type of the matrix `A`, `dataTypeB` is data type of the matrix `B`. [`cusolverDnXpotrs`] only supports the following four combinations.
///
/// **Valid combination of data types**
///
/// | **dataTypeA** | **dataTypeB** | **Meaning** |
/// | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `SPOTRS` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `DPOTRS` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CPOTRS` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `ZPOTRS` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `uplo`: Indicates if matrix `A` lower or upper part is stored, the other part is not referenced.
/// - `n`: Number of rows and columns of matrix `A`.
/// - `nrhs`: Number of columns of matrix `X` and `B`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`. `A` is either lower Cholesky factor `L` or upper Cholesky factor `U`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeB`: Data type of array `B`.
/// - `B`: Array of dimension `ldb * nrhs`. `ldb` is not less than `max(1,n)`. As an input, `B` is right hand side matrix. As an output, `B` is the solution matrix.
/// - `ldb`: Leading dimension of two-dimensional array used to store matrix `B`; not less than `max(1,n)`.
/// - `info`: If `info = 0`, the Cholesky factorization is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle).
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0`, `nrhs<0`, `lda<max(1,n)` or `ldb<max(1,n)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXpotrs(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
uplo: cublasFillMode_t,
n: i64,
nrhs: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeB: cudaDataType,
B: *mut ::core::ffi::c_void,
ldb: i64,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes, in bytes, of the device and host working spaces needed by [`cusolverDnXgeqrf`].
///
/// The two sizes are written through `workspaceInBytesOnDevice` and `workspaceInBytesOnHost`. The caller then passes buffers of those sizes to [`cusolverDnXgeqrf`] as `bufferOnDevice` and `bufferOnHost`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `m`: Number of rows of matrix `A`.
/// - `n`: Number of columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,m)`. Taken as a `*const` pointer by this function.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeTau`: Data type of array `tau`.
/// - `tau`: Array of dimension at least `min(m,n)`. Taken as a `*const` pointer by this function.
/// - `computeType`: Data type of computation.
/// - `workspaceInBytesOnDevice`: Output. Receives the size in bytes of the device working space.
/// - `workspaceInBytesOnHost`: Output. Receives the size in bytes of the host working space.
///
/// # Return value
///
/// A [`cusolverStatus_t`] status code. NOTE(review): the individual codes are not documented in this binding; presumably they match those listed for [`cusolverDnXgeqrf`] — confirm against the cuSOLVER documentation.
pub fn cusolverDnXgeqrf_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeTau: cudaDataType,
tau: *const ::core::ffi::c_void,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the QR factorization of an $m \times n$ matrix using the generic API interface:
/// $$
/// A = Q\\*R
/// $$
///
/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is an $n \times n$ upper triangular matrix.
///
/// The sizes needed for the pre-allocated working buffers are calculated by the helper [`cusolverDnXgeqrf_bufferSize`].
///
/// The user has to provide device and host working spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameter `workspaceInBytesOnDevice` (and `workspaceInBytesOnHost`) is the size in bytes of the device (and host) working space, and it is returned by [`cusolverDnXgeqrf_bufferSize`].
///
/// The matrix `R` is overwritten in the upper triangular part of `A`, including diagonal elements.
///
/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors is stored in the lower triangular part of `A`. The leading nonzero element of each householder vector is assumed to be 1 such that output parameter `tau` contains the scaling factor `τ`. If `v` is the original householder vector, `q` is the new householder vector corresponding to `τ`, satisfying the following relation:
/// $$
/// I - 2\\*v\\*v^{H} = I - \tau\\*q\\*q^{H}
/// $$
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Currently, [`cusolverDnXgeqrf`] supports only the default algorithm.
///
/// Please visit [cuSOLVER Library Samples - Xgeqrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xgeqrf) for a code example.
///
/// **Algorithms supported by cusolverDnXgeqrf**
///
/// | Algorithm | Description |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// The input arguments of [`cusolverDnXgeqrf_bufferSize`] and [`cusolverDnXgeqrf`] are listed under *Parameters* below.
///
/// The generic API has three different types, `dataTypeA` is data type of the matrix `A`, `dataTypeTau` is data type of the array `tau` and `computeType` is compute type of the operation. [`cusolverDnXgeqrf`] only supports the following four combinations.
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **ComputeType** | **Meaning** |
/// | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `SGEQRF` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `DGEQRF` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CGEQRF` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `ZGEQRF` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `m`: Number of rows of matrix `A`.
/// - `n`: Number of columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,m)`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeTau`: Data type of array `tau`.
/// - `tau`: Array of dimension at least `min(m,n)`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXgeqrf_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXgeqrf_bufferSize`].
/// - `info`: If `info = 0`, the QR factorization is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle).
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`m,n<0` or `lda<max(1,m)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgeqrf(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeTau: cudaDataType,
tau: *mut ::core::ffi::c_void,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes, in bytes, of the device and host working spaces needed by [`cusolverDnXgetrf`].
///
/// The two sizes are written through `workspaceInBytesOnDevice` and `workspaceInBytesOnHost`. The caller then passes buffers of those sizes to [`cusolverDnXgetrf`] as `bufferOnDevice` and `bufferOnHost`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context. NOTE(review): wording taken from the sibling generic-API routines — confirm.
/// - `params`: Structure of `64-bit API`; [`cusolverDnXgetrf`] algorithm selection is made through [`cusolverDnSetAdvOptions`].
/// - `m`, `n`: Dimensions of the $m \times n$ matrix `A` to be factorized.
/// - `dataTypeA`: Data type of array `A`. NOTE(review): presumably as for [`cusolverDnXgetrf`] — confirm.
/// - `A`: The matrix `A`. Taken as a `*const` pointer by this function.
/// - `lda`: Presumably the leading dimension of the array used to store `A` — confirm against the cuSOLVER documentation.
/// - `computeType`: Presumably the data type of computation — confirm against the cuSOLVER documentation.
/// - `workspaceInBytesOnDevice`: Output. Receives the size in bytes of the device working space.
/// - `workspaceInBytesOnHost`: Output. Receives the size in bytes of the host working space.
///
/// # Return value
///
/// A [`cusolverStatus_t`] status code. NOTE(review): the individual codes are not documented in this binding — confirm against the cuSOLVER documentation.
pub fn cusolverDnXgetrf_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the LU factorization of an $m \times n$ matrix:
/// $$
/// P\\*A = L\\*U
/// $$
///
/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix using the generic API interface.
///
/// The helper function [`cusolverDnXgetrf_bufferSize`] calculates the sizes needed for the pre-allocated buffers.
///
/// If LU factorization failed, i.e. matrix `A` (`U`) is singular, the output parameter `info=i` indicates `U(i,i) = 0`.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// If `ipiv` is null, no pivoting is performed. The factorization is `A=L*U`, which is not numerically stable.
///
/// Regardless of whether the LU factorization failed, the output parameter `ipiv` contains the pivoting sequence: row `i` is interchanged with row `ipiv(i)`.
///
/// The user has to provide device and host working spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameter `workspaceInBytesOnDevice` (and `workspaceInBytesOnHost`) is the size in bytes of the device (and host) working space, and it is returned by [`cusolverDnXgetrf_bufferSize`].
///
/// The user can combine [`cusolverDnXgetrf`] and `cusolverDnGetrs` to complete a linear solver.
///
/// Currently, [`cusolverDnXgetrf`] supports two algorithms. To select the legacy implementation, the user has to call [`cusolverDnSetAdvOptions`].
///
/// Please visit [cuSOLVER Library Samples - Xgetrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xgetrf) for a code example.
///
/// **Algorithms supported by cusolverDnXgetrf**
///
/// | **Algorithm** | **Description** |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. The fastest, requires a large workspace of `m*n` elements. |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_1`] | Legacy implementation |
///
/// List of input arguments for [`cusolverDnXgetrf_bufferSize`] and [`cusolverDnXgetrf`]:
///
/// The generic API has two different types: `dataTypeA` is the data type of the matrix `A`, and `computeType` is the compute type of the operation. [`cusolverDnXgetrf`] only supports the following four combinations.
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **ComputeType** | **Meaning** |
/// | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `SGETRF` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `DGETRF` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CGETRF` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `ZGETRF` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `m`: Number of rows of matrix `A`.
/// - `n`: Number of columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: `<type>` array of dimension `lda * n` with `lda` is not less than `max(1,m)`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `ipiv`: Array of size at least `min(m,n)`, containing pivot indices.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXgetrf_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXgetrf_bufferSize`].
/// - `info`: If `info = 0`, the LU factorization is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle). If `info = i`, then `U(i,i) = 0`.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`m,n<0` or `lda<max(1,m)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgetrf(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
ipiv: *mut i64,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Solves a linear system with multiple right-hand sides:
/// $$
/// op(A)\\*X = B
/// $$
///
/// where `A` is an $n \times n$ matrix, and was LU-factored by [`cusolverDnXgetrf`], that is, the lower triangular part of `A` is `L`, and the upper triangular part (including diagonal elements) of `A` is `U`. `B` is an $n \times {nrhs}$ right-hand side matrix using the generic API interface.
///
/// The input parameter `trans` is defined by:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUBLAS_OP_N} \\
/// A^T & \text{if } trans = \text{CUBLAS_OP_T} \\
/// A^H & \text{if } trans = \text{CUBLAS_OP_C}
/// \end{cases}
/// $$
///
/// The input parameter `ipiv` is an output of [`cusolverDnXgetrf`]. It contains pivot indices, which are used to permute the right-hand sides.
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// The user can combine [`cusolverDnXgetrf`] and [`cusolverDnXgetrs`] to complete a linear solver.
///
/// Currently, [`cusolverDnXgetrs`] supports only the default algorithm.
///
/// Please visit [cuSOLVER Library Samples - Xgetrf](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xgetrf) for a code example.
///
/// **Algorithms supported by cusolverDnXgetrs**
///
/// | **Algorithm** | **Description** |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// List of input arguments for [`cusolverDnXgetrs`]:
///
/// The generic API has two different types: `dataTypeA` is the data type of the matrix `A` and `dataTypeB` is the data type of the matrix `B`. [`cusolverDnXgetrs`] only supports the following four combinations:
///
/// **Valid combination of data types**
///
/// | **DataTypeA** | **dataTypeB** | **Meaning** |
/// | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `SGETRS` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `DGETRS` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CGETRS` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `ZGETRS` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `trans`: Operation `op(A)` that is non- or (conj.) transpose.
/// - `n`: Number of rows and columns of matrix `A`.
/// - `nrhs`: Number of right-hand sides.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `ipiv`: Array of size at least `n`, containing pivot indices.
/// - `dataTypeB`: Data type of array `B`.
/// - `B`: `<type>` array of dimension `ldb * nrhs` with `ldb` is not less than `max(1,n)`.
/// - `ldb`: Leading dimension of two-dimensional array used to store matrix `B`.
/// - `info`: If `info = 0`, the operation is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle).
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0` or `lda<max(1,n)` or `ldb<max(1,n)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXgetrs(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
trans: cublasOperation_t,
n: i64,
nrhs: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
ipiv: *const i64,
dataTypeB: cudaDataType,
B: *mut ::core::ffi::c_void,
ldb: i64,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the workspace sizes required by [`cusolverDnXsyevd`].
///
/// Per the [`cusolverDnXsyevd`] documentation, the sizes in bytes of the device and host working spaces are returned by this function; the signature exposes them as the `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` output pointers. The resulting values are what [`cusolverDnXsyevd`] expects for its `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` arguments.
///
/// The remaining arguments (`handle`, `params`, `jobz`, `uplo`, `n`, `dataTypeA`, `A`, `lda`, `dataTypeW`, `W`, `computeType`) are shared with [`cusolverDnXsyevd`]; see its `# Parameters` section for their meaning.
pub fn cusolverDnXsyevd_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeW: cudaDataType,
W: *const ::core::ffi::c_void,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes eigenvalues and eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A` using the generic API interface.
///
/// The standard symmetric eigenvalue problem is:
/// $$
/// A\\*V = V\\*\Lambda
/// $$
///
/// where `Λ` is a real $n \times n$ diagonal matrix. `V` is an $n \times n$ unitary matrix. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// The helper function [`cusolverDnXsyevd_bufferSize`] calculates the sizes needed for the pre-allocated buffers.
///
/// The user has to provide device and host working spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameter `workspaceInBytesOnDevice` (and `workspaceInBytesOnHost`) is the size in bytes of the device (and host) working space, and it is returned by [`cusolverDnXsyevd_bufferSize`].
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = i` (greater than zero), `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Please visit [cuSOLVER Library Samples - Xsyevd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xsyevd) for a code example.
///
/// Currently, [`cusolverDnXsyevd`] supports only the default algorithm.
///
/// **Algorithms supported by cusolverDnXsyevd**
///
/// | **Algorithm** | **Description** |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// List of input arguments for [`cusolverDnXsyevd_bufferSize`] and [`cusolverDnXsyevd`]:
///
/// The generic API has three different types: `dataTypeA` is the data type of the matrix `A`, `dataTypeW` is the data type of the matrix `W` and `computeType` is the compute type of the operation. [`cusolverDnXsyevd`] only supports the following four combinations.
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeW** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SSYEVD` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DSYEVD` |
/// | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CHEEVD` |
/// | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `ZHEEVD` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `jobz`: Specifies options to either compute eigenvalue only or compute eigen-pair: `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Compute eigenvalues only; `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute eigenvalues and eigenvectors.
/// - `uplo`: Specifies which part of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`]: Lower triangle of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]: Upper triangle of `A` is stored.
/// - `n`: Number of rows (or columns) of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the leading n-by-n upper triangular part of `A` contains the upper triangular part of the matrix `A`. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], the leading n-by-n lower triangular part of `A` contains the lower triangular part of the matrix `A`. On exit, if `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], and `info` = 0, `A` contains the orthonormal eigenvectors of the matrix `A`. If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], the contents of `A` are destroyed.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeW`: Data type of array `W`.
/// - `W`: A real array of dimension `n`. The eigenvalue values of `A`, in ascending order, i.e., sorted so that `W(i) <= W(i+1)`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXsyevd_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXsyevd_bufferSize`].
/// - `info`: If `info = 0`, the operation is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle). If `info = i (> 0)`, `info` indicates `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0`, or `lda<max(1,n)`, or `jobz` is not [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] or [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], or `uplo` is not [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] or [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevd(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeW: cudaDataType,
W: *mut ::core::ffi::c_void,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the workspace sizes required by [`cusolverDnXsyevBatched`].
///
/// Per the [`cusolverDnXsyevBatched`] documentation, the sizes in bytes of the device and host working spaces are returned by this function; the signature exposes them as the `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` output pointers. The resulting values are what [`cusolverDnXsyevBatched`] expects for its `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` arguments.
///
/// The remaining arguments (`handle`, `params`, `jobz`, `uplo`, `n`, `dataTypeA`, `A`, `lda`, `dataTypeW`, `W`, `computeType`, `batchSize`) are shared with [`cusolverDnXsyevBatched`]; see its `# Parameters` section for their meaning.
pub fn cusolverDnXsyevBatched_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeW: cudaDataType,
W: *const ::core::ffi::c_void,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
batchSize: i64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes eigenvalues and eigenvectors of a sequence of symmetric (Hermitian) $n \times n$ matrices:
/// $$
/// A_j\\*V_j = V_j\\*\Lambda_j
/// $$
///
/// where $\Lambda_j$ is a real $n \times n$ diagonal matrix. $V_j$ is an $n \times n$ unitary matrix. The diagonal elements of $\Lambda_j$ are the eigenvalues of $A_j$ in ascending order.
///
/// The helper function [`cusolverDnXsyevBatched_bufferSize`] calculates the sizes needed for the pre-allocated buffers.
///
/// `syevBatched` performs an eigendecomposition on each matrix. It requires that all matrices are of the same size `n` and are packed in a contiguous way,
/// $$
/// \begin{split}A = \begin{pmatrix}
/// {A0} & {A1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// Each matrix is column-major with leading dimension `lda`, so the formula for random access is $A_{k}\operatorname{(i,j)} = {A\lbrack\ i\ +\ lda\\*j\ +\ lda\\*n\\*k\rbrack}$.
///
/// The parameter `W` also contains the eigenvalues of each matrix in a contiguous way,
/// $$
/// \begin{split}W = \begin{pmatrix}
/// {W0} & {W1} & \cdots \\\\
/// \end{pmatrix}\end{split}
/// $$
///
/// The formula for random access of `W` is $W_{k}\operatorname{(j)} = {W\lbrack\ j\ +\ n\\*k\rbrack}$.
///
/// The user has to provide device and host working space which are pointed to by the input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` denote the size in bytes of the device and host working space, and are returned by [`cusolverDnXsyevBatched_bufferSize`].
///
/// The output parameter `info` is an integer array of size `batchSize`. If the function returns [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`], the first element `info[0] = -i` (less than zero) indicates the `i-th` parameter is wrong (not counting handle). Otherwise, if `info[i] > 0`, `syevBatched` does not converge on the `i-th` matrix.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], $A_{j}$ contains the orthonormal eigenvectors of the matrix $A_{j}$.
///
/// Note that the problem size is limited by the condition `n*lda*batchSize<=INT32_MAX` primarily due to the current implementation constraints.
///
/// **Algorithms supported by cusolverDnXsyevBatched**
///
/// | **Algorithm** | **Description** |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default. May switch between algorithms for best performance. |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_1`] | Uses a single algorithm for consistent accuracy over all n. |
///
/// List of input arguments for [`cusolverDnXsyevBatched_bufferSize`] and [`cusolverDnXsyevBatched`]:
///
/// The generic API has three different types: `dataTypeA` is the data type of the matrix `A`, `dataTypeW` is the data type of the array `W` and `computeType` is the compute type of the operation. [`cusolverDnXsyevBatched`] only supports the following four combinations:
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeW** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SSYEVBATCHED` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DSYEVBATCHED` |
/// | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CSYEVBATCHED` |
/// | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `ZSYEVBATCHED` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `jobz`: Specifies options to either compute eigenvalue only or compute eigen-pair: `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Compute eigenvalues only; `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute eigenvalues and eigenvectors.
/// - `uplo`: Specifies which part of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`]: Lower triangle of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]: Upper triangle of `A` is stored.
/// - `n`: Number of rows (or columns) of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n * batchSize` with `lda` is not less than `max(1,n)`. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the leading n-by-n upper triangular part of `Aj` contains the upper triangular part of the matrix `Aj`. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], the leading n-by-n lower triangular part of `Aj` contains the lower triangular part of the matrix `Aj`. On exit, if `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], and `info[j]` = 0, `Aj` contains the orthonormal eigenvectors of the matrix `Aj`. If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], the contents of `Aj` are destroyed.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `Aj`. `lda` is not less than `max(1,n)`.
/// - `dataTypeW`: Data type of array `W`.
/// - `W`: A real array of dimension `n * batchSize`. The eigenvalue values of `Aj`, in ascending order, i.e., sorted so that `Wj(i) <= Wj(i+1)`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXsyevBatched_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXsyevBatched_bufferSize`].
/// - `info`: An integer array of dimension `batchSize`. If [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`] is returned, `info[0] = -i` (less than zero) indicates `i-th` parameter is wrong (not counting handle). Otherwise, if `info[i] = 0`, the operation is successful. If `info[i] > 0`, `syevBatched` does not converge on the `i-th` matrix.
/// - `batchSize`: Number of matrices. `batchSize` is not less than 1.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0`, or `n*lda*batchSize>INT32_MAX`, or `lda<max(1,n)`, or `jobz` is not [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] or [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], or `uplo` is not [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] or [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], or `batchSize<0`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevBatched(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeW: cudaDataType,
W: *mut ::core::ffi::c_void,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
batchSize: i64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the workspace sizes required by [`cusolverDnXsyevdx`].
///
/// Per the [`cusolverDnXsyevdx`] documentation, the sizes in bytes of the device and host working spaces are returned by this function; the signature exposes them as the `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` output pointers. The resulting values are what [`cusolverDnXsyevdx`] expects for its `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` arguments.
///
/// The remaining arguments are shared with [`cusolverDnXsyevdx`]; see its `# Parameters` section for their meaning. Note that this helper names its eigenvalue-count argument `h_meig`, whereas [`cusolverDnXsyevdx`] names the argument in the same position `meig64`.
pub fn cusolverDnXsyevdx_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
vl: *mut ::core::ffi::c_void,
vu: *mut ::core::ffi::c_void,
il: i64,
iu: i64,
h_meig: *mut i64,
dataTypeW: cudaDataType,
W: *const ::core::ffi::c_void,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes all or a selection of the eigenvalues and optionally eigenvectors of a symmetric (Hermitian) $n \times n$ matrix `A` using the generic API interface.
///
/// The standard symmetric eigenvalue problem is:
/// $$
/// A\\*V = V\\*\Lambda
/// $$
///
/// where `Λ` is a real `n×h_meig` diagonal matrix. `V` is an `n×h_meig` unitary matrix. `h_meig` is the number of eigenvalues/eigenvectors computed by the routine, `h_meig` is equal to `n` when the whole spectrum (e.g., `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]) is requested. The diagonal elements of `Λ` are the eigenvalues of `A` in ascending order.
///
/// The helper function [`cusolverDnXsyevdx_bufferSize`] calculates the sizes needed for the pre-allocated buffers.
///
/// The user has to provide device and host working spaces which are pointed to by input parameters `bufferOnDevice` and `bufferOnHost`. The input parameter `workspaceInBytesOnDevice` (and `workspaceInBytesOnHost`) is the size in bytes of the device (and host) working space, and it is returned by [`cusolverDnXsyevdx_bufferSize`].
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = i` (greater than zero), `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `A` contains the orthonormal eigenvectors of the matrix `A`. The eigenvectors are computed by a divide and conquer algorithm.
///
/// Currently, [`cusolverDnXsyevdx`] supports only the default algorithm.
///
/// Please visit [cuSOLVER Library Samples - Xsyevdx](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xsyevdx) for a code example.
///
/// **Algorithms supported by cusolverDnXsyevdx**
///
/// | **Algorithm** | **Description** |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// List of input arguments for [`cusolverDnXsyevdx_bufferSize`] and [`cusolverDnXsyevdx`]:
///
/// The generic API has three different types: `dataTypeA` is the data type of the matrix `A`, `dataTypeW` is the data type of the matrix `W` and `computeType` is the compute type of the operation. [`cusolverDnXsyevdx`] only supports the following four combinations:
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeW** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SSYEVDX` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DSYEVDX` |
/// | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CHEEVDX` |
/// | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `ZHEEVDX` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `jobz`: Specifies options to either compute eigenvalue only or compute eigen-pair: `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Compute eigenvalues only; `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute eigenvalues and eigenvectors.
/// - `range`: Specifies which selection of eigenvalues and optionally eigenvectors needs to be computed: `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`]: all eigenvalues/eigenvectors will be found, which becomes the classical syevd/heevd routine; `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_V`]: all eigenvalues/eigenvectors in the half-open interval (vl,vu] will be found; `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_I`]: the il-th through iu-th eigenvalues/eigenvectors will be found.
/// - `uplo`: Specifies which part of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`]: Lower triangle of `A` is stored. `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]: Upper triangle of `A` is stored.
/// - `n`: Number of rows (or columns) of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` with `lda` is not less than `max(1,n)`. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`], the leading n-by-n upper triangular part of `A` contains the upper triangular part of the matrix `A`. If `uplo` = [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`], the leading n-by-n lower triangular part of `A` contains the lower triangular part of the matrix `A`. On exit, if `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], and `info` = 0, `A` contains the orthonormal eigenvectors of the matrix `A`. If `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], the contents of `A` are destroyed.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`. `lda` is not less than `max(1,n)`.
/// - `vl`, `vu`: Bounds of the half-open interval `(vl,vu]` in which eigenvalues are searched when `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_V`] (see `range`).
/// - `il`, `iu`: Indices of the first and last eigenvalues to be found when `range` = [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_I`] (see `range`).
/// - `meig64`: NOTE(review): presumably receives `h_meig`, the number of eigenvalues/eigenvectors computed by the routine ([`cusolverDnXsyevdx_bufferSize`] names the argument in this position `h_meig`) — confirm against the cuSOLVER documentation.
/// - `dataTypeW`: Data type of array `W`.
/// - `W`: A real array of dimension `n`. The eigenvalue values of `A`, in ascending order, i.e., sorted so that `W(i) <= W(i+1)`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXsyevdx_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXsyevdx_bufferSize`].
/// - `info`: If `info = 0`, the operation is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle). If `info = i (> 0)`, `info` indicates `i` off-diagonal elements of an intermediate tridiagonal form did not converge to zero.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n<0`, or `lda<max(1,n)`, or `jobz` is not [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] or [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], or `range` is not [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_ALL`] or [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_V`] or [`cusolverEigRange_t::CUSOLVER_EIG_RANGE_I`], or `uplo` is not [`cublasFillMode_t::CUBLAS_FILL_MODE_LOWER`] or [`cublasFillMode_t::CUBLAS_FILL_MODE_UPPER`]).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
pub fn cusolverDnXsyevdx(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
range: cusolverEigRange_t,
uplo: cublasFillMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
vl: *mut ::core::ffi::c_void,
vu: *mut ::core::ffi::c_void,
il: i64,
iu: i64,
meig64: *mut i64,
dataTypeW: cudaDataType,
W: *mut ::core::ffi::c_void,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Queries the workspace sizes required by [`cusolverDnXgeev`].
///
/// Per the [`cusolverDnXgeev`] documentation, the sizes in bytes of the device and host working spaces are returned by this function; the signature exposes them as the `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` output pointers.
///
/// The [`cusolverDnXgeev`] documentation lists its input arguments jointly with this helper, and describes `dataTypeA`, `dataTypeW`, `dataTypeVL`, `dataTypeVR` as the data types of `A`, `W`, `VL`, `VR` respectively and `computeType` as the compute type of the operation. It also notes that `geev` only supports the computation of right eigenvectors, so `jobvl` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] must be set.
pub fn cusolverDnXgeev_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobvl: cusolverEigMode_t,
jobvr: cusolverEigMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeW: cudaDataType,
W: *const ::core::ffi::c_void,
dataTypeVL: cudaDataType,
VL: *const ::core::ffi::c_void,
ldvl: i64,
dataTypeVR: cudaDataType,
VR: *const ::core::ffi::c_void,
ldvr: i64,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the eigenvalues and, optionally, the left and/or right eigenvectors of an n-by-n real non-symmetric or complex non-Hermitian matrix `A`.
///
/// The sizes needed for the pre-allocated workspace buffers are calculated by the helper function [`cusolverDnXgeev_bufferSize`].
///
/// The right eigenvector `v(j)` of `A` satisfies:
/// $$
/// A\\*v(j) = w(j)\\*v(j)
/// $$
///
/// where `w(j)` is its eigenvalue. The left eigenvector `u(j)` of `A` satisfies:
/// $$
/// u(j)^{H}\\*A = w(j)\\*u(j)^{H}
/// $$
///
/// where $u(j)^{H}$ denotes the conjugate-transpose of `u(j)`.
///
/// The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real.
///
/// If `A` is real-valued, there are two options to return the eigenvalues in `W`. The first option sets all data types to real-valued types. Then `W` holds `2*n` entries. The first n entries hold the real parts and the last n entries hold the imaginary parts. The LAPACK interface with separate arrays for the real parts `WR` and the imaginary parts `WI` can be recovered by setting the pointers `WR = W`, `WI = W+n`. The second option uses a complex data type for `W`. Then `W` is n entries long; each real eigenvalue is stored as a complex number and for each complex conjugate pair, both eigenvalues are returned. The computation is still executed fully in real arithmetic.
///
/// The user has to provide device and host working space which are pointed to by the input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` denote the size in bytes of the device and host working space, and are returned by [`cusolverDnXgeev_bufferSize`].
///
/// If the output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `info = 0`, the QR algorithm converged and `W` contains the computed eigenvalues of `A` and, if requested, the corresponding left and/or right eigenvectors have been computed. If `info = i` (greater than zero), the QR algorithm failed to compute all the eigenvalues and no eigenvectors have been computed. The elements `i+1:n` of `W` contain eigenvalues which have converged.
///
/// Remark 1: `geev` only supports the computation of right eigenvectors. So, `jobvl` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] must be set.
///
/// Remark 2: `geev` uses balancing to improve the conditioning of the eigenvalues and eigenvectors.
///
/// Remark 3: `geev` is a hybrid CPU-GPU algorithm. Best performance is attained with pinned host memory.
///
/// Currently, [`cusolverDnXgeev`] supports only the default algorithm.
///
/// Please visit [cuSOLVER Library Samples - Xgeev](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xgeev) for a code example.
///
/// **Table of algorithms supported by cusolverDnXgeev**
///
/// | | |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// List of input arguments for [`cusolverDnXgeev_bufferSize`] and [`cusolverDnXgeev`]:
///
/// The generic API has five different types, `dataTypeA` is the data type of the matrix `A`, `dataTypeW` is the data type of the array `W`, `dataTypeVL` is the data type of the matrix `VL`, `dataTypeVR` is the data type of the matrix `VR` and `computeType` is the compute type of the operation. [`cusolverDnXgeev`] only supports the following six combinations:
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeW** | **DataTypeVL** | **DataTypeVR** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SGEEV` |
/// | `CUDA_R_32F` | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | 32F mixed real-complex |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DGEEV` |
/// | `CUDA_R_64F` | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | 64F mixed real-complex |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CGEEV` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `ZGEEV` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `jobvl`: Specifies whether or not to compute left eigenvectors. `jobvl` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Do not compute left eigenvectors of A; `jobvl` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute left eigenvectors of A.
/// - `jobvr`: Specifies whether or not to compute right eigenvectors. `jobvr` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Do not compute right eigenvectors of A; `jobvr` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute right eigenvectors of A.
/// - `n`: Number of rows (or columns) of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` where `lda` is not less than `max(1,n)`. On entry, the n-by-n matrix `A`. On exit, `A` has been overwritten.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeW`: Data type of array `W`.
/// - `W`: Array holding the computed eigenvalues of `A`. Its length is `2*n` if `dataTypeA` = `CUDA_R_32F` and `dataTypeW` = `CUDA_R_32F` or `dataTypeA` = `CUDA_R_64F` and `dataTypeW` = `CUDA_R_64F` and the first n entries of `W` hold the real parts and the last n entries of `W` hold the imaginary parts of the eigenvalues. Otherwise, the length is n.
/// - `dataTypeVL`: Data type of array `VL`.
/// - `VL`: Array of dimension `ldvl * n`. If `jobvl` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], the left eigenvectors `u(j)` are stored one after another in the columns of `VL`, in the same order as their eigenvalues. If `dataTypeVL` is complex or the `j-th` eigenvalue is real, then `u(j) = VL(:,j)`, the `j-th` column of `VL`. If `dataTypeVL` is real and the `j-th` and `(j+1)-st` eigenvalues form a complex conjugate pair, then `u(j) = VL(:,j) + i*VL(:,j+1)` and `u(j+1) = VL(:,j) - i*VL(:,j+1)`. If `jobvl` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `VL` is not referenced.
/// - `ldvl`: Leading dimension of two-dimensional array used to store matrix `VL` with `ldvl >= 1`. If `jobvl` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `ldvl >= n`.
/// - `dataTypeVR`: Data type of array `VR`.
/// - `VR`: Array of dimension `ldvr * n`. If `jobvr` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], the right eigenvectors `v(j)` are stored one after another in the columns of `VR`, in the same order as their eigenvalues. If `dataTypeVR` is complex or the `j-th` eigenvalue is real, then `v(j) = VR(:,j)`, the `j-th` column of `VR`. If `dataTypeVR` is real and the `j-th` and `(j+1)-st` eigenvalues form a complex conjugate pair, then `v(j) = VR(:,j) + i*VR(:,j+1)` and `v(j+1) = VR(:,j) - i*VR(:,j+1)`. If `jobvr` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`], `VR` is not referenced.
/// - `ldvr`: Leading dimension of two-dimensional array used to store matrix `VR` with `ldvr >= 1`. If `jobvr` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `ldvr >= n`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXgeev_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXgeev_bufferSize`].
/// - `info`: If `info = 0`, the operation is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle). If `info = i` (greater than zero), the QR algorithm failed to compute all the eigenvalues and no eigenvectors have been computed; elements `i+1:n` of `W` contain eigenvalues which have converged.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`jobvl` is not [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] or [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], or `jobvr` is not [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`] or [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], `n<0`, or `lda < max(1,n)`, or `ldvl < n` if `jobvl` is [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`], or `ldvr < n` if `jobvr` is [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; the caller is responsible for passing pointers that satisfy the array dimensions and workspace sizes described under *Parameters*.
pub fn cusolverDnXgeev(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobvl: cusolverEigMode_t,
jobvr: cusolverEigMode_t,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeW: cudaDataType,
W: *mut ::core::ffi::c_void,
dataTypeVL: cudaDataType,
VL: *mut ::core::ffi::c_void,
ldvl: i64,
dataTypeVR: cudaDataType,
VR: *mut ::core::ffi::c_void,
ldvr: i64,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes, in bytes, of the device and host workspaces needed by [`cusolverDnXgesvd`].
///
/// The documentation of [`cusolverDnXgesvd`] lists the input arguments shared by both functions and the supported data type combinations.
///
/// # Parameters
///
/// - `workspaceInBytesOnDevice`: Output. Size in bytes of the `bufferOnDevice` workspace to pass to [`cusolverDnXgesvd`].
/// - `workspaceInBytesOnHost`: Output. Size in bytes of the `bufferOnHost` workspace to pass to [`cusolverDnXgesvd`].
///
/// All other parameters have the same names as in [`cusolverDnXgesvd`]; see that function for their description. Here the matrix and array pointers are declared `*const`.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; presumably both output pointers must be valid for writing a `size_t` (confirm against the cuSOLVER documentation).
pub fn cusolverDnXgesvd_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobu: ::core::ffi::c_schar,
jobvt: ::core::ffi::c_schar,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeS: cudaDataType,
S: *const ::core::ffi::c_void,
dataTypeU: cudaDataType,
U: *const ::core::ffi::c_void,
ldu: i64,
dataTypeVT: cudaDataType,
VT: *const ::core::ffi::c_void,
ldvt: i64,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors.
///
/// The sizes needed for the pre-allocated workspace buffers are calculated by the helper function [`cusolverDnXgesvd_bufferSize`].
///
/// The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// The user has to provide device and host working spaces which are pointed to by the input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are the sizes in bytes of the device and host working spaces, and are returned by [`cusolverDnXgesvd_bufferSize`].
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). If `bdsqr` did not converge, `info` specifies how many superdiagonals of an intermediate bidiagonal form did not converge to zero.
///
/// Currently, [`cusolverDnXgesvd`] supports only the default algorithm.
///
/// **Algorithms supported by cusolverDnXgesvd**
///
/// | | |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// Please visit [cuSOLVER Library Samples - Xgesvd](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xgesvd) for a code example.
///
/// Remark 1: `gesvd` only supports `m>=n`.
///
/// Remark 2: the routine returns $V^H$, not `V`.
///
/// List of input arguments for [`cusolverDnXgesvd_bufferSize`] and [`cusolverDnXgesvd`]:
///
/// The generic API has five different types: `dataTypeA` is the data type of the matrix `A`, `dataTypeS` is the data type of the vector `S`, `dataTypeU` is the data type of the matrix `U`, `dataTypeVT` is the data type of the matrix `VT`, and `computeType` is the compute type of the operation. [`cusolverDnXgesvd`] only supports the following four combinations.
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeS** | **DataTypeU** | **DataTypeVT** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SGESVD` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DGESVD` |
/// | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CGESVD` |
/// | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `ZGESVD` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `jobu`: Specifies options for computing all or part of the matrix `U`: = ‘A’: all m columns of U are returned in array U; = ‘S’: the first min(m,n) columns of U (the left singular vectors) are returned in the array U; = ‘O’: the first min(m,n) columns of U (the left singular vectors) are overwritten on the array A; = ‘N’: no columns of U (no left singular vectors) are computed.
/// - `jobvt`: Specifies options for computing all or part of the matrix V\*\*T: = ‘A’: all N rows of V\*\*T are returned in the array VT; = ‘S’: the first min(m,n) rows of V\*\*T (the right singular vectors) are returned in the array VT; = ‘O’: the first min(m,n) rows of V\*\*T (the right singular vectors) are overwritten on the array A; = ‘N’: no rows of V\*\*T (no right singular vectors) are computed.
/// - `m`: Number of rows of matrix `A`.
/// - `n`: Number of columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` where `lda` is not less than `max(1,m)`. On exit, if `jobu` = ‘O’, `A` is overwritten with `U`; if `jobvt` = ‘O’, `A` is overwritten with `VT`; otherwise, the contents of `A` are destroyed.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeS`: Data type of array `S`.
/// - `S`: Real array of dimension `min(m,n)`. The singular values of A, sorted so that `S(i) >= S(i+1)`.
/// - `dataTypeU`: Data type of array `U`.
/// - `U`: Array of dimension `ldu * m` where `ldu` is not less than `max(1,m)`. If `jobu` = ‘A’, `U` contains the $m \times m$ unitary matrix `U`. If `jobu` = ‘S’, `U` contains the first min(m,n) columns of U. If `jobu` = ‘N’ or ‘O’, `U` is not referenced.
/// - `ldu`: Leading dimension of two-dimensional array used to store matrix `U`. If `jobu` = ‘A’ or ‘S’, `ldu >= max(1,m)`. Otherwise, `ldu >= 1`.
/// - `dataTypeVT`: Data type of array `VT`.
/// - `VT`: Array of dimension `ldvt * n` where `ldvt` is not less than `max(1,n)`. If `jobvt` = ‘A’, `VT` contains the $n \times n$ unitary matrix V\*\*T. If `jobvt` = ‘S’, `VT` contains the first min(m,n) rows of V\*\*T. If `jobvt` = ‘N’ or ‘O’, `VT` is not referenced.
/// - `ldvt`: Leading dimension of two-dimensional array used to store matrix `VT`. If `jobvt` = ‘A’, `ldvt >= max(1,n)`. If `jobvt` = ‘S’, `ldvt >= max(1,min(m,n))`. Otherwise, `ldvt >= 1`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXgesvd_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXgesvd_bufferSize`].
/// - `info`: If `info = 0`, the operation is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle). If `info > 0`, `info` indicates how many superdiagonals of an intermediate bidiagonal form did not converge to zero.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`m,n<0`, or `lda<max(1,m)`, or `ldu<1`, or if `jobu` = ‘S’ or ‘A’, `ldu<m`, or `ldvt<1`, or if `jobvt` = ‘A’ `ldvt<n`, or if `jobvt` = ‘S’ `ldvt<min(m,n)`, or `jobu`, `jobvt` are none of ‘N’, ‘O’, ‘S’, ‘A’, or `jobu` = `jobvt` = ‘O’ ).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; the caller is responsible for passing pointers that satisfy the array dimensions and workspace sizes described under *Parameters*.
pub fn cusolverDnXgesvd(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobu: ::core::ffi::c_schar,
jobvt: ::core::ffi::c_schar,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeS: cudaDataType,
S: *mut ::core::ffi::c_void,
dataTypeU: cudaDataType,
U: *mut ::core::ffi::c_void,
ldu: i64,
dataTypeVT: cudaDataType,
VT: *mut ::core::ffi::c_void,
ldvt: i64,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes, in bytes, of the device and host workspaces needed by [`cusolverDnXgesvdp`].
///
/// The documentation of [`cusolverDnXgesvdp`] lists the input arguments shared by both functions and the supported data type combinations.
///
/// # Parameters
///
/// - `workspaceInBytesOnDevice`: Output. Size in bytes of the `bufferOnDevice` workspace to pass to [`cusolverDnXgesvdp`].
/// - `workspaceInBytesOnHost`: Output. Size in bytes of the `bufferOnHost` workspace to pass to [`cusolverDnXgesvdp`].
///
/// All other parameters have the same names as in [`cusolverDnXgesvdp`]; see that function for their description. Here the matrix and array pointers are declared `*const`.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; presumably both output pointers must be valid for writing a `size_t` (confirm against the cuSOLVER documentation).
pub fn cusolverDnXgesvdp_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeS: cudaDataType,
S: *const ::core::ffi::c_void,
dataTypeU: cudaDataType,
U: *const ::core::ffi::c_void,
ldu: i64,
dataTypeV: cudaDataType,
V: *const ::core::ffi::c_void,
ldv: i64,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the singular value decomposition (SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors, using polar decomposition.
///
/// The sizes needed for the pre-allocated workspace buffers are calculated by the helper function [`cusolverDnXgesvdp_bufferSize`].
///
/// The SVD is written:
/// $$
/// A = U\\*\Sigma\\*V^H
/// $$
///
/// where $\Sigma$ is an $m \times n$ matrix which is zero except for its `min(m,n)` diagonal elements, `U` is an $m \times m$ unitary matrix, and `V` is an $n \times n$ unitary matrix. The diagonal elements of $\Sigma$ are the singular values of `A`; they are real and non-negative, and are returned in descending order. The first `min(m,n)` columns of `U` and `V` are the left and right singular vectors of `A`.
///
/// [`cusolverDnXgesvdp`] combines polar decomposition in \[14\] and [`cusolverDnXsyevd`] to compute SVD. It is much faster than [`cusolverDnXgesvd`] which is based on QR algorithm. However polar decomposition in \[14\] may not deliver a full unitary matrix when the matrix A has a singular value close to zero. To work around the issue when the singular value is close to zero, we add a small perturbation so polar decomposition can deliver the correct result. The consequence is inaccurate singular values shifted by this perturbation. The output parameter `h_err_sigma` is the magnitude of this perturbation. In other words, `h_err_sigma` shows the accuracy of SVD.
///
/// The user has to provide device and host working spaces which are pointed to by the input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are the sizes in bytes of the device and host working spaces, and are returned by [`cusolverDnXgesvdp_bufferSize`].
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Currently, [`cusolverDnXgesvdp`] supports only the default algorithm.
///
/// **Algorithms supported by cusolverDnXgesvdp**
///
/// | | |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// Please visit [cuSOLVER Library Samples - Xgesvdp](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xgesvdp) for a code example.
///
/// Remark 1: `gesvdp` supports `n>=m` as well.
///
/// Remark 2: the routine returns `V`, not $V^{H}$
///
/// List of input arguments for [`cusolverDnXgesvdp_bufferSize`] and [`cusolverDnXgesvdp`]:
///
/// The generic API has five different types: `dataTypeA` is the data type of the matrix `A`, `dataTypeS` is the data type of the vector `S`, `dataTypeU` is the data type of the matrix `U`, `dataTypeV` is the data type of the matrix `V`, and `computeType` is the compute type of the operation. [`cusolverDnXgesvdp`] only supports the following four combinations:
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeS** | **DataTypeU** | **DataTypeV** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SGESVDP` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DGESVDP` |
/// | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CGESVDP` |
/// | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `ZGESVDP` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `jobz`: Specifies options to either compute singular values only or compute singular vectors as well: `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_NOVECTOR`]: Compute singular values only. `jobz` = [`cusolverEigMode_t::CUSOLVER_EIG_MODE_VECTOR`]: Compute singular values and singular vectors.
/// - `econ`: `econ = 1` for economy size for `U` and `V`.
/// - `m`: Number of rows of matrix `A`.
/// - `n`: Number of columns of matrix `A`.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` where `lda` is not less than `max(1,m)`. On exit, the contents of `A` are destroyed.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeS`: Data type of array `S`.
/// - `S`: Real array of dimension `min(m,n)`. The singular values of A, sorted so that `S(i) >= S(i+1)`.
/// - `dataTypeU`: Data type of array `U`.
/// - `U`: Array of dimension `ldu * m` where `ldu` is not less than `max(1,m)`. `U` contains the $m \times m$ unitary matrix `U`. If `econ=1`, only reports first `min(m,n)` columns of `U`.
/// - `ldu`: Leading dimension of two-dimensional array used to store matrix `U`.
/// - `dataTypeV`: Data type of array `V`.
/// - `V`: Array of dimension `ldv * n` where `ldv` is not less than `max(1,n)`. `V` contains the $n \times n$ unitary matrix V. If `econ=1`, only reports first `min(m,n)` columns of `V`.
/// - `ldv`: Leading dimension of two-dimensional array used to store matrix `V`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXgesvdp_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXgesvdp_bufferSize`].
/// - `d_info`: The `info` output referred to above: if `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle). Presumably `info = 0` indicates success — confirm against the cuSOLVER documentation.
/// - `h_err_sigma`: Magnitude of the perturbation, showing the accuracy of SVD.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`m,n<0` or `lda<max(1,m)` or `ldu<max(1,m)` or `ldv<max(1,n)`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; the caller is responsible for passing pointers that satisfy the array dimensions and workspace sizes described under *Parameters*.
pub fn cusolverDnXgesvdp(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobz: cusolverEigMode_t,
econ: ::core::ffi::c_int,
m: i64,
n: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeS: cudaDataType,
S: *mut ::core::ffi::c_void,
dataTypeU: cudaDataType,
U: *mut ::core::ffi::c_void,
ldu: i64,
dataTypeV: cudaDataType,
V: *mut ::core::ffi::c_void,
ldv: i64,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
d_info: *mut ::core::ffi::c_int,
h_err_sigma: *mut f64,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes, in bytes, of the device and host workspaces needed by [`cusolverDnXgesvdr`].
///
/// The documentation of [`cusolverDnXgesvdr`] lists the input arguments shared by both functions and the supported data type combinations.
///
/// # Parameters
///
/// - `workspaceInBytesOnDevice`: Output. Size in bytes of the `bufferOnDevice` workspace to pass to [`cusolverDnXgesvdr`].
/// - `workspaceInBytesOnHost`: Output. Size in bytes of the `bufferOnHost` workspace to pass to [`cusolverDnXgesvdr`].
///
/// All other parameters have the same names as in [`cusolverDnXgesvdr`]; see that function for their description. Here the matrix and array pointers are declared `*const`.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; presumably both output pointers must be valid for writing a `size_t` (confirm against the cuSOLVER documentation).
pub fn cusolverDnXgesvdr_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobu: ::core::ffi::c_schar,
jobv: ::core::ffi::c_schar,
m: i64,
n: i64,
k: i64,
p: i64,
niters: i64,
dataTypeA: cudaDataType,
A: *const ::core::ffi::c_void,
lda: i64,
dataTypeSrand: cudaDataType,
Srand: *const ::core::ffi::c_void,
dataTypeUrand: cudaDataType,
Urand: *const ::core::ffi::c_void,
ldUrand: i64,
dataTypeVrand: cudaDataType,
Vrand: *const ::core::ffi::c_void,
ldVrand: i64,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Computes the approximated rank-k singular value decomposition (k-SVD) of an $m \times n$ matrix `A` and the corresponding left and/or right singular vectors.
///
/// The sizes needed for the pre-allocated workspace buffers are calculated by the helper function [`cusolverDnXgesvdr_bufferSize`].
///
/// The k-SVD is written as:
/// $$
/// A_{k}\approx U\\*\Sigma\\*V^{H}
/// $$
///
/// where $\Sigma$ is a $k \times k$ matrix which is zero except for its diagonal elements, `U` is an $m \times k$ orthonormal matrix, and `V` is a $k \times n$ orthonormal matrix. The diagonal elements of $\Sigma$ are the approximated singular values of `A`; they are real and non-negative, and are returned in descending order. The columns of `U` and `V` are the top-`k` left and right singular vectors of `A`.
///
/// [`cusolverDnXgesvdr`] implements randomized methods described in \[15\] to compute k-SVD that is accurate with high probability if the conditions described in \[15\] hold. [`cusolverDnXgesvdr`] is intended to compute a very small portion of the spectrum of `A` (meaning that `k` is very small compared to `min(m,n)`) fast and with good quality, especially when the dimensions of the matrix are large.
///
/// The accuracy of the method depends on the spectrum of `A`, the number of power iterations `niters`, the oversampling parameter `p` and the ratio between `p` and the dimensions of the matrix `A`. Larger values of oversampling `p` or larger number of iterations `niters` might produce more accurate approximations, but it will also increase the run time of [`cusolverDnXgesvdr`].
///
/// Our recommendation is to use two iterations and set the oversampling to at least `2k`. Once the solver provides enough accuracy, adjust the values of `k` and `niters` for better performance.
///
/// The user has to provide device and host working spaces which are pointed to by the input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are the sizes in bytes of the device and host working spaces, and are returned by [`cusolverDnXgesvdr_bufferSize`].
///
/// If output parameter `info = -i` (less than zero), the `i-th` parameter is wrong (not counting handle).
///
/// Currently, [`cusolverDnXgesvdr`] supports only the default algorithm.
///
/// **Algorithms supported by cusolverDnXgesvdr**
///
/// | | |
/// | --- | --- |
/// | [`cusolverAlgMode_t::CUSOLVER_ALG_0`] or `NULL` | Default algorithm. |
///
/// Please visit [cuSOLVER Library Samples - Xgesvdr](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSOLVER/Xgesvdr) for a code example.
///
/// Remark 1: `gesvdr` supports `n>=m` as well.
///
/// Remark 2: the routine returns `V`, not $V^{H}$
///
/// List of input arguments for [`cusolverDnXgesvdr_bufferSize`] and [`cusolverDnXgesvdr`]:
///
/// The generic API has five different types: `dataTypeA` is the data type of the matrix `A`, `dataTypeS` is the data type of the vector `S`, `dataTypeU` is the data type of the matrix `U`, `dataTypeV` is the data type of the matrix `V`, and `computeType` is the compute type of the operation. [`cusolverDnXgesvdr`] only supports the following four combinations.
///
/// **Valid combination of data type and compute type**
///
/// | **DataTypeA** | **DataTypeS** | **DataTypeU** | **DataTypeV** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SGESVDR` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DGESVDR` |
/// | `CUDA_C_32F` | `CUDA_R_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CGESVDR` |
/// | `CUDA_C_64F` | `CUDA_R_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `ZGESVDR` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `jobu`: Specifies options for computing all or part of the matrix `U`: = ‘S’: the first k columns of U (the left singular vectors) are returned in the array U; = ‘N’: no columns of U (no left singular vectors) are computed.
/// - `jobv`: Specifies options for computing all or part of the matrix V: = ‘S’: the first k rows of V (the right singular vectors) are returned in the array V; = ‘N’: no rows of V (no right singular vectors) are computed.
/// - `m`: Number of rows of matrix `A`.
/// - `n`: Number of columns of matrix `A`.
/// - `k`: Rank of the k-SVD decomposition of matrix `A`. `k` is less than `min(m,n)`.
/// - `p`: Oversampling. The size of the subspace will be `(k + p)`. `(k+p)` is less than `min(m,n)`.
/// - `niters`: Number of iterations of the power method.
/// - `dataTypeA`: Data type of array `A`.
/// - `A`: Array of dimension `lda * n` where `lda` is not less than `max(1,m)`. On exit, the contents of `A` are destroyed.
/// - `lda`: Leading dimension of two-dimensional array used to store matrix `A`.
/// - `dataTypeSrand`, `Srand`, `dataTypeUrand`, `Urand`, `ldUrand`, `dataTypeVrand`, `Vrand`, `ldVrand`: Not described individually here. NOTE(review): presumably these are the data types, output arrays and leading dimensions that the text above calls `dataTypeS`/`S`, `dataTypeU`/`U`/`ldu` and `dataTypeV`/`V`/`ldv` — confirm against the cuSOLVER documentation.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXgesvdr_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXgesvdr_bufferSize`].
/// - `d_info`: If `info = 0`, the operation is successful. If `info = -i`, the `i-th` parameter is wrong (not counting handle).
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`m,n<0` or `lda<max(1,m)` or `ldu<max(1,m)` or `ldv<max(1,n)` ).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; the caller is responsible for passing pointers that satisfy the array dimensions and workspace sizes described under *Parameters*.
pub fn cusolverDnXgesvdr(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
jobu: ::core::ffi::c_schar,
jobv: ::core::ffi::c_schar,
m: i64,
n: i64,
k: i64,
p: i64,
niters: i64,
dataTypeA: cudaDataType,
A: *mut ::core::ffi::c_void,
lda: i64,
dataTypeSrand: cudaDataType,
Srand: *mut ::core::ffi::c_void,
dataTypeUrand: cudaDataType,
Urand: *mut ::core::ffi::c_void,
ldUrand: i64,
dataTypeVrand: cudaDataType,
Vrand: *mut ::core::ffi::c_void,
ldVrand: i64,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
d_info: *mut ::core::ffi::c_int,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Calculates the sizes, in bytes, of the device and host workspaces needed by [`cusolverDnXlarft`].
///
/// The documentation of [`cusolverDnXlarft`] describes the parameters shared by both functions and the supported data type combinations.
///
/// # Parameters
///
/// - `workspaceInBytesOnDevice`: Output. Size in bytes of the `bufferOnDevice` workspace to pass to [`cusolverDnXlarft`].
/// - `workspaceInBytesOnHost`: Output. Size in bytes of the `bufferOnHost` workspace to pass to [`cusolverDnXlarft`].
///
/// All other parameters have the same names as in [`cusolverDnXlarft`]; see that function for their description.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block, so every call is `unsafe`. The compiler cannot check the raw-pointer arguments; presumably both output pointers must be valid for writing a `size_t` (confirm against the cuSOLVER documentation).
pub fn cusolverDnXlarft_bufferSize(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
direct: cusolverDirectMode_t,
storev: cusolverStorevMode_t,
n: i64,
k: i64,
dataTypeV: cudaDataType,
V: *const ::core::ffi::c_void,
ldv: i64,
dataTypeTau: cudaDataType,
tau: *const ::core::ffi::c_void,
dataTypeT: cudaDataType,
T: *mut ::core::ffi::c_void,
ldt: i64,
computeType: cudaDataType,
workspaceInBytesOnDevice: *mut size_t,
workspaceInBytesOnHost: *mut size_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Forms the triangular factor `T` of a block reflector `H` of order `n`, which is defined as a product of `k` elementary reflectors.
///
/// The sizes needed for the pre-allocated work buffers are calculated by the helper function [`cusolverDnXlarft_bufferSize`].
///
/// The shape of `T` depends on `direct`:
///
/// - If `direct == CUBLAS_DIRECT_FORWARD`, `T` is upper triangular.
/// - If `direct == CUBLAS_DIRECT_BACKWARD`, `T` is lower triangular.
///
/// Only `storev == CUBLAS_STOREV_COLUMNWISE` is supported, which indicates that the vector defining the elementary reflector `H(i)` is stored in the i-th column of the array `V`, and `H = I - V * T * V^T` (`H = I - V * T * V^H` for complex types).
///
/// The user has to provide device and host working spaces, which are pointed to by the input parameters `bufferOnDevice` and `bufferOnHost`. The input parameters `workspaceInBytesOnDevice` and `workspaceInBytesOnHost` are the sizes in bytes of the device and host working spaces, respectively, and they are returned by [`cusolverDnXlarft_bufferSize`].
///
/// Currently, only the `n >= k` scenario is supported.
///
/// The generic API has four different data-type arguments: `dataTypeV` (data type of `V`), `dataTypeTau` (data type of `tau`), `dataTypeT` (data type of `T`), and `computeType` (data type of the computation).
///
/// [`cusolverDnXlarft`] only supports the following four combinations.
///
/// **Valid combinations of data types and compute types**
///
/// | **DataTypeV** | **DataTypeTau** | **DataTypeT** | **ComputeType** | **Meaning** |
/// | --- | --- | --- | --- | --- |
/// | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `CUDA_R_32F` | `SLARFT` |
/// | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `CUDA_R_64F` | `DLARFT` |
/// | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CUDA_C_32F` | `CLARFT` |
/// | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `CUDA_C_64F` | `ZLARFT` |
///
/// # Parameters
///
/// - `handle`: Handle to the cuSolverDN library context.
/// - `params`: Structure with information collected by [`cusolverDnSetAdvOptions`].
/// - `direct`: Specifies the order in which the elementary reflectors are multiplied to form the block reflector.
/// - `storev`: Specifies how the vectors which define the elementary reflectors are stored.
/// - `n`: The order of the block reflector `H`. `n >= 0`.
/// - `k`: The order of the triangular factor `T` (= the number of elementary reflectors). `k >= 1`.
/// - `dataTypeV`: Data type of array `V`.
/// - `V`: The matrix `V` of dimension `ldv * k`.
/// - `ldv`: Leading dimension of the array `V`. `ldv >= max(1,n)`.
/// - `dataTypeTau`: Data type of array `tau`.
/// - `tau`: Dimension `k`. `tau(i)` must contain the scalar factor of the elementary reflector `H(i)`.
/// - `dataTypeT`: Data type of array `T`.
/// - `T`: Dimension `ldt * k`. The `k`-by-`k` triangular factor `T` of the block reflector. If `direct == CUBLAS_DIRECT_FORWARD`, `T` is upper triangular; if `direct == CUBLAS_DIRECT_BACKWARD`, `T` is lower triangular.
/// - `ldt`: The leading dimension of the array `T`. `ldt >= k`.
/// - `computeType`: Data type of computation.
/// - `bufferOnDevice`: Device workspace. Array of type `void` of size `workspaceInBytesOnDevice` bytes.
/// - `workspaceInBytesOnDevice`: Size in bytes of `bufferOnDevice`, returned by [`cusolverDnXlarft_bufferSize`].
/// - `bufferOnHost`: Host workspace. Array of type `void` of size `workspaceInBytesOnHost` bytes.
/// - `workspaceInBytesOnHost`: Size in bytes of `bufferOnHost`, returned by [`cusolverDnXlarft_bufferSize`].
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INTERNAL_ERROR`]: An internal operation failed.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: Invalid parameters were passed (`n == 0`, `k > n`, or `storev == CUBLAS_STOREV_ROWWISE`).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_NOT_INITIALIZED`]: The library was not initialized.
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The operation completed successfully.
///
/// NOTE(review): the parameter list states `n >= 0`, while the return-value list treats `n == 0` as invalid — confirm which is correct against the cuSOLVER documentation.
pub fn cusolverDnXlarft(
handle: cusolverDnHandle_t,
params: cusolverDnParams_t,
direct: cusolverDirectMode_t,
storev: cusolverStorevMode_t,
n: i64,
k: i64,
dataTypeV: cudaDataType,
V: *const ::core::ffi::c_void,
ldv: i64,
dataTypeTau: cudaDataType,
tau: *const ::core::ffi::c_void,
dataTypeT: cudaDataType,
T: *mut ::core::ffi::c_void,
ldt: i64,
computeType: cudaDataType,
bufferOnDevice: *mut ::core::ffi::c_void,
workspaceInBytesOnDevice: size_t,
bufferOnHost: *mut ::core::ffi::c_void,
workspaceInBytesOnHost: size_t,
) -> cusolverStatus_t;
}
/// Function pointer type of the cuSOLVERDn logging callback.
///
/// Register a callback of this type with [`cusolverDnLoggerSetCallback`].
///
/// The callback takes three arguments and returns nothing:
///
/// - `logLevel`: An integer log level (presumably the level of the message being logged — confirm against the cuSOLVER documentation).
/// - `functionName`: Pointer to a C character string (presumably the name of the API function that produced the message — confirm).
/// - `message`: Pointer to a C character string (presumably the log message text — confirm).
///
/// The function pointer is wrapped in [`Option`], so a null C function pointer is represented as `None`.
pub type cusolverDnLoggerCallback_t = ::core::option::Option<
unsafe extern "C" fn(
logLevel: ::core::ffi::c_int,
functionName: *const ::core::ffi::c_char,
message: *const ::core::ffi::c_char,
),
>;
unsafe extern "C" {
/// Sets the logging callback function.
///
/// See [`cusolverStatus_t`] for a complete list of valid return codes.
///
/// # Parameters
///
/// - `callback`: The callback function to install, of type [`cusolverDnLoggerCallback_t`].
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The callback function was successfully set.
pub fn cusolverDnLoggerSetCallback(
callback: cusolverDnLoggerCallback_t,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the logging output file.
///
/// Note: once a file handle has been registered with this function, it must not be closed unless the function is called again to switch to a different file handle.
///
/// See [`cusolverStatus_t`] for a complete list of valid return codes.
///
/// # Parameters
///
/// - `file`: Pointer to an open C `FILE`. The file should have write permission.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The logging file was successfully set.
pub fn cusolverDnLoggerSetFile(file: *mut FILE) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Opens a logging output file at the given path.
///
/// See [`cusolverStatus_t`] for a complete list of valid return codes.
///
/// # Parameters
///
/// - `logFile`: Path of the logging output file, passed as a pointer to a C character string.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The logging file was successfully opened.
pub fn cusolverDnLoggerOpenFile(
logFile: *const ::core::ffi::c_char,
) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the value of the logging level.
///
/// See [`cusolverStatus_t`] for a complete list of valid return codes.
///
/// # Parameters
///
/// - `level`: Value of the logging level. The valid levels are described under "cuSOLVERDn Logging" in the cuSOLVER documentation.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_INVALID_VALUE`]: The value was not a valid logging level (see "cuSOLVERDn Logging" in the cuSOLVER documentation).
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The logging level was successfully set.
pub fn cusolverDnLoggerSetLevel(level: ::core::ffi::c_int) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Sets the value of the logging mask.
///
/// See [`cusolverStatus_t`] for a complete list of valid return codes.
///
/// # Parameters
///
/// - `mask`: Value of the logging mask. The mask values are described under "cuSOLVERDn Logging" in the cuSOLVER documentation.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: The logging mask was successfully set.
pub fn cusolverDnLoggerSetMask(mask: ::core::ffi::c_int) -> cusolverStatus_t;
}
unsafe extern "C" {
/// Disables logging for the entire run.
///
/// Takes no arguments. See [`cusolverStatus_t`] for a complete list of valid return codes.
///
/// # Return value
///
/// - [`cusolverStatus_t::CUSOLVER_STATUS_SUCCESS`]: Logging was successfully disabled.
pub fn cusolverDnLoggerForceDisable() -> cusolverStatus_t;
}