/* automatically generated by rust-bindgen 0.72.1 */
/// Byte-backed storage for a group of C bitfields.
///
/// `Storage` is expected to be a byte container (in practice `[u8; N]`); the
/// bit-level accessors are only available when it implements
/// `AsRef<[u8]> + AsMut<[u8]>`. Bit numbering inside a byte and inside a
/// multi-bit field follows the target's endianness.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct __BindgenBitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> __BindgenBitfieldUnit<Storage> {
    /// Wraps `storage` as-is.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        Self { storage }
    }
}

impl<Storage> __BindgenBitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Shift amount that selects bit `index` inside its byte: counted from the
    /// most significant bit on big-endian targets and from the least
    /// significant bit otherwise.
    #[inline]
    fn shift_in_byte(index: usize) -> usize {
        let within_byte = index % 8;
        if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        }
    }

    /// Reports whether the bit addressed by `index` is set in `byte`.
    #[inline]
    fn extract_bit(byte: u8, index: usize) -> bool {
        (byte >> Self::shift_in_byte(index)) & 1 == 1
    }

    /// Reads the bit at absolute position `index` of the storage.
    ///
    /// Panics if `index / 8` is outside the storage slice.
    #[inline]
    pub fn get_bit(&self, index: usize) -> bool {
        let bytes = self.storage.as_ref();
        let byte_index = index / 8;
        debug_assert!(byte_index < bytes.len());
        Self::extract_bit(bytes[byte_index], index)
    }

    /// Pointer-based counterpart of [`Self::get_bit`]; never forms a reference
    /// to `*this`.
    ///
    /// # Safety
    ///
    /// `this` is dereferenced: it must point to readable memory holding a
    /// `Self`, and `index / 8` must be smaller than `size_of::<Storage>()`.
    #[inline]
    pub unsafe fn raw_get_bit(this: *const Self, index: usize) -> bool {
        let byte_index = index / 8;
        debug_assert!(byte_index < core::mem::size_of::<Storage>());
        // SAFETY: relies on the caller contract above; the byte read stays
        // inside the `storage` field.
        let byte = unsafe {
            let first = core::ptr::addr_of!((*this).storage).cast::<u8>();
            *first.add(byte_index)
        };
        Self::extract_bit(byte, index)
    }

    /// Returns `byte` with the bit addressed by `index` forced to `val`.
    #[inline]
    fn change_bit(byte: u8, index: usize, val: bool) -> u8 {
        let mask = 1u8 << Self::shift_in_byte(index);
        if val {
            byte | mask
        } else {
            byte & !mask
        }
    }

    /// Writes `val` to the bit at absolute position `index` of the storage.
    ///
    /// Panics if `index / 8` is outside the storage slice.
    #[inline]
    pub fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let slot = &mut self.storage.as_mut()[index / 8];
        *slot = Self::change_bit(*slot, index, val);
    }

    /// Pointer-based counterpart of [`Self::set_bit`]; never forms a reference
    /// to `*this`.
    ///
    /// # Safety
    ///
    /// `this` is dereferenced: it must point to writable memory holding a
    /// `Self`, and `index / 8` must be smaller than `size_of::<Storage>()`.
    #[inline]
    pub unsafe fn raw_set_bit(this: *mut Self, index: usize, val: bool) {
        let byte_index = index / 8;
        debug_assert!(byte_index < core::mem::size_of::<Storage>());
        // SAFETY: relies on the caller contract above; the byte touched stays
        // inside the `storage` field.
        unsafe {
            let slot = core::ptr::addr_of_mut!((*this).storage)
                .cast::<u8>()
                .add(byte_index);
            *slot = Self::change_bit(*slot, index, val);
        }
    }

    /// Reads the `bit_width`-bit field that starts at `bit_offset` and returns
    /// it in the low bits of a `u64`.
    #[inline]
    pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        let width = bit_width as usize;
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + width) / 8 <= self.storage.as_ref().len());
        (0..width)
            .filter(|&i| self.get_bit(i + bit_offset))
            .fold(0u64, |acc, i| {
                // On big-endian targets the first storage bit of the field is
                // the most significant bit of the value.
                let index = if cfg!(target_endian = "big") {
                    width - 1 - i
                } else {
                    i
                };
                acc | (1u64 << index)
            })
    }

    /// Pointer-based counterpart of [`Self::get`].
    ///
    /// # Safety
    ///
    /// Same requirements as [`Self::raw_get_bit`] for every bit of the field.
    #[inline]
    pub unsafe fn raw_get(this: *const Self, bit_offset: usize, bit_width: u8) -> u64 {
        let width = bit_width as usize;
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < core::mem::size_of::<Storage>());
        debug_assert!((bit_offset + width) / 8 <= core::mem::size_of::<Storage>());
        let mut val = 0u64;
        for i in 0..width {
            // SAFETY: forwarded caller contract.
            let bit_is_set = unsafe { Self::raw_get_bit(this, i + bit_offset) };
            if !bit_is_set {
                continue;
            }
            let index = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            val |= 1u64 << index;
        }
        val
    }

    /// Stores the low `bit_width` bits of `val` into the field that starts at
    /// `bit_offset`.
    #[inline]
    pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        let width = bit_width as usize;
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + width) / 8 <= self.storage.as_ref().len());
        for i in 0..width {
            let val_bit_is_set = (val >> i) & 1 == 1;
            let index = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            self.set_bit(index + bit_offset, val_bit_is_set);
        }
    }

    /// Pointer-based counterpart of [`Self::set`].
    ///
    /// # Safety
    ///
    /// Same requirements as [`Self::raw_set_bit`] for every bit of the field.
    #[inline]
    pub unsafe fn raw_set(this: *mut Self, bit_offset: usize, bit_width: u8, val: u64) {
        let width = bit_width as usize;
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < core::mem::size_of::<Storage>());
        debug_assert!((bit_offset + width) / 8 <= core::mem::size_of::<Storage>());
        for i in 0..width {
            let val_bit_is_set = (val >> i) & 1 == 1;
            let index = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            // SAFETY: forwarded caller contract.
            unsafe { Self::raw_set_bit(this, index + bit_offset, val_bit_is_set) };
        }
    }
}
/// Major component of the cuSPARSE version number.
pub const CUSPARSE_VER_MAJOR: u32 = 12;
/// Minor component of the cuSPARSE version number.
pub const CUSPARSE_VER_MINOR: u32 = 7;
/// Patch component of the cuSPARSE version number.
pub const CUSPARSE_VER_PATCH: u32 = 9;
/// Build component of the cuSPARSE version number; not part of `CUSPARSE_VERSION`.
pub const CUSPARSE_VER_BUILD: u32 = 17;
/// Combined version number, `major * 1000 + minor * 100 + patch` (evaluates to 12709).
pub const CUSPARSE_VERSION: u32 =
    CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH;
/// Binding for the C typedef `__uint64_t`, expressed as `c_ulong`.
// NOTE(review): `c_ulong` is only 64 bits wide on LP64 targets — presumably these
// bindings were generated for one; confirm before building for other targets.
pub type __uint64_t = ::core::ffi::c_ulong;
/// Binding for the C typedef `__off_t`, expressed as `c_long`; type of `_IO_FILE::_old_offset`.
pub type __off_t = ::core::ffi::c_long;
/// Binding for the C typedef `__off64_t`, expressed as `c_long`; type of `_IO_FILE::_offset`.
pub type __off64_t = ::core::ffi::c_long;
/// Pair of `f32` values with C field layout and 8-byte alignment.
///
/// Also exposed under the names [`cuFloatComplex`] and [`cuComplex`].
// NOTE(review): presumably `x` is the real part and `y` the imaginary part when
// this is used as a complex number — confirm against the CUDA headers.
#[repr(C)]
#[repr(align(8))]
#[derive(Debug, Default, Copy, Clone, PartialOrd, PartialEq)]
pub struct float2 {
/// First component.
pub x: f32,
/// Second component.
pub y: f32,
}
/// Pair of `f64` values with C field layout and 16-byte alignment.
///
/// Also exposed under the name [`cuDoubleComplex`].
// NOTE(review): presumably `x` is the real part and `y` the imaginary part when
// this is used as a complex number — confirm against the CUDA headers.
#[repr(C)]
#[repr(align(16))]
#[derive(Debug, Default, Copy, Clone, PartialOrd, PartialEq)]
pub struct double2 {
/// First component.
pub x: f64,
/// Second component.
pub y: f64,
}
/// Complex-number name for [`float2`] (two `f32` components).
pub type cuFloatComplex = float2;
/// Complex-number name for [`double2`] (two `f64` components).
pub type cuDoubleComplex = double2;
/// Short name for [`cuFloatComplex`].
pub type cuComplex = cuFloatComplex;
/// Binding for the C `size_t`, expressed as `c_ulong`.
// NOTE(review): matches `size_t` only on targets where `unsigned long` is
// pointer-sized — confirm for the targets this crate supports.
pub type size_t = ::core::ffi::c_ulong;
/// Opaque placeholder for the C type `CUstream_st`.
///
/// The only field is a private zero-length array, so the type carries no
/// data and cannot be constructed outside this module.
// NOTE(review): presumably the pointee of `cudaStream_t`, which is declared
// outside the visible part of this file — confirm.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CUstream_st {
_unused: [u8; 0],
}
/// The C `FILE` type, bound to the [`_IO_FILE`] layout declared in this file.
pub type FILE = _IO_FILE;
/// Opaque placeholder for the C type `_IO_marker`; only used behind the
/// `_IO_FILE::_markers` pointer.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_marker {
_unused: [u8; 0],
}
/// Opaque placeholder for the C type `_IO_codecvt`; only used behind the
/// `_IO_FILE::_codecvt` pointer.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_codecvt {
_unused: [u8; 0],
}
/// Opaque placeholder for the C type `_IO_wide_data`; only used behind the
/// `_IO_FILE::_wide_data` pointer.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_wide_data {
_unused: [u8; 0],
}
/// Lock type referenced by `_IO_FILE::_lock`; bound as `c_void`, so it is only
/// meaningful behind a pointer.
pub type _IO_lock_t = ::core::ffi::c_void;
/// Field-by-field `#[repr(C)]` mirror of the C `_IO_FILE` structure, which is
/// also exposed as [`FILE`].
///
/// Field order and types define the binary layout and must not be changed by
/// hand. The derived comparison and hashing traits operate on the raw pointer
/// values of the pointer fields, not on what they point to.
// NOTE(review): this layout is specific to the C library the bindings were
// generated against (presumably glibc) — confirm before using the fields
// directly on another platform.
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub struct _IO_FILE {
pub _flags: ::core::ffi::c_int,
pub _IO_read_ptr: *mut ::core::ffi::c_char,
pub _IO_read_end: *mut ::core::ffi::c_char,
pub _IO_read_base: *mut ::core::ffi::c_char,
pub _IO_write_base: *mut ::core::ffi::c_char,
pub _IO_write_ptr: *mut ::core::ffi::c_char,
pub _IO_write_end: *mut ::core::ffi::c_char,
pub _IO_buf_base: *mut ::core::ffi::c_char,
pub _IO_buf_end: *mut ::core::ffi::c_char,
pub _IO_save_base: *mut ::core::ffi::c_char,
pub _IO_backup_base: *mut ::core::ffi::c_char,
pub _IO_save_end: *mut ::core::ffi::c_char,
pub _markers: *mut _IO_marker,
/// Pointer to another `_IO_FILE`.
pub _chain: *mut _IO_FILE,
pub _fileno: ::core::ffi::c_int,
/// Zero-length `u32` array: occupies no bytes but gives the bitfield unit
/// that follows at least `u32` alignment.
pub _bitfield_align_1: [u32; 0],
/// Three bytes of bitfield storage holding the 24-bit `_flags2` field; use
/// `_flags2` / `set__flags2` rather than touching the bytes.
pub _bitfield_1: __BindgenBitfieldUnit<[u8; 3usize]>,
pub _short_backupbuf: [::core::ffi::c_char; 1usize],
pub _old_offset: __off_t,
pub _cur_column: ::core::ffi::c_ushort,
pub _vtable_offset: ::core::ffi::c_schar,
pub _shortbuf: [::core::ffi::c_char; 1usize],
pub _lock: *mut _IO_lock_t,
pub _offset: __off64_t,
pub _codecvt: *mut _IO_codecvt,
pub _wide_data: *mut _IO_wide_data,
pub _freeres_list: *mut _IO_FILE,
pub _freeres_buf: *mut ::core::ffi::c_void,
pub _prevchain: *mut *mut _IO_FILE,
pub _mode: ::core::ffi::c_int,
pub _unused3: ::core::ffi::c_int,
pub _total_written: __uint64_t,
pub _unused2: [::core::ffi::c_char; 8usize],
}
impl Default for _IO_FILE {
    /// Returns an `_IO_FILE` whose every byte is zero: null pointers, zero
    /// integers and a cleared bitfield unit.
    fn default() -> Self {
        // SAFETY: every field of `_IO_FILE` is an integer, an array of integers
        // or a raw pointer, and the all-zero bit pattern is valid for each.
        unsafe { ::core::mem::MaybeUninit::<Self>::zeroed().assume_init() }
    }
}
impl _IO_FILE {
    /// Reads the 24-bit `_flags2` bitfield stored in `_bitfield_1`.
    #[inline]
    pub fn _flags2(&self) -> ::core::ffi::c_int {
        let raw = self._bitfield_1.get(0usize, 24u8) as u32;
        // Same-width integer cast: the bit pattern is kept unchanged.
        raw as ::core::ffi::c_int
    }

    /// Stores the low 24 bits of `val` into the `_flags2` bitfield.
    #[inline]
    pub fn set__flags2(&mut self, val: ::core::ffi::c_int) {
        // Reinterpret as unsigned first so widening to `u64` zero-extends.
        let raw = val as u32;
        self._bitfield_1.set(0usize, 24u8, raw as u64)
    }

    /// Pointer-based variant of [`Self::_flags2`]; never forms a reference to
    /// `*this`.
    ///
    /// # Safety
    ///
    /// `this` is dereferenced and must point to readable memory holding an
    /// `_IO_FILE`.
    #[inline]
    pub unsafe fn _flags2_raw(this: *const Self) -> ::core::ffi::c_int {
        // SAFETY: relies on the caller contract above.
        let raw = unsafe {
            let unit = ::core::ptr::addr_of!((*this)._bitfield_1);
            <__BindgenBitfieldUnit<[u8; 3usize]>>::raw_get(unit, 0usize, 24u8)
        } as u32;
        raw as ::core::ffi::c_int
    }

    /// Pointer-based variant of [`Self::set__flags2`]; never forms a reference
    /// to `*this`.
    ///
    /// # Safety
    ///
    /// `this` is dereferenced and must point to writable memory holding an
    /// `_IO_FILE`.
    #[inline]
    pub unsafe fn set__flags2_raw(this: *mut Self, val: ::core::ffi::c_int) {
        let raw = val as u32;
        // SAFETY: relies on the caller contract above.
        unsafe {
            let unit = ::core::ptr::addr_of_mut!((*this)._bitfield_1);
            <__BindgenBitfieldUnit<[u8; 3usize]>>::raw_set(unit, 0usize, 24u8, raw as u64)
        }
    }

    /// Builds a bitfield unit whose `_flags2` field holds the low 24 bits of
    /// the argument.
    #[inline]
    pub fn new_bitfield_1(
        _flags2: ::core::ffi::c_int,
    ) -> __BindgenBitfieldUnit<[u8; 3usize]> {
        let mut unit = <__BindgenBitfieldUnit<[u8; 3usize]>>::default();
        let raw = _flags2 as u32;
        unit.set(0usize, 24u8, raw as u64);
        unit
    }
}
/// Opaque placeholder for the cuSPARSE context; only ever used behind
/// [`cusparseHandle_t`].
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseContext {
_unused: [u8; 0],
}
/// This is a pointer type to an opaque cuSPARSE context, which the user must initialize by calling [`cusparseCreate`] prior to calling any other library function. The handle created and returned by [`cusparseCreate`] must be passed to every cuSPARSE function.
pub type cusparseHandle_t = *mut cusparseContext;
/// Opaque placeholder for the matrix descriptor; only ever used behind
/// [`cusparseMatDescr_t`].
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseMatDescr {
_unused: [u8; 0],
}
/// This structure is used to describe the shape and properties of a matrix.
///
/// Created by [`cusparseCreateMatDescr`] and released by [`cusparseDestroyMatDescr`].
pub type cusparseMatDescr_t = *mut cusparseMatDescr;
/// Opaque placeholder for the bsrsv2 solve and analysis structure.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrsv2Info {
_unused: [u8; 0],
}
/// Pointer to the opaque bsrsv2 solve and analysis structure; created by
/// [`cusparseCreateBsrsv2Info`] and released by [`cusparseDestroyBsrsv2Info`].
pub type bsrsv2Info_t = *mut bsrsv2Info;
/// Opaque placeholder for the bsrsm2 solve and analysis structure.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrsm2Info {
_unused: [u8; 0],
}
/// Pointer to the opaque bsrsm2 solve and analysis structure; created by
/// [`cusparseCreateBsrsm2Info`] and released by [`cusparseDestroyBsrsm2Info`].
pub type bsrsm2Info_t = *mut bsrsm2Info;
/// Opaque placeholder for the incomplete Cholesky solve and analysis structure.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csric02Info {
_unused: [u8; 0],
}
/// Pointer to the opaque incomplete Cholesky solve and analysis structure;
/// created by [`cusparseCreateCsric02Info`] and released by
/// [`cusparseDestroyCsric02Info`].
pub type csric02Info_t = *mut csric02Info;
/// Opaque placeholder for the block incomplete Cholesky solve and analysis
/// structure.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsric02Info {
_unused: [u8; 0],
}
/// Pointer to the opaque block incomplete Cholesky solve and analysis
/// structure; created by [`cusparseCreateBsric02Info`] and released by
/// [`cusparseDestroyBsric02Info`].
pub type bsric02Info_t = *mut bsric02Info;
/// Opaque placeholder for the incomplete LU solve and analysis structure.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csrilu02Info {
_unused: [u8; 0],
}
/// Pointer to the opaque incomplete LU solve and analysis structure; created
/// by [`cusparseCreateCsrilu02Info`] and released by
/// [`cusparseDestroyCsrilu02Info`].
pub type csrilu02Info_t = *mut csrilu02Info;
/// Opaque placeholder for the block incomplete LU solve and analysis structure.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrilu02Info {
_unused: [u8; 0],
}
/// Pointer to the opaque block incomplete LU solve and analysis structure;
/// created by [`cusparseCreateBsrilu02Info`] and released by
/// [`cusparseDestroyBsrilu02Info`].
pub type bsrilu02Info_t = *mut bsrilu02Info;
/// Opaque placeholder for the C type `csru2csrInfo`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csru2csrInfo {
_unused: [u8; 0],
}
/// Pointer to the opaque `csru2csrInfo` structure; taken by
/// [`cusparseCreateCsru2csrInfo`] (as an out-pointer) and
/// [`cusparseDestroyCsru2csrInfo`].
pub type csru2csrInfo_t = *mut csru2csrInfo;
/// Opaque placeholder for the structure behind [`cusparseColorInfo_t`].
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseColorInfo {
_unused: [u8; 0],
}
/// This is a pointer type to an opaque structure holding the information used in `csrcolor()`.
///
/// Created by [`cusparseCreateColorInfo`] and released by [`cusparseDestroyColorInfo`].
#[deprecated]
pub type cusparseColorInfo_t = *mut cusparseColorInfo;
/// Opaque placeholder for the `prune` structure.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct pruneInfo {
_unused: [u8; 0],
}
/// Pointer to the opaque `prune` structure; created by
/// [`cusparseCreatePruneInfo`] and released by [`cusparseDestroyPruneInfo`].
pub type pruneInfo_t = *mut pruneInfo;
/// This data type represents the status returned by the library functions and it can have the following values.
// NOTE(review): this enum is used directly as the return type of the
// `extern "C"` functions in this file; a status outside 0..=11 coming back from
// the library would not be a valid value of this type — confirm that the linked
// library version matches these bindings.
#[repr(u32)]
#[derive(
Debug,
Copy,
Clone,
Hash,
PartialOrd,
Ord,
PartialEq,
Eq,
TryFromPrimitive,
IntoPrimitive,
)]
pub enum cusparseStatus_t {
/// The operation completed successfully.
CUSPARSE_STATUS_SUCCESS = 0,
/// The cuSPARSE library was not initialized. This is usually caused by the lack of a prior call, an error in the CUDA Runtime API called by the cuSPARSE routine, or an error in the hardware setup.
///
/// **To correct:** call [`cusparseCreate`] prior to the function call; and check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed. The error also applies to generic APIs (cuSPARSE Generic APIs) for indicating a matrix/vector descriptor not initialized.
CUSPARSE_STATUS_NOT_INITIALIZED = 1,
/// Resource allocation failed inside the cuSPARSE library. This is usually caused by a device memory allocation (`cudaMalloc()`) or by a host memory allocation failure.
///
/// **To correct:** prior to the function call, deallocate previously allocated memory as much as possible.
CUSPARSE_STATUS_ALLOC_FAILED = 2,
/// An unsupported value or parameter was passed to the function (a negative vector size, for example).
///
/// **To correct:** ensure that all the parameters being passed have valid values.
CUSPARSE_STATUS_INVALID_VALUE = 3,
/// The function requires a feature absent from the device architecture.
///
/// **To correct:** compile and run the application on a device with appropriate compute capability.
CUSPARSE_STATUS_ARCH_MISMATCH = 4,
// NOTE(review): no description is available for this variant in the generated
// documentation — consult the cuSPARSE reference before relying on its meaning.
CUSPARSE_STATUS_MAPPING_ERROR = 5,
/// The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.
///
/// **To correct:** check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed.
CUSPARSE_STATUS_EXECUTION_FAILED = 6,
/// An internal cuSPARSE operation failed.
///
/// **To correct:** check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed. Also, check that the memory passed as a parameter to the routine is not being deallocated prior to the routine completion.
CUSPARSE_STATUS_INTERNAL_ERROR = 7,
/// The matrix type is not supported by this function. This is usually caused by passing an invalid matrix descriptor to the function.
///
/// **To correct:** check that the fields in `cusparseMatDescr_t descrA` were set correctly.
CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
// NOTE(review): no description is available for this variant in the generated
// documentation — consult the cuSPARSE reference before relying on its meaning.
CUSPARSE_STATUS_ZERO_PIVOT = 9,
/// The operation or data type combination is currently not supported by the function.
CUSPARSE_STATUS_NOT_SUPPORTED = 10,
/// The resources for the computation, such as GPU global or shared memory, are not sufficient to complete the operation. The error can also indicate that the current computation mode (e.g. bit size of sparse matrix indices) does not allow to handle the given input.
CUSPARSE_STATUS_INSUFFICIENT_RESOURCES = 11,
}
/// This type indicates whether the scalar values are passed by reference on the host or device. It is important to point out that if several scalar values are passed by reference in the function call, all of them will conform to the same single pointer mode. The pointer mode can be set and retrieved using [`cusparseSetPointerMode`] and [`cusparseGetPointerMode`] routines, respectively.
// `#[repr(u32)]` with explicit discriminants pins the representation: values of
// this type are passed directly to and from the `extern "C"` functions above.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparsePointerMode_t {
/// The scalars are passed by reference on the host.
CUSPARSE_POINTER_MODE_HOST = 0,
/// The scalars are passed by reference on the device.
CUSPARSE_POINTER_MODE_DEVICE = 1,
}
/// This type indicates whether the operation is performed only on indices or on data and indices.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseAction_t {
/// The operation is performed only on indices.
CUSPARSE_ACTION_SYMBOLIC = 0,
/// The operation is performed on data and indices.
CUSPARSE_ACTION_NUMERIC = 1,
}
/// This type indicates the type of matrix stored in sparse storage. Notice that for symmetric, Hermitian and triangular matrices only their lower or upper part is assumed to be stored.
///
/// The whole idea of matrix type and fill mode is to keep minimum storage for symmetric/Hermitian matrix, and also to take advantage of symmetric property on SpMV (Sparse Matrix Vector multiplication). To compute `y=A*x` when `A` is symmetric and only lower triangular part is stored, two steps are needed. First step is to compute `y=(L+D)*x` and second step is to compute `y=L^T*x + y`. Given the fact that the transpose operation `y=L^T*x` is 10x slower than non-transpose version `y=L*x`, the symmetric property does not show up any performance gain. It is better for the user to extend the symmetric matrix to a general matrix and apply `y=A*x` with matrix type [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
///
/// In general, SpMV, preconditioners (incomplete Cholesky or incomplete LU) and triangular solver are combined together in iterative solvers, for example PCG and GMRES. If the user always uses general matrix (instead of symmetric matrix), there is no need to support other than general matrix in preconditioners. Therefore the new routines, `\[bsr|csr\]sv2` (triangular solver), `\[bsr|csr\]ilu02` (incomplete LU) and `\[bsr|csr\]ic02` (incomplete Cholesky), only support matrix type [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
///
/// Set and read on a matrix descriptor through [`cusparseSetMatType`] and [`cusparseGetMatType`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseMatrixType_t {
/// The matrix is general.
CUSPARSE_MATRIX_TYPE_GENERAL = 0,
/// The matrix is symmetric.
CUSPARSE_MATRIX_TYPE_SYMMETRIC = 1,
/// The matrix is Hermitian.
CUSPARSE_MATRIX_TYPE_HERMITIAN = 2,
/// The matrix is triangular.
CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3,
}
/// This type indicates if the lower or upper part of a matrix is stored in sparse storage.
///
/// Set and read on a matrix descriptor through [`cusparseSetMatFillMode`] and [`cusparseGetMatFillMode`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseFillMode_t {
/// The lower triangular part is stored.
CUSPARSE_FILL_MODE_LOWER = 0,
/// The upper triangular part is stored.
CUSPARSE_FILL_MODE_UPPER = 1,
}
/// This type indicates if the matrix diagonal entries are unity. The diagonal elements are always assumed to be present, but if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is passed to an API routine, then the routine assumes that all diagonal entries are unity and will not read or modify those entries. Note that in this case the routine assumes the diagonal entries are equal to one, regardless of what those entries are actually set to in memory.
///
/// Set and read on a matrix descriptor through [`cusparseSetMatDiagType`] and [`cusparseGetMatDiagType`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseDiagType_t {
/// The matrix diagonal has non-unit elements.
CUSPARSE_DIAG_TYPE_NON_UNIT = 0,
/// The matrix diagonal has unit elements.
CUSPARSE_DIAG_TYPE_UNIT = 1,
}
/// This type indicates if the base of the matrix indices is zero or one.
///
/// Set and read on a matrix descriptor through [`cusparseSetMatIndexBase`] and [`cusparseGetMatIndexBase`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseIndexBase_t {
/// The base index is zero (C compatibility).
CUSPARSE_INDEX_BASE_ZERO = 0,
/// The base index is one (Fortran compatibility).
CUSPARSE_INDEX_BASE_ONE = 1,
}
/// This type indicates which operation is applied to the related input (e.g. sparse matrix, or vector).
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseOperation_t {
/// The non-transpose operation is selected.
CUSPARSE_OPERATION_NON_TRANSPOSE = 0,
/// The transpose operation is selected.
CUSPARSE_OPERATION_TRANSPOSE = 1,
/// The conjugate transpose operation is selected.
CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2,
}
/// This type indicates whether the elements of a dense matrix should be parsed by rows or by columns (assuming column-major storage in memory of the dense matrix) in function cusparse\[S|D|C|Z\]nnz. In addition, the storage format of blocks in BSR format is also controlled by this type.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseDirection_t {
/// The matrix should be parsed by rows.
CUSPARSE_DIRECTION_ROW = 0,
/// The matrix should be parsed by columns.
CUSPARSE_DIRECTION_COLUMN = 1,
}
/// This type indicates whether level information is generated and used in `csrsv2`, `csric02`, `csrilu02`, `bsrsv2`, `bsric02` and `bsrilu02`.
///
/// Both the type and each of its variants are marked deprecated.
#[deprecated]
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSolvePolicy_t {
/// No level information is generated and used.
#[deprecated]
CUSPARSE_SOLVE_POLICY_NO_LEVEL = 0,
/// Generate and use level information.
#[deprecated]
CUSPARSE_SOLVE_POLICY_USE_LEVEL = 1,
}
/// Two-valued selector with variants `CUSPARSE_COLOR_ALG0` and `CUSPARSE_COLOR_ALG1`.
// NOTE(review): undocumented in the generated bindings; presumably selects the
// algorithm used by the `csrcolor()` routines — confirm against the cuSPARSE
// reference.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseColorAlg_t {
CUSPARSE_COLOR_ALG0 = 0,
CUSPARSE_COLOR_ALG1 = 1,
}
unsafe extern "C" {
/// This function initializes the cuSPARSE library and creates a handle on the cuSPARSE context. It must be called before any other cuSPARSE API function is invoked. It allocates hardware resources necessary for accessing the GPU.
///
/// # Parameters
///
/// - `handle`: The pointer to the handle to the cuSPARSE context.
pub fn cusparseCreate(handle: *mut cusparseHandle_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function releases CPU-side resources used by the cuSPARSE library. The release of GPU-side resources may be deferred until the application shuts down.
///
/// # Parameters
///
/// - `handle`: The handle to the cuSPARSE context.
pub fn cusparseDestroy(handle: cusparseHandle_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the version number of the cuSPARSE library.
///
/// # Parameters
///
/// - `handle`: cuSPARSE handle.
/// - `version`: The version number of the library.
pub fn cusparseGetVersion(
handle: cusparseHandle_t,
version: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function returns the value of the requested property. Refer to [`libraryPropertyType`] (defined in `library_types.h`) for supported types.
///
/// # Parameters
///
/// - `type_`: The property to query, a [`libraryPropertyType`] value.
/// - `value`: Value of the requested property.
pub fn cusparseGetProperty(
type_: libraryPropertyType,
value: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function returns the string representation of an error code enum name. If the error code is not recognized, “unrecognized error code” is returned.
///
/// # Parameters
///
/// - `status`: Error code to convert to string.
pub fn cusparseGetErrorName(status: cusparseStatus_t) -> *const ::core::ffi::c_char;
}
unsafe extern "C" {
// NOTE(review): the generated documentation has no summary for this function;
// presumably it returns a human-readable description, as opposed to the enum
// name returned by `cusparseGetErrorName` — confirm against the cuSPARSE
// reference.
/// Returns a C string pointer for the given status code.
///
/// # Parameters
///
/// - `status`: Error code to convert to string.
pub fn cusparseGetErrorString(
status: cusparseStatus_t,
) -> *const ::core::ffi::c_char;
}
unsafe extern "C" {
/// This function sets the stream to be used by the cuSPARSE library to execute its routines.
///
/// # Parameters
///
/// - `handle`: The handle to the cuSPARSE context.
/// - `streamId`: The stream to be used by the library.
pub fn cusparseSetStream(
handle: cusparseHandle_t,
streamId: cudaStream_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function gets the cuSPARSE library stream, which is being used to execute all calls to the cuSPARSE library functions. If the cuSPARSE library stream is not set, all kernels use the default NULL stream.
///
/// # Parameters
///
/// - `handle`: The handle to the cuSPARSE context.
/// - `streamId`: The stream used by the library.
pub fn cusparseGetStream(
handle: cusparseHandle_t,
streamId: *mut cudaStream_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function obtains the pointer mode used by the cuSPARSE library. Please see the section on the [`cusparsePointerMode_t`] type for more details.
///
/// # Parameters
///
/// - `handle`: The handle to the cuSPARSE context.
/// - `mode`: One of the enumerated pointer mode types.
pub fn cusparseGetPointerMode(
handle: cusparseHandle_t,
mode: *mut cusparsePointerMode_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the pointer mode used by the cuSPARSE library. The `default` is for the values to be passed by reference on the host. Please see the section on the [`cusparsePointerMode_t`] type for more details.
///
/// # Parameters
///
/// - `handle`: The handle to the cuSPARSE context.
/// - `mode`: One of the enumerated pointer mode types.
pub fn cusparseSetPointerMode(
handle: cusparseHandle_t,
mode: cusparsePointerMode_t,
) -> cusparseStatus_t;
}
/// Signature of the logging callback accepted by [`cusparseLoggerSetCallback`].
///
/// The callback is an optional `extern "C"` function; it receives:
///
/// - `logLevel`: an integer log level.
/// - `functionName`: a C string pointer.
/// - `message`: a C string pointer.
pub type cusparseLoggerCallback_t = ::core::option::Option<
unsafe extern "C" fn(
logLevel: ::core::ffi::c_int,
functionName: *const ::core::ffi::c_char,
message: *const ::core::ffi::c_char,
),
>;
unsafe extern "C" {
/// `Experimental`: The function sets the logging callback function.
///
/// # Parameters
///
/// - `callback`: The callback function; see [`cusparseLoggerCallback_t`] for its signature.
pub fn cusparseLoggerSetCallback(
callback: cusparseLoggerCallback_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `Experimental`: The function sets the logging output file. Note: once registered using this function call, the provided file handle must not be closed unless the function is called again to switch to a different file handle.
///
/// # Parameters
///
/// - `file`: Pointer to an open file. File should have write permission.
pub fn cusparseLoggerSetFile(file: *mut FILE) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `Experimental`: The function opens a logging output file in the given path.
///
/// # Parameters
///
/// - `logFile`: Path of the logging output file.
pub fn cusparseLoggerOpenFile(
logFile: *const ::core::ffi::c_char,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `Experimental`: The function sets the value of the logging level.
///
/// # Parameters
///
/// - `level`: Value of the logging level.
pub fn cusparseLoggerSetLevel(level: ::core::ffi::c_int) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `Experimental`: The function sets the value of the logging mask.
///
/// # Parameters
///
/// - `mask`: Value of the logging mask.
pub fn cusparseLoggerSetMask(mask: ::core::ffi::c_int) -> cusparseStatus_t;
}
unsafe extern "C" {
// NOTE(review): undocumented in the generated bindings; presumably disables
// logging for the whole process — confirm against the cuSPARSE reference.
/// Takes no arguments and returns a [`cusparseStatus_t`].
pub fn cusparseLoggerForceDisable() -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the matrix descriptor. It sets the fields `MatrixType` and `IndexBase` to the `default` values [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] and [`cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO`], respectively, while leaving other fields uninitialized.
///
/// # Parameters
///
/// - `descrA`: the pointer to the matrix descriptor.
pub fn cusparseCreateMatDescr(descrA: *mut cusparseMatDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function releases the memory allocated for the matrix descriptor.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
pub fn cusparseDestroyMatDescr(descrA: cusparseMatDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `MatrixType` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
/// - `type_`: the [`cusparseMatrixType_t`] value to store in the `MatrixType` field.
pub fn cusparseSetMatType(
descrA: cusparseMatDescr_t,
type_: cusparseMatrixType_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `MatrixType` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
pub fn cusparseGetMatType(descrA: cusparseMatDescr_t) -> cusparseMatrixType_t;
}
unsafe extern "C" {
/// This function sets the `FillMode` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
/// - `fillMode`: the [`cusparseFillMode_t`] value to store in the `FillMode` field.
pub fn cusparseSetMatFillMode(
descrA: cusparseMatDescr_t,
fillMode: cusparseFillMode_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `FillMode` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
pub fn cusparseGetMatFillMode(descrA: cusparseMatDescr_t) -> cusparseFillMode_t;
}
unsafe extern "C" {
/// This function sets the `DiagType` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
/// - `diagType`: the [`cusparseDiagType_t`] value to store in the `DiagType` field.
pub fn cusparseSetMatDiagType(
descrA: cusparseMatDescr_t,
diagType: cusparseDiagType_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `DiagType` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
pub fn cusparseGetMatDiagType(descrA: cusparseMatDescr_t) -> cusparseDiagType_t;
}
unsafe extern "C" {
/// This function sets the `IndexBase` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
/// - `base`: the [`cusparseIndexBase_t`] value to store in the `IndexBase` field.
pub fn cusparseSetMatIndexBase(
descrA: cusparseMatDescr_t,
base: cusparseIndexBase_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `IndexBase` field of the matrix descriptor `descrA`.
///
/// # Parameters
///
/// - `descrA`: the matrix descriptor.
pub fn cusparseGetMatIndexBase(descrA: cusparseMatDescr_t) -> cusparseIndexBase_t;
}
unsafe extern "C" {
/// This function creates and initializes the solve and analysis structure of incomplete Cholesky to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the solve and analysis structure of incomplete Cholesky.
#[deprecated]
pub fn cusparseCreateCsric02Info(info: *mut csric02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the solve `(csric02_solve)` and analysis `(csric02_analysis)` structure.
#[deprecated]
pub fn cusparseDestroyCsric02Info(info: csric02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the solve and analysis structure of block incomplete Cholesky to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the solve and analysis structure of block incomplete Cholesky.
#[deprecated]
pub fn cusparseCreateBsric02Info(info: *mut bsric02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the solve `(bsric02_solve)` and analysis `(bsric02_analysis)` structure.
#[deprecated]
pub fn cusparseDestroyBsric02Info(info: bsric02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the solve and analysis structure of incomplete LU to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the solve and analysis structure of incomplete LU.
#[deprecated]
pub fn cusparseCreateCsrilu02Info(info: *mut csrilu02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the solve `(csrilu02_solve)` and analysis `(csrilu02_analysis)` structure.
#[deprecated]
pub fn cusparseDestroyCsrilu02Info(info: csrilu02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the solve and analysis structure of block incomplete LU to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the solve and analysis structure of block incomplete LU.
#[deprecated]
pub fn cusparseCreateBsrilu02Info(info: *mut bsrilu02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the solve `(bsrilu02_solve)` and analysis `(bsrilu02_analysis)` structure.
#[deprecated]
pub fn cusparseDestroyBsrilu02Info(info: bsrilu02Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the solve and analysis structure of bsrsv2 to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the solve and analysis structure of bsrsv2.
#[deprecated]
pub fn cusparseCreateBsrsv2Info(info: *mut bsrsv2Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the solve `(bsrsv2_solve)` and analysis `(bsrsv2_analysis)` structure.
#[deprecated]
pub fn cusparseDestroyBsrsv2Info(info: bsrsv2Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the solve and analysis structure of bsrsm2 to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the solve and analysis structure of bsrsm2.
#[deprecated]
pub fn cusparseCreateBsrsm2Info(info: *mut bsrsm2Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the solve `(bsrsm2_solve)` and analysis `(bsrsm2_analysis)` structure.
#[deprecated]
pub fn cusparseDestroyBsrsm2Info(info: bsrsm2Info_t) -> cusparseStatus_t;
}
unsafe extern "C" {
// NOTE(review): undocumented in the generated bindings; by analogy with the
// other `cusparseCreate*Info` functions this presumably creates the structure
// and stores its pointer through `info` — confirm against the cuSPARSE
// reference.
/// Takes an out-pointer to a [`csru2csrInfo_t`] and returns a status.
pub fn cusparseCreateCsru2csrInfo(info: *mut csru2csrInfo_t) -> cusparseStatus_t;
}
unsafe extern "C" {
// NOTE(review): undocumented in the generated bindings; by analogy with the
// other `cusparseDestroy*Info` functions this presumably releases the
// structure — confirm against the cuSPARSE reference.
/// Takes a [`csru2csrInfo_t`] and returns a status.
pub fn cusparseDestroyCsru2csrInfo(info: csru2csrInfo_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the [`cusparseColorInfo_t`] structure to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the [`cusparseColorInfo_t`] structure.
#[deprecated]
pub fn cusparseCreateColorInfo(info: *mut cusparseColorInfo_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the pointer to the structure of `csrcolor()`.
#[deprecated]
pub fn cusparseDestroyColorInfo(info: cusparseColorInfo_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function creates and initializes the structure of `prune` to `default` values.
///
/// # Parameters
///
/// - `info`: the pointer to the structure of `prune`.
#[deprecated]
pub fn cusparseCreatePruneInfo(info: *mut pruneInfo_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function destroys and releases any memory required by the structure.
///
/// # Parameters
///
/// - `info`: the structure of `prune`.
#[deprecated]
pub fn cusparseDestroyPruneInfo(info: pruneInfo_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `f32` variant. `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture
///
/// The function [`cusparseSgemvi_bufferSize`] returns the size of the buffer used in `cusparseSgemvi()` (presumably the `pBuffer` argument).
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseSgemvi(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
A: *const f32,
lda: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
xVal: *const f32,
xInd: *const ::core::ffi::c_int,
beta: *const f32,
y: *mut f32,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in [`cusparseSgemvi`].
///
/// The size is presumably written to `pBufferSize`; whether it is counted in bytes or in elements
/// is not visible in these bindings — check the cuSPARSE reference.
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseSgemvi_bufferSize(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
pBufferSize: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `f64` variant. `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture
///
/// The function [`cusparseDgemvi_bufferSize`] returns the size of the buffer used in `cusparseDgemvi()` (presumably the `pBuffer` argument).
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseDgemvi(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f64,
A: *const f64,
lda: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
xVal: *const f64,
xInd: *const ::core::ffi::c_int,
beta: *const f64,
y: *mut f64,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in [`cusparseDgemvi`].
///
/// The size is presumably written to `pBufferSize`; whether it is counted in bytes or in elements
/// is not visible in these bindings — check the cuSPARSE reference.
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseDgemvi_bufferSize(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
pBufferSize: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `cuComplex` variant. `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture
///
/// The function [`cusparseCgemvi_bufferSize`] returns the size of the buffer used in `cusparseCgemvi()` (presumably the `pBuffer` argument).
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseCgemvi(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuComplex,
A: *const cuComplex,
lda: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
xVal: *const cuComplex,
xInd: *const ::core::ffi::c_int,
beta: *const cuComplex,
y: *mut cuComplex,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in [`cusparseCgemvi`].
///
/// The size is presumably written to `pBufferSize`; whether it is counted in bytes or in elements
/// is not visible in these bindings — check the cuSPARSE reference.
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseCgemvi_bufferSize(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
pBufferSize: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `cuDoubleComplex` variant. `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture
///
/// The function [`cusparseZgemvi_bufferSize`] returns the size of the buffer used in `cusparseZgemvi()` (presumably the `pBuffer` argument).
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseZgemvi(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
xVal: *const cuDoubleComplex,
xInd: *const ::core::ffi::c_int,
beta: *const cuDoubleComplex,
y: *mut cuDoubleComplex,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in [`cusparseZgemvi`].
///
/// The size is presumably written to `pBufferSize`; whether it is counted in bytes or in elements
/// is not visible in these bindings — check the cuSPARSE reference.
///
/// The function returns a [`cusparseStatus_t`].
pub fn cusparseZgemvi_bufferSize(
handle: cusparseHandle_t,
transA: cusparseOperation_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
pBufferSize: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `f32` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// `bsrmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Several comments on `bsrmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported, that is, $\operatorname{op}(A) = A$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
/// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
///
/// For example, suppose the user has a CSR format and wants to try `bsrmv()`: the upstream cuSPARSE reference demonstrates how to use `csr2bsr()` conversion and `bsrmv()` multiplication in single precision (the example code is not reproduced in these bindings).
pub fn cusparseSbsrmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f32,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const f32,
beta: *const f32,
y: *mut f32,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `f64` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// `bsrmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Several comments on `bsrmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported, that is, $\operatorname{op}(A) = A$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
/// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
///
/// For example, suppose the user has a CSR format and wants to try `bsrmv()`: the upstream cuSPARSE reference demonstrates how to use `csr2bsr()` conversion and `bsrmv()` multiplication in single precision (the example code is not reproduced in these bindings).
pub fn cusparseDbsrmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f64,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const f64,
beta: *const f64,
y: *mut f64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `cuComplex` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// `bsrmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Several comments on `bsrmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported, that is, $\operatorname{op}(A) = A$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
/// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
///
/// For example, suppose the user has a CSR format and wants to try `bsrmv()`: the upstream cuSPARSE reference demonstrates how to use `csr2bsr()` conversion and `bsrmv()` multiplication in single precision (the example code is not reproduced in these bindings).
pub fn cusparseCbsrmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const cuComplex,
beta: *const cuComplex,
y: *mut cuComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the matrix-vector operation
///
/// $$
/// y = \alpha \ast \operatorname{op}(A) \ast x + \beta \ast y
/// $$
///
/// NOTE(review): the formula above was missing from the extracted text and has been restored from the cuSPARSE reference — verify against upstream.
///
/// This is the `cuDoubleComplex` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// `bsrmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Several comments on `bsrmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported, that is, $\operatorname{op}(A) = A$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
/// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
///
/// For example, suppose the user has a CSR format and wants to try `bsrmv()`: the upstream cuSPARSE reference demonstrates how to use `csr2bsr()` conversion and `bsrmv()` multiplication in single precision (the example code is not reproduced in these bindings).
pub fn cusparseZbsrmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const cuDoubleComplex,
beta: *const cuDoubleComplex,
y: *mut cuDoubleComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs a `bsrmv` and a mask operation
///
/// This is the `f32` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// The mask operation is defined by array `bsrMaskPtr` which contains updated block row indices of $y$. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
///
/// NOTE(review): the worked example below refers to matrices and index arrays that were dropped when this text was extracted from the cuSPARSE reference; consult the upstream documentation for the figures.
///
/// For example, consider the $2 \times 3$ block matrix $A$:
///
/// and its one-based BSR format (three vector form) is:
///
/// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
///
/// We don’t need to create another BSR format for the new matrix $\bar{A}$, all that we should do is to keep `bsrVal` and `bsrColInd` unchanged, but modify `bsrRowPtr` and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
///
/// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
///
/// Further we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only because $y_{1}$ is never changed. In this case, `bsrMaskPtr`$=$ \[2\] and `sizeOfMask`=1.
///
/// The mask operator is equivalent to the following operation:
///
/// If a block row is not present in the `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark “?” is used to indicate row blocks not in `bsrMaskPtr`.
///
/// In this case, first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched also.
///
/// `bsrxmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// A couple of comments on `bsrxmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
/// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
pub fn cusparseSbsrxmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
sizeOfMask: ::core::ffi::c_int,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f32,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedMaskPtrA: *const ::core::ffi::c_int,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedEndPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const f32,
beta: *const f32,
y: *mut f32,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs a `bsrmv` and a mask operation
///
/// This is the `f64` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// The mask operation is defined by array `bsrMaskPtr` which contains updated block row indices of $y$. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
///
/// NOTE(review): the worked example below refers to matrices and index arrays that were dropped when this text was extracted from the cuSPARSE reference; consult the upstream documentation for the figures.
///
/// For example, consider the $2 \times 3$ block matrix $A$:
///
/// and its one-based BSR format (three vector form) is:
///
/// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
///
/// We don’t need to create another BSR format for the new matrix $\bar{A}$, all that we should do is to keep `bsrVal` and `bsrColInd` unchanged, but modify `bsrRowPtr` and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
///
/// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
///
/// Further we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only because $y_{1}$ is never changed. In this case, `bsrMaskPtr`$=$ \[2\] and `sizeOfMask`=1.
///
/// The mask operator is equivalent to the following operation:
///
/// If a block row is not present in the `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark “?” is used to indicate row blocks not in `bsrMaskPtr`.
///
/// In this case, first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched also.
///
/// `bsrxmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// A couple of comments on `bsrxmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
/// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
pub fn cusparseDbsrxmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
sizeOfMask: ::core::ffi::c_int,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f64,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedMaskPtrA: *const ::core::ffi::c_int,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedEndPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const f64,
beta: *const f64,
y: *mut f64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs a `bsrmv` and a mask operation
///
/// This is the `cuComplex` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// The mask operation is defined by array `bsrMaskPtr` which contains updated block row indices of $y$. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
///
/// NOTE(review): the worked example below refers to matrices and index arrays that were dropped when this text was extracted from the cuSPARSE reference; consult the upstream documentation for the figures.
///
/// For example, consider the $2 \times 3$ block matrix $A$:
///
/// and its one-based BSR format (three vector form) is:
///
/// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
///
/// We don’t need to create another BSR format for the new matrix $\bar{A}$, all that we should do is to keep `bsrVal` and `bsrColInd` unchanged, but modify `bsrRowPtr` and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
///
/// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
///
/// Further we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only because $y_{1}$ is never changed. In this case, `bsrMaskPtr`$=$ \[2\] and `sizeOfMask`=1.
///
/// The mask operator is equivalent to the following operation:
///
/// If a block row is not present in the `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark “?” is used to indicate row blocks not in `bsrMaskPtr`.
///
/// In this case, first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched also.
///
/// `bsrxmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// A couple of comments on `bsrxmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
/// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
pub fn cusparseCbsrxmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
sizeOfMask: ::core::ffi::c_int,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedMaskPtrA: *const ::core::ffi::c_int,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedEndPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const cuComplex,
beta: *const cuComplex,
y: *mut cuComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs a `bsrmv` and a mask operation
///
/// This is the `cuDoubleComplex` variant, where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{ and }\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// The mask operation is defined by array `bsrMaskPtr` which contains updated block row indices of $y$. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
///
/// NOTE(review): the worked example below refers to matrices and index arrays that were dropped when this text was extracted from the cuSPARSE reference; consult the upstream documentation for the figures.
///
/// For example, consider the $2 \times 3$ block matrix $A$:
///
/// and its one-based BSR format (three vector form) is:
///
/// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
///
/// We don’t need to create another BSR format for the new matrix $\bar{A}$, all that we should do is to keep `bsrVal` and `bsrColInd` unchanged, but modify `bsrRowPtr` and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
///
/// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
///
/// Further we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only because $y_{1}$ is never changed. In this case, `bsrMaskPtr`$=$ \[2\] and `sizeOfMask`=1.
///
/// The mask operator is equivalent to the following operation:
///
/// If a block row is not present in the `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark “?” is used to indicate row blocks not in `bsrMaskPtr`.
///
/// In this case, first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched also.
///
/// `bsrxmv()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// A couple of comments on `bsrxmv()`:
///
/// * Only `blockDim > 1` is supported
/// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
/// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
pub fn cusparseZbsrxmv(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
sizeOfMask: ::core::ffi::c_int,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedMaskPtrA: *const ::core::ffi::c_int,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedEndPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
x: *const cuDoubleComplex,
beta: *const cuDoubleComplex,
y: *mut cuDoubleComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Reports, through `position`, where a structural or numerical zero was found for `bsrsv2`.
///
/// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` is either structural zero or numerical zero (singular block). Otherwise `position=-1`.
///
/// The `position` can be 0-based or 1-based, the same as the matrix.
///
/// Function [`cusparseXbsrsv2_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
///
/// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This function is marked `#[deprecated]` in these bindings.
///
/// # Parameters
///
/// - `position`: if no structural or numerical zero, `position` is -1; otherwise if `A(j,j)` is missing or `U(j,j)` is zero, `position=j`.
#[deprecated]
pub fn cusparseXbsrsv2_zeroPivot(
handle: cusparseHandle_t,
info: bsrsv2Info_t,
position: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// This is the `f32` variant. `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
///
/// The size is presumably written to `pBufferSizeInBytes` — confirm against the cuSPARSE reference. The function itself returns a [`cusparseStatus_t`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSbsrsv2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// This is the `f64` variant. `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
///
/// The size is presumably written to `pBufferSizeInBytes` — confirm against the cuSPARSE reference. The function itself returns a [`cusparseStatus_t`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDbsrsv2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// This is the `cuComplex` variant. `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// (`trans` in the formula is the `transA` argument.)
///
/// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
///
/// The size is presumably written to `pBufferSizeInBytes` — confirm against the cuSPARSE reference. The function itself returns a [`cusparseStatus_t`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCbsrsv2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// This binding is the variant whose matrix values are `cuDoubleComplex`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`, and the size is presumably written through `pBufferSizeInBytes` (confirm against the cuSPARSE reference).
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseZbsrsv2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `bufferSizeExt` entry point of the `bsrsv2` family for `f32` matrix values.
///
/// No upstream description is attached to this binding. Compared with the `bsrsv2_bufferSize` declarations in this file, the parameter list differs in two places: the block dimension is named `blockSize` rather than `blockDim`, and the result pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably this reports the same `bsrsv2` work-buffer size as the non-`Ext` function, only through a `size_t`; confirm against the cuSPARSE reference.
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseSbsrsv2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `bufferSizeExt` entry point of the `bsrsv2` family for `f64` matrix values.
///
/// No upstream description is attached to this binding. Compared with the `bsrsv2_bufferSize` declarations in this file, the parameter list differs in two places: the block dimension is named `blockSize` rather than `blockDim`, and the result pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably this reports the same `bsrsv2` work-buffer size as the non-`Ext` function, only through a `size_t`; confirm against the cuSPARSE reference.
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseDbsrsv2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `bufferSizeExt` entry point of the `bsrsv2` family for `cuComplex` matrix values.
///
/// No upstream description is attached to this binding. Compared with the `bsrsv2_bufferSize` declarations in this file, the parameter list differs in two places: the block dimension is named `blockSize` rather than `blockDim`, and the result pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably this reports the same `bsrsv2` work-buffer size as the non-`Ext` function, only through a `size_t`; confirm against the cuSPARSE reference.
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseCbsrsv2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `bufferSizeExt` entry point of the `bsrsv2` family for `cuDoubleComplex` matrix values.
///
/// No upstream description is attached to this binding. Compared with the `bsrsv2_bufferSize` declarations in this file, the parameter list differs in two places: the block dimension is named `blockSize` rather than `blockDim`, and the result pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably this reports the same `bsrsv2` work-buffer size as the non-`Ext` function, only through a `size_t`; confirm against the cuSPARSE reference.
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseZbsrsv2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *mut cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsv2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This binding is the variant whose matrix values are `f32`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA` (confirm against the cuSPARSE reference).
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseSbsrsv2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This binding is the variant whose matrix values are `f64`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA` (confirm against the cuSPARSE reference).
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseDbsrsv2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This binding is the variant whose matrix values are `cuComplex`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA` (confirm against the cuSPARSE reference).
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseCbsrsv2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This binding is the variant whose matrix values are `cuDoubleComplex`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA` (confirm against the cuSPARSE reference).
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseZbsrsv2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv02_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
///
/// Function `bsrsv02_solve()` has the same behavior as `csrsv02_solve()`. That is, `bsr2csr(bsrsv02(A)) = csrsv02(bsr2csr(A))`. The numerical zero of `csrsv02_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv02_solve()` means there exists some block `A(j,j)` that is not invertible.
///
/// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// For example, when L is a lower triangular matrix with unit diagonal, `L*y=x` can be solved by level information; the code sample that accompanies this sentence upstream is not reproduced in this binding.
///
/// This binding is the variant whose scalar, matrix, and vector values are `f32`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`. NOTE(review): the prose names the vectors `x` and `y`, while this signature takes `f` (a `*const` input) and `x` (a `*mut` output); presumably `f` is the right-hand side and `x` receives the solution. Confirm against the cuSPARSE reference.
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseSbsrsv2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f32,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
f: *const f32,
x: *mut f32,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv02_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
///
/// Function `bsrsv02_solve()` has the same behavior as `csrsv02_solve()`. That is, `bsr2csr(bsrsv02(A)) = csrsv02(bsr2csr(A))`. The numerical zero of `csrsv02_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv02_solve()` means there exists some block `A(j,j)` that is not invertible.
///
/// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// For example, when L is a lower triangular matrix with unit diagonal, `L*y=x` can be solved by level information; the code sample that accompanies this sentence upstream is not reproduced in this binding.
///
/// This binding is the variant whose scalar, matrix, and vector values are `f64`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`. NOTE(review): the prose names the vectors `x` and `y`, while this signature takes `f` (a `*const` input) and `x` (a `*mut` output); presumably `f` is the right-hand side and `x` receives the solution. Confirm against the cuSPARSE reference.
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseDbsrsv2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f64,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
f: *const f64,
x: *mut f64,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv02_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
///
/// Function `bsrsv02_solve()` has the same behavior as `csrsv02_solve()`. That is, `bsr2csr(bsrsv02(A)) = csrsv02(bsr2csr(A))`. The numerical zero of `csrsv02_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv02_solve()` means there exists some block `A(j,j)` that is not invertible.
///
/// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// For example, when L is a lower triangular matrix with unit diagonal, `L*y=x` can be solved by level information; the code sample that accompanies this sentence upstream is not reproduced in this binding.
///
/// This binding is the variant whose scalar, matrix, and vector values are `cuComplex`. The prose above uses the names `trans`, `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; in this signature the corresponding parameters are presumably `transA`, `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`. NOTE(review): the prose names the vectors `x` and `y`, while this signature takes `f` (a `*const` input) and `x` (a `*mut` output); presumably `f` is the right-hand side and `x` receives the solution. Confirm against the cuSPARSE reference.
///
/// # Safety
///
/// This is a raw FFI declaration: every argument is handed to the C library unchecked. Pointer validity, array lengths, and memory-space requirements are not visible from this declaration; consult the cuSPARSE reference before calling.
pub fn cusparseCbsrsv2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
f: *const cuComplex,
x: *mut cuComplex,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } trans = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } trans = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } trans = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// NOTE(review): the names in this description follow the upstream documentation and differ from this declaration's parameters: the three arrays are declared as `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`, `trans` is declared as `transA`, and the two vectors are declared as `f` (`*const`) and `x` (`*mut`) — presumably the right-hand side and the solution, respectively; confirm against the cuSPARSE header.
///
/// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv02_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
///
/// Function `bsrsv02_solve()` has the same behavior as `csrsv02_solve()`. That is, `bsr2csr(bsrsv02(A)) = csrsv02(bsr2csr(A))`. The numerical zero of `csrsv02_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv02_solve()` means there exists some block `A(j,j)` that is not invertible.
///
/// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// For example, suppose L is a lower triangular matrix with unit diagonal, then the following code solves `L*y=x` by level information. (The example code itself is not included in this generated comment; see the upstream cuSPARSE documentation.)
pub fn cusparseZbsrsv2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrsv2Info_t,
f: *const cuDoubleComplex,
x: *mut cuDoubleComplex,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs one of the following matrix-matrix operations:
///
/// NOTE(review): the operation formula itself is missing from this generated comment; the upstream cuSPARSE documentation presumably gives it as $C = \alpha \ast \operatorname{op}(A) \ast \operatorname{op}(B) + \beta \ast C$ — confirm.
///
/// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha\text{~and~}\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } transA = \text{CUSPARSE_OPERATION_TRANSPOSE (not supported)} \\
/// A^H & \text{if } transA = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and:
/// $$
/// \operatorname{op}(B) =
/// \begin{cases}
/// B & \text{if } transB = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// B^T & \text{if } transB = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// B^H & \text{if } transB = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`, and `blockDim` presumably refers to the parameter declared as `blockSize` — confirm against the cuSPARSE header.
///
/// The function has the following limitations:
///
/// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
/// * only `blockDim > 1` is supported
/// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
/// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
/// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
///
/// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
///
/// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code itself is not included in this generated comment; see the upstream cuSPARSE documentation.)
///
/// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublas<t>geam()`, and then to perform `A*transpose(Bt)`.
///
/// `bsrmm()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSbsrmm(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f32,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
B: *const f32,
ldb: ::core::ffi::c_int,
beta: *const f32,
C: *mut f32,
ldc: ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs one of the following matrix-matrix operations:
///
/// NOTE(review): the operation formula itself is missing from this generated comment; the upstream cuSPARSE documentation presumably gives it as $C = \alpha \ast \operatorname{op}(A) \ast \operatorname{op}(B) + \beta \ast C$ — confirm.
///
/// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha\text{~and~}\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } transA = \text{CUSPARSE_OPERATION_TRANSPOSE (not supported)} \\
/// A^H & \text{if } transA = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and:
/// $$
/// \operatorname{op}(B) =
/// \begin{cases}
/// B & \text{if } transB = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// B^T & \text{if } transB = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// B^H & \text{if } transB = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`, and `blockDim` presumably refers to the parameter declared as `blockSize` — confirm against the cuSPARSE header.
///
/// The function has the following limitations:
///
/// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
/// * only `blockDim > 1` is supported
/// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
/// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
/// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
///
/// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
///
/// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code itself is not included in this generated comment; see the upstream cuSPARSE documentation.)
///
/// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublas<t>geam()`, and then to perform `A*transpose(Bt)`.
///
/// `bsrmm()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDbsrmm(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f64,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
B: *const f64,
ldb: ::core::ffi::c_int,
beta: *const f64,
C: *mut f64,
ldc: ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs one of the following matrix-matrix operations:
///
/// NOTE(review): the operation formula itself is missing from this generated comment; the upstream cuSPARSE documentation presumably gives it as $C = \alpha \ast \operatorname{op}(A) \ast \operatorname{op}(B) + \beta \ast C$ — confirm.
///
/// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha\text{~and~}\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } transA = \text{CUSPARSE_OPERATION_TRANSPOSE (not supported)} \\
/// A^H & \text{if } transA = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and:
/// $$
/// \operatorname{op}(B) =
/// \begin{cases}
/// B & \text{if } transB = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// B^T & \text{if } transB = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// B^H & \text{if } transB = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`, and `blockDim` presumably refers to the parameter declared as `blockSize` — confirm against the cuSPARSE header.
///
/// The function has the following limitations:
///
/// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
/// * only `blockDim > 1` is supported
/// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
/// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
/// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
///
/// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
///
/// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code itself is not included in this generated comment; see the upstream cuSPARSE documentation.)
///
/// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublas<t>geam()`, and then to perform `A*transpose(Bt)`.
///
/// `bsrmm()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCbsrmm(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
B: *const cuComplex,
ldb: ::core::ffi::c_int,
beta: *const cuComplex,
C: *mut cuComplex,
ldc: ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs one of the following matrix-matrix operations:
///
/// NOTE(review): the operation formula itself is missing from this generated comment; the upstream cuSPARSE documentation presumably gives it as $C = \alpha \ast \operatorname{op}(A) \ast \operatorname{op}(B) + \beta \ast C$ — confirm.
///
/// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha\text{~and~}\beta$ are scalars; and:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } transA = \text{CUSPARSE_OPERATION_TRANSPOSE (not supported)} \\
/// A^H & \text{if } transA = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and:
/// $$
/// \operatorname{op}(B) =
/// \begin{cases}
/// B & \text{if } transB = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// B^T & \text{if } transB = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// B^H & \text{if } transB = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedValA`, `bsrSortedRowPtrA`, and `bsrSortedColIndA`, and `blockDim` presumably refers to the parameter declared as `blockSize` — confirm against the cuSPARSE header.
///
/// The function has the following limitations:
///
/// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
/// * only `blockDim > 1` is supported
/// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
/// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
/// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
///
/// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
///
/// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code itself is not included in this generated comment; see the upstream cuSPARSE documentation.)
///
/// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublas<t>geam()`, and then to perform `A*transpose(Bt)`.
///
/// `bsrmm()` has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZbsrmm(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
kb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
B: *const cuDoubleComplex,
ldb: ::core::ffi::c_int,
beta: *const cuDoubleComplex,
C: *mut cuDoubleComplex,
ldc: ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` is either a structural zero or a numerical zero (singular block). Otherwise `position=-1`.
///
/// The `position` can be 0-based or 1-based, the same as the matrix.
///
/// Function [`cusparseXbsrsm2_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
///
/// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This binding is marked `#[deprecated]`.
///
/// # Parameters
///
/// - `position`: if no structural or numerical zero, `position` is -1; otherwise, if `A(j,j)` is missing or `U(j,j)` is zero, `position=j`.
#[deprecated]
pub fn cusparseXbsrsm2_zeroPivot(
handle: cusparseHandle_t,
info: bsrsm2Info_t,
position: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on the dimensions `mb` and `blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur.
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`, `blockDim` presumably refers to the parameter declared as `blockSize`, and the size is presumably written through `pBufferSizeInBytes` — confirm against the cuSPARSE header.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSbsrsm2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on the dimensions `mb` and `blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur.
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`, `blockDim` presumably refers to the parameter declared as `blockSize`, and the size is presumably written through `pBufferSizeInBytes` — confirm against the cuSPARSE header.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDbsrsm2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on the dimensions `mb` and `blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur.
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`, `blockDim` presumably refers to the parameter declared as `blockSize`, and the size is presumably written through `pBufferSizeInBytes` — confirm against the cuSPARSE header.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCbsrsm2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on the dimensions `mb` and `blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur.
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`, `blockDim` presumably refers to the parameter declared as `blockSize`, and the size is presumably written through `pBufferSizeInBytes` — confirm against the cuSPARSE header.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZbsrsm2_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusparseSbsrsm2_bufferSizeExt`; no upstream description was attached to this binding.
///
/// The parameter list mirrors [`cusparseSbsrsm2_bufferSize`], except that the fourth parameter is named `transB` (rather than `transXY`) and the final parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably the `size_t` variant of the `bsrsm2` buffer-size query — confirm against the cuSPARSE header before relying on this.
pub fn cusparseSbsrsm2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusparseDbsrsm2_bufferSizeExt`; no upstream description was attached to this binding.
///
/// The parameter list mirrors [`cusparseDbsrsm2_bufferSize`], except that the fourth parameter is named `transB` (rather than `transXY`) and the final parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably the `size_t` variant of the `bsrsm2` buffer-size query — confirm against the cuSPARSE header before relying on this.
pub fn cusparseDbsrsm2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusparseCbsrsm2_bufferSizeExt`; no upstream description was attached to this binding.
///
/// The parameter list mirrors [`cusparseCbsrsm2_bufferSize`], except that the fourth parameter is named `transB` (rather than `transXY`) and the final parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably the `size_t` variant of the `bsrsm2` buffer-size query — confirm against the cuSPARSE header before relying on this.
pub fn cusparseCbsrsm2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of `cusparseZbsrsm2_bufferSizeExt`; no upstream description was attached to this binding.
///
/// The parameter list mirrors [`cusparseZbsrsm2_bufferSize`], except that the fourth parameter is named `transB` (rather than `transXY`) and the final parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
///
/// NOTE(review): presumably the `size_t` variant of the `bsrsm2` buffer-size query — confirm against the cuSPARSE header before relying on this.
pub fn cusparseZbsrsm2_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transB: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{if } transX = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and `op(B)` and `op(X)` are equal.
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`, and `transX` and `blockDim` presumably refer to the parameters declared as `transXY` and `blockSize` — confirm against the cuSPARSE header.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in opaque structure `info`. The level information can extract more parallelism during a triangular solver. However `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call [`cusparseXbsrsm2_zeroPivot`] to know where the structural zero is.
///
/// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero but this result `X` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseSbsrsm2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{if } transX = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and `op(B)` and `op(X)` are equal.
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`, and `transX` and `blockDim` presumably refer to the parameters declared as `transXY` and `blockSize` — confirm against the cuSPARSE header.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in opaque structure `info`. The level information can extract more parallelism during a triangular solver. However `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call [`cusparseXbsrsm2_zeroPivot`] to know where the structural zero is.
///
/// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero but this result `X` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseDbsrsm2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{if } transX = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and `op(B)` and `op(X)` are equal.
///
/// NOTE(review): the names in this description follow the upstream documentation; in this declaration the three arrays are named `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`, and `transX` and `blockDim` presumably refer to the parameters declared as `transXY` and `blockSize` — confirm against the cuSPARSE header.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in opaque structure `info`. The level information can extract more parallelism during a triangular solver. However `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call [`cusparseXbsrsm2_zeroPivot`] to know where the structural zero is.
///
/// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero but this result `X` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseCbsrsm2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Performs the analysis phase of `bsrsm2()`, the solution of a sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`. This variant takes `cuDoubleComplex` matrix values.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
///
/// $\operatorname{op}(A) = A \text{ if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{if } transX = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE (not supported)}
/// \end{cases}
/// $$
///
/// and `op(B)` and `op(X)` are equal.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// It is expected that this function will be executed only once for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in opaque structure `info`. The level information can extract more parallelism during a triangular solver. However `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the structural zero is.
///
/// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero but this result `X` is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// Naming note: the text above uses the upstream names `bsrValA`, `bsrRowPtrA`, `bsrColIndA`, `blockDim` and `transX`; in this declaration they presumably correspond to `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd`, `blockSize` and `transXY` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseZbsrsm2_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Performs the solve phase of the solution of a sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`. This variant takes `f32` values.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
///
/// $\operatorname{op}(A) = A \text{ if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{not supported}
/// \end{cases}
/// $$
///
/// Only `op(A)=A` is supported.
///
/// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_solve()` has the same behavior as `bsrsv2_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
///
/// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
///
/// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the text above uses the upstream names `bsrValA`, `bsrRowPtrA`, `bsrColIndA`, `blockDim` and `transX`; in this declaration they presumably correspond to `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd`, `blockSize` and `transXY` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseSbsrsm2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f32,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
B: *const f32,
ldb: ::core::ffi::c_int,
X: *mut f32,
ldx: ::core::ffi::c_int,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Performs the solve phase of the solution of a sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`. This variant takes `f64` values.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
///
/// $\operatorname{op}(A) = A \text{ if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{not supported}
/// \end{cases}
/// $$
///
/// Only `op(A)=A` is supported.
///
/// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_solve()` has the same behavior as `bsrsv2_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
///
/// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
///
/// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the text above uses the upstream names `bsrValA`, `bsrRowPtrA`, `bsrColIndA`, `blockDim` and `transX`; in this declaration they presumably correspond to `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd`, `blockSize` and `transXY` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseDbsrsm2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const f64,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
B: *const f64,
ldb: ::core::ffi::c_int,
X: *mut f64,
ldx: ::core::ffi::c_int,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Performs the solve phase of the solution of a sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`. This variant takes `cuComplex` values.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
///
/// $\operatorname{op}(A) = A \text{ if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{not supported}
/// \end{cases}
/// $$
///
/// Only `op(A)=A` is supported.
///
/// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_solve()` has the same behavior as `bsrsv2_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
///
/// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
///
/// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the text above uses the upstream names `bsrValA`, `bsrRowPtrA`, `bsrColIndA`, `blockDim` and `transX`; in this declaration they presumably correspond to `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd`, `blockSize` and `transXY` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseCbsrsm2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuComplex,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
B: *const cuComplex,
ldb: ::core::ffi::c_int,
X: *mut cuComplex,
ldx: ::core::ffi::c_int,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Performs the solve phase of the solution of a sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`. This variant takes `cuDoubleComplex` values.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
///
/// $\operatorname{op}(A) = A \text{ if } transA = \text{CUSPARSE_OPERATION_NON_TRANSPOSE}$
///
/// and:
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } transX = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// X^T & \text{if } transX = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// X^H & \text{not supported}
/// \end{cases}
/// $$
///
/// Only `op(A)=A` is supported.
///
/// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
///
/// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
///
/// This function may be executed multiple times for a given matrix and a particular operation type.
///
/// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrsm2_solve()` has the same behavior as `bsrsv2_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
///
/// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
///
/// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the text above uses the upstream names `bsrValA`, `bsrRowPtrA`, `bsrColIndA`, `blockDim` and `transX`; in this declaration they presumably correspond to `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd`, `blockSize` and `transXY` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseZbsrsm2_solve(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
transA: cusparseOperation_t,
transXY: cusparseOperation_t,
mb: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrsm2Info_t,
B: *const cuDoubleComplex,
ldb: ::core::ffi::c_int,
X: *mut cuDoubleComplex,
ldx: ::core::ffi::c_int,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by incomplete LU factorization (`csrilu02()`); this variant takes an `f32` boost value.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is
///
/// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
///
/// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Parameters
///
/// - `enable_boost`: 1 to enable the boost value, 0 to disable it.
/// - `tol`: tolerance used to detect a numerical zero. Note that it is declared as a pointer to `f64` even though `boost_val` is `f32` in this variant.
/// - `boost_val`: value that replaces a numerical zero.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseScsrilu02_numericBoost(
handle: cusparseHandle_t,
info: csrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut f32,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by incomplete LU factorization (`csrilu02()`); this variant takes an `f64` boost value.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is
///
/// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
///
/// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Parameters
///
/// - `enable_boost`: 1 to enable the boost value, 0 to disable it.
/// - `tol`: tolerance used to detect a numerical zero (pointer to `f64`).
/// - `boost_val`: value that replaces a numerical zero (pointer to `f64`).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseDcsrilu02_numericBoost(
handle: cusparseHandle_t,
info: csrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut f64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by incomplete LU factorization (`csrilu02()`); this variant takes a `cuComplex` boost value.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is
///
/// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
///
/// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Parameters
///
/// - `enable_boost`: 1 to enable the boost value, 0 to disable it.
/// - `tol`: tolerance used to detect a numerical zero. Note that it is a pointer to real `f64`, not to a complex type.
/// - `boost_val`: value that replaces a numerical zero (pointer to `cuComplex`).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseCcsrilu02_numericBoost(
handle: cusparseHandle_t,
info: csrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut cuComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by incomplete LU factorization (`csrilu02()`); this variant takes a `cuDoubleComplex` boost value.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is
///
/// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
///
/// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Parameters
///
/// - `enable_boost`: 1 to enable the boost value, 0 to disable it.
/// - `tol`: tolerance used to detect a numerical zero. Note that it is a pointer to real `f64`, not to a complex type.
/// - `boost_val`: value that replaces a numerical zero (pointer to `cuDoubleComplex`).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseZcsrilu02_numericBoost(
handle: cusparseHandle_t,
info: csrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut cuDoubleComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Queries the position of a structural or numerical zero pivot recorded in `info` for `csrilu02`.
///
/// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero; otherwise, `position=-1`.
///
/// The `position` can be 0-based or 1-based, the same as the matrix.
///
/// Function [`cusparseXcsrilu02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
///
/// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// NOTE(review): the "blocking call" statement and the asynchronous-execution bullet both come from the upstream text and appear to conflict; confirm against the cuSPARSE documentation.
///
/// This declaration carries `#[deprecated]`, so using it triggers a deprecation warning.
///
/// # Parameters
///
/// - `position`: If no structural or numerical zero, `position` is -1; otherwise if `A(j,j)` is missing or `U(j,j)` is zero, `position=j`.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
#[deprecated]
pub fn cusparseXcsrilu02_zeroPivot(
handle: cusparseHandle_t,
info: csrilu02Info_t,
position: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting, $A \approx LU$. This variant takes `f32` matrix values.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`.
///
/// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the upstream names `csrValA`, `csrRowPtrA` and `csrColIndA` presumably correspond to the `csrSortedValA`, `csrSortedRowPtrA` and `csrSortedColIndA` parameters of this declaration. The size is presumably written through `pBufferSizeInBytes`, a pointer to a C `int` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseScsrilu02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting, $A \approx LU$. This variant takes `f64` matrix values.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`.
///
/// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the upstream names `csrValA`, `csrRowPtrA` and `csrColIndA` presumably correspond to the `csrSortedValA`, `csrSortedRowPtrA` and `csrSortedColIndA` parameters of this declaration. The size is presumably written through `pBufferSizeInBytes`, a pointer to a C `int` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseDcsrilu02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting, $A \approx LU$. This variant takes `cuComplex` matrix values.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`.
///
/// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the upstream names `csrValA`, `csrRowPtrA` and `csrColIndA` presumably correspond to the `csrSortedValA`, `csrSortedRowPtrA` and `csrSortedColIndA` parameters of this declaration. The size is presumably written through `pBufferSizeInBytes`, a pointer to a C `int` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseCcsrilu02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting, $A \approx LU$. This variant takes `cuDoubleComplex` matrix values.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`.
///
/// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Naming note: the upstream names `csrValA`, `csrRowPtrA` and `csrColIndA` presumably correspond to the `csrSortedValA`, `csrSortedRowPtrA` and `csrSortedColIndA` parameters of this declaration. The size is presumably written through `pBufferSizeInBytes`, a pointer to a C `int` (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseZcsrilu02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for `csrilu02` with `f32` matrix values whose out-parameter `pBufferSize` is a pointer to `size_t`.
///
/// The argument types match [`cusparseScsrilu02_bufferSize`] except for the last one, which there is a pointer to a C `int`.
///
/// NOTE(review): no upstream description was emitted for this declaration. It presumably reports the same workspace size as [`cusparseScsrilu02_bufferSize`] — confirm against the cuSPARSE header before relying on it.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseScsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut f32,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for `csrilu02` with `f64` matrix values whose out-parameter `pBufferSize` is a pointer to `size_t`.
///
/// The argument types match [`cusparseDcsrilu02_bufferSize`] except for the last one, which there is a pointer to a C `int`.
///
/// NOTE(review): no upstream description was emitted for this declaration. It presumably reports the same workspace size as [`cusparseDcsrilu02_bufferSize`] — confirm against the cuSPARSE header before relying on it.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseDcsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut f64,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for `csrilu02` with `cuComplex` matrix values whose out-parameter `pBufferSize` is a pointer to `size_t`.
///
/// The argument types match [`cusparseCcsrilu02_bufferSize`] except for the last one, which there is a pointer to a C `int`.
///
/// NOTE(review): no upstream description was emitted for this declaration. It presumably reports the same workspace size as [`cusparseCcsrilu02_bufferSize`] — confirm against the cuSPARSE header before relying on it.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseCcsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut cuComplex,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for `csrilu02` with `cuDoubleComplex` matrix values whose out-parameter `pBufferSize` is a pointer to `size_t`.
///
/// The argument types match [`cusparseZcsrilu02_bufferSize`] except for the last one, which there is a pointer to a C `int`.
///
/// NOTE(review): no upstream description was emitted for this declaration. It presumably reports the same workspace size as [`cusparseZcsrilu02_bufferSize`] — confirm against the cuSPARSE header before relying on it.
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseZcsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut cuDoubleComplex,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Performs the analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting, $A \approx LU$. This variant takes `f32` matrix values.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`.
///
/// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// It is the user's choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// Naming note: the upstream names `csrValA`, `csrRowPtrA` and `csrColIndA` presumably correspond to the `csrSortedValA`, `csrSortedRowPtrA` and `csrSortedColIndA` parameters of this declaration (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseScsrilu02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Performs the analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting, $A \approx LU$. This variant takes `f64` matrix values.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`.
///
/// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// It is the user's choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// Naming note: the upstream names `csrValA`, `csrRowPtrA` and `csrColIndA` presumably correspond to the `csrSortedValA`, `csrSortedRowPtrA` and `csrSortedColIndA` parameters of this declaration (confirm against the cuSPARSE header).
///
/// # Safety
///
/// This is a foreign function declared in an `unsafe extern "C"` block; every handle and pointer argument must satisfy the requirements of the underlying C library.
pub fn cusparseDcsrilu02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting ($A \approx LU$); this variant takes matrix values of type `cuComplex`.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (declared here as the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA`).
///
/// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// It is the user’s choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): the validity and memory-space requirements of the pointer arguments are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseCcsrilu02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting ($A \approx LU$); this variant takes matrix values of type `cuDoubleComplex`.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (declared here as the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA`).
///
/// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// It is the user’s choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): the validity and memory-space requirements of the pointer arguments are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseZcsrilu02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting ($A \approx LU$); this variant takes matrix values of type `f32`.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (declared here as the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA`). NOTE(review): the value array is passed as a `*mut` pointer, which suggests the factorization is written back in place; confirm against the cuSPARSE documentation.
///
/// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
///
/// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// The upstream documentation follows this with an example that, for a real $m \times m$ matrix `A`, solves the preconditioned system `M*y = x`, where `M` is the product of the LU factors `L` and `U`; that example code is not reproduced in this binding.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): the validity and memory-space requirements of the pointer arguments are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseScsrilu02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting ($A \approx LU$); this variant takes matrix values of type `f64`.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (declared here as the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA`). NOTE(review): the value array is passed as a `*mut` pointer, which suggests the factorization is written back in place; confirm against the cuSPARSE documentation.
///
/// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
///
/// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// The upstream documentation follows this with an example that, for a real $m \times m$ matrix `A`, solves the preconditioned system `M*y = x`, where `M` is the product of the LU factors `L` and `U`; that example code is not reproduced in this binding.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): the validity and memory-space requirements of the pointer arguments are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseDcsrilu02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting ($A \approx LU$); this variant takes matrix values of type `cuComplex`.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (declared here as the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA`). NOTE(review): the value array is passed as a `*mut` pointer, which suggests the factorization is written back in place; confirm against the cuSPARSE documentation.
///
/// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
///
/// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// The upstream documentation follows this with an example that, for a real $m \times m$ matrix `A`, solves the preconditioned system `M*y = x`, where `M` is the product of the LU factors `L` and `U`; that example code is not reproduced in this binding.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): the validity and memory-space requirements of the pointer arguments are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseCcsrilu02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting ($A \approx LU$); this variant takes matrix values of type `cuDoubleComplex`.
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (declared here as the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA`). NOTE(review): the value array is passed as a `*mut` pointer, which suggests the factorization is written back in place; confirm against the cuSPARSE documentation.
///
/// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
///
/// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// The upstream documentation follows this with an example that, for a real $m \times m$ matrix `A`, solves the preconditioned system `M*y = x`, where `M` is the product of the LU factors `L` and `U`; that example code is not reproduced in this binding.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): the validity and memory-space requirements of the pointer arguments are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseZcsrilu02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by `bsrilu02()`; in this variant `boost_val` points to an `f32`.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
///
/// if `tol >= fabs(A(j,j))`, then reset each diagonal element of block `A(j,j)` by `boost_val`.
///
/// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// Note that `tol` is declared as `*mut f64` here even though `boost_val` is `*mut f32`; the two pointers do not share an element type.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseSbsrilu02_numericBoost(
handle: cusparseHandle_t,
info: bsrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut f32,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by `bsrilu02()`; in this variant `boost_val` points to an `f64`.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
///
/// if `tol >= fabs(A(j,j))`, then reset each diagonal element of block `A(j,j)` by `boost_val`.
///
/// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// Both `tol` and `boost_val` are declared as `*mut f64` in this variant.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseDbsrilu02_numericBoost(
handle: cusparseHandle_t,
info: bsrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut f64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by `bsrilu02()`; in this variant `boost_val` points to a `cuComplex`.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
///
/// if `tol >= fabs(A(j,j))`, then reset each diagonal element of block `A(j,j)` by `boost_val`.
///
/// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// Note that `tol` is declared as `*mut f64` here even though `boost_val` is `*mut cuComplex`; the two pointers do not share an element type.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseCbsrilu02_numericBoost(
handle: cusparseHandle_t,
info: bsrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut cuComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Configures the numeric boost used by `bsrilu02()`; in this variant `boost_val` points to a `cuDoubleComplex`.
///
/// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
///
/// if `tol >= fabs(A(j,j))`, then reset each diagonal element of block `A(j,j)` by `boost_val`.
///
/// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
///
/// If `enable_boost=0`, `tol` and `boost_val` are ignored.
///
/// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// Note that `tol` is declared as `*mut f64` here, while `boost_val` is `*mut cuDoubleComplex`.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseZbsrilu02_numericBoost(
handle: cusparseHandle_t,
info: bsrilu02Info_t,
enable_boost: ::core::ffi::c_int,
tol: *mut f64,
boost_val: *mut cuDoubleComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Reports, through `position`, where a zero pivot was found for the `bsrilu02` factorization tracked by `info`.
///
/// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero (the block is not invertible). Otherwise `position=-1`.
///
/// The `position` can be 0-based or 1-based, the same as the matrix.
///
/// Function [`cusparseXbsrilu02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
///
/// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Parameters
///
/// - `position`: if no structural or numerical zero, `position` is -1; otherwise if `A(j,j)` is missing or `U(j,j)` is not invertible, `position=j`.
///
/// # Deprecation
///
/// This declaration carries `#[deprecated]`, so using it produces a deprecation warning. No replacement is named in this binding.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
#[deprecated]
pub fn cusparseXbsrilu02_zeroPivot(
handle: cusparseHandle_t,
info: bsrilu02Info_t,
position: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting; this variant takes matrix values of type `f32`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (declared here as the parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`).
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is reported through the `pBufferSizeInBytes` out-pointer, declared here as `*mut c_int`.
///
/// **Status Returned**
///
/// | Status | Meaning |
/// | --- | --- |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseSbsrilu02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting; this variant takes matrix values of type `f64`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (declared here as the parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`).
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is reported through the `pBufferSizeInBytes` out-pointer, declared here as `*mut c_int`.
///
/// **Status Returned**
///
/// | Status | Meaning |
/// | --- | --- |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseDbsrilu02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting; this variant takes matrix values of type `cuComplex`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (declared here as the parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`).
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is reported through the `pBufferSizeInBytes` out-pointer, declared here as `*mut c_int`.
///
/// **Status Returned**
///
/// | Status | Meaning |
/// | --- | --- |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseCbsrilu02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting; this variant takes matrix values of type `cuDoubleComplex`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (declared here as the parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`).
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is reported through the `pBufferSizeInBytes` out-pointer, declared here as `*mut c_int`.
///
/// **Status Returned**
///
/// | Status | Meaning |
/// | --- | --- |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
/// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseZbsrilu02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the `bufferSizeExt` form of the `bsrilu02` buffer-size query; this variant takes matrix values of type `f32`.
///
/// No upstream description accompanies this declaration. Its parameter list mirrors [`cusparseSbsrilu02_bufferSize`], with two differences visible in the signature: the block dimension parameter is named `blockSize`, and the result pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): presumably the required workspace size is written to `pBufferSize` in bytes, as for `bsrilu02_bufferSize()`; confirm against the cuSPARSE header.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseSbsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the `bufferSizeExt` form of the `bsrilu02` buffer-size query; this variant takes matrix values of type `f64`.
///
/// No upstream description accompanies this declaration. Its parameter list mirrors [`cusparseDbsrilu02_bufferSize`], with two differences visible in the signature: the block dimension parameter is named `blockSize`, and the result pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): presumably the required workspace size is written to `pBufferSize` in bytes, as for `bsrilu02_bufferSize()`; confirm against the cuSPARSE header.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseDbsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the `bufferSizeExt` form of the `bsrilu02` buffer-size query; this variant takes matrix values of type `cuComplex`.
///
/// No upstream description accompanies this declaration. Its parameter list mirrors [`cusparseCbsrilu02_bufferSize`], with two differences visible in the signature: the block dimension parameter is named `blockSize`, and the result pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): presumably the required workspace size is written to `pBufferSize` in bytes, as for `bsrilu02_bufferSize()`; confirm against the cuSPARSE header.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseCbsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the `bufferSizeExt` form of the `bsrilu02` buffer-size query; this variant takes matrix values of type `cuDoubleComplex`.
///
/// No upstream description accompanies this declaration. Its parameter list mirrors [`cusparseZbsrilu02_bufferSize`], with two differences visible in the signature: the block dimension parameter is named `blockSize`, and the result pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): presumably the required workspace size is written to `pBufferSize` in bytes, as for `bsrilu02_bufferSize()`; confirm against the cuSPARSE header.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseZbsrilu02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsrilu02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting; this variant takes matrix values of type `f32`.
///
/// | |
/// | --- |
/// | $A \approx LU$ |
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (declared here as the parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsrilu02_analysis()` and `bsrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseSbsrilu02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting; this variant takes matrix values of type `f64`.
///
/// | |
/// | --- |
/// | $A \approx LU$ |
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (declared here as the parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsrilu02_analysis()` and `bsrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration inside an `unsafe extern "C"` block, so every call must be made from `unsafe` code. NOTE(review): pointer validity and memory-space requirements are not visible from this declaration; confirm them against the cuSPARSE documentation.
pub fn cusparseDbsrilu02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting, for matrix values of type `cuComplex`.
///
/// | |
/// | --- |
/// | $A \approx LU$ |
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However, `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of both `bsrilu02_analysis()` and `bsrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseCbsrilu02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting, for matrix values of type `cuDoubleComplex`.
///
/// | |
/// | --- |
/// | $A \approx LU$ |
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However, `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of both `bsrilu02_analysis()` and `bsrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseZbsrilu02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting, for matrix values of type `f32`.
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
///
/// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The original text introduces here a code sample that solves the preconditioned system `M*y = x`, where `M` is the product of LU factors `L` and `U`; that sample is not included in this doc comment.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseSbsrilu02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting, for matrix values of type `f64`.
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
///
/// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The original text introduces here a code sample that solves the preconditioned system `M*y = x`, where `M` is the product of LU factors `L` and `U`; that sample is not included in this doc comment.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseDbsrilu02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting, for matrix values of type `cuComplex`.
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
///
/// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The original text introduces here a code sample that solves the preconditioned system `M*y = x`, where `M` is the product of LU factors `L` and `U`; that sample is not included in this doc comment.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseCbsrilu02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting, for matrix values of type `cuDoubleComplex`.
///
/// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`.
///
/// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
///
/// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
///
/// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The original text introduces here a code sample that solves the preconditioned system `M*y = x`, where `M` is the product of LU factors `L` and `U`; that sample is not included in this doc comment.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseZbsrilu02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsrilu02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Reports, through `position`, where the `csric02` routines found a structural or numerical zero.
///
/// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero; otherwise, `position=-1`.
///
/// The `position` can be 0-based or 1-based, the same as the matrix.
///
/// Function [`cusparseXcsric02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
///
/// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// NOTE(review): the "blocking call" statement and the asynchronous-execution bullet above appear to be in tension; both are kept as given, so confirm the actual behavior against the cuSPARSE documentation.
///
/// # Parameters
///
/// - `position`: if no structural or numerical zero, `position` is -1; otherwise, if `A(j,j)` is missing or `L(j,j)` is zero, `position=j`.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for `handle`, `info`, and the `position` pointer are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
#[deprecated]
pub fn cusparseXcsric02_zeroPivot(
handle: cusparseHandle_t,
info: csric02Info_t,
position: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `f32`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is presumably written through `pBufferSizeInBytes` (inferred from the parameter name; confirm against the cuSPARSE documentation).
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseScsric02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `f64`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is presumably written through `pBufferSizeInBytes` (inferred from the parameter name; confirm against the cuSPARSE documentation).
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseDcsric02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `cuComplex`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is presumably written through `pBufferSizeInBytes` (inferred from the parameter name; confirm against the cuSPARSE documentation).
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseCcsric02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `cuDoubleComplex`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// The size is presumably written through `pBufferSizeInBytes` (inferred from the parameter name; confirm against the cuSPARSE documentation).
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseZcsric02_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *mut cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding for the C function `cusparseScsric02_bufferSizeExt` (matrix values of type `f32`).
///
/// The parameter list mirrors [`cusparseScsric02_bufferSize`], except that the last argument is `pBufferSize: *mut size_t` rather than a pointer to a C `int`, and the matrix array parameters are named without the trailing `A`.
///
/// NOTE(review): presumably this reports the same `csric02` working-buffer size, but as a `size_t`. This file carries no description for the function, so confirm the semantics against the cuSPARSE headers before relying on that.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseScsric02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut f32,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding for the C function `cusparseDcsric02_bufferSizeExt` (matrix values of type `f64`).
///
/// The parameter list mirrors [`cusparseDcsric02_bufferSize`], except that the last argument is `pBufferSize: *mut size_t` rather than a pointer to a C `int`, and the matrix array parameters are named without the trailing `A`.
///
/// NOTE(review): presumably this reports the same `csric02` working-buffer size, but as a `size_t`. This file carries no description for the function, so confirm the semantics against the cuSPARSE headers before relying on that.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseDcsric02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut f64,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding for the C function `cusparseCcsric02_bufferSizeExt` (matrix values of type `cuComplex`).
///
/// The parameter list mirrors [`cusparseCcsric02_bufferSize`], except that the last argument is `pBufferSize: *mut size_t` rather than a pointer to a C `int`, and the matrix array parameters are named without the trailing `A`.
///
/// NOTE(review): presumably this reports the same `csric02` working-buffer size, but as a `size_t`. This file carries no description for the function, so confirm the semantics against the cuSPARSE headers before relying on that.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseCcsric02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut cuComplex,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding for the C function `cusparseZcsric02_bufferSizeExt` (matrix values of type `cuDoubleComplex`).
///
/// The parameter list mirrors [`cusparseZcsric02_bufferSize`], except that the last argument is `pBufferSize: *mut size_t` rather than a pointer to a C `int`, and the matrix array parameters are named without the trailing `A`.
///
/// NOTE(review): presumably this reports the same `csric02` working-buffer size, but as a `size_t`. This file carries no description for the function, so confirm the semantics against the cuSPARSE headers before relying on that.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseZcsric02_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedVal: *mut cuDoubleComplex,
csrSortedRowPtr: *const ::core::ffi::c_int,
csrSortedColInd: *const ::core::ffi::c_int,
info: csric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `f32`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseScsric02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `f64`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseDcsric02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `cuComplex`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseCcsric02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting, for matrix values of type `cuDoubleComplex`.
///
/// | |
/// | --- |
/// | $A \approx LL^H$ |
///
/// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding).
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This item is declared in an `unsafe extern "C"` block, so calling it is `unsafe` and Rust validates none of the arguments. NOTE(review): the exact validity requirements for the handles and raw pointers are set by the cuSPARSE library and are not visible in this file; confirm them against the cuSPARSE documentation.
pub fn cusparseZcsric02_analysis(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
///
/// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided.
///
/// For example, suppose `A` is a real m-by-m matrix. The example code referred to here (not included in this binding's documentation) solves the preconditioned system `M*y = x`, where `M` is the product of Cholesky factorization `L` and its transpose.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseScsric02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
///
/// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided.
///
/// For example, suppose `A` is a real m-by-m matrix. The example code referred to here (not included in this binding's documentation) solves the preconditioned system `M*y = x`, where `M` is the product of Cholesky factorization `L` and its transpose.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseDcsric02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
///
/// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided.
///
/// For example, suppose `A` is a real m-by-m matrix. The example code referred to here (not included in this binding's documentation) solves the preconditioned system `M*y = x`, where `M` is the product of Cholesky factorization `L` and its transpose.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseCcsric02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
///
/// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
///
/// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided.
///
/// For example, suppose `A` is a real m-by-m matrix. The example code referred to here (not included in this binding's documentation) solves the preconditioned system `M*y = x`, where `M` is the product of Cholesky factorization `L` and its transpose.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseZcsric02(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA_valM: *mut cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
info: csric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero (the block is not positive definite). Otherwise, `position=-1`.
///
/// The `position` can be 0-based or 1-based, the same as the matrix.
///
/// Function [`cusparseXbsric02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
///
/// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This binding is marked `#[deprecated]`, so using it produces a deprecation warning. It is a raw FFI declaration: `position` is handed to the C library as an unchecked raw pointer.
///
/// # Parameters
///
/// - `position`: If no structural or numerical zero, `position` is -1, otherwise if `A(j,j)` is missing or `L(j,j)` is not positive definite, `position=j`.
#[deprecated]
pub fn cusparseXbsric02_zeroPivot(
handle: cusparseHandle_t,
info: bsric02Info_t,
position: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
///
/// NOTE(review): this binding names the array parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; they presumably correspond to the arrays named above. Confirm against the cuSPARSE header.
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSbsric02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
///
/// NOTE(review): this binding names the array parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; they presumably correspond to the arrays named above. Confirm against the cuSPARSE header.
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDbsric02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
///
/// NOTE(review): this binding names the array parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; they presumably correspond to the arrays named above. Confirm against the cuSPARSE header.
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCbsric02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
///
/// NOTE(review): this binding names the array parameters `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; they presumably correspond to the arrays named above. Confirm against the cuSPARSE header.
///
/// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZbsric02_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` counterpart of [`cusparseSbsric02_bufferSize`] for `f32` values.
///
/// The parameter list matches [`cusparseSbsric02_bufferSize`] except that the block dimension is named `blockSize` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): no upstream description was generated for this function. It presumably reports the same work-buffer size, in bytes, through `pBufferSize` — confirm against the cuSPARSE header.
pub fn cusparseSbsric02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` counterpart of [`cusparseDbsric02_bufferSize`] for `f64` values.
///
/// The parameter list matches [`cusparseDbsric02_bufferSize`] except that the block dimension is named `blockSize` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): no upstream description was generated for this function. It presumably reports the same work-buffer size, in bytes, through `pBufferSize` — confirm against the cuSPARSE header.
pub fn cusparseDbsric02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` counterpart of [`cusparseCbsric02_bufferSize`] for `cuComplex` values.
///
/// The parameter list matches [`cusparseCbsric02_bufferSize`] except that the block dimension is named `blockSize` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): no upstream description was generated for this function. It presumably reports the same work-buffer size, in bytes, through `pBufferSize` — confirm against the cuSPARSE header.
pub fn cusparseCbsric02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` counterpart of [`cusparseZbsric02_bufferSize`] for `cuDoubleComplex` values.
///
/// The parameter list matches [`cusparseZbsric02_bufferSize`] except that the block dimension is named `blockSize` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
///
/// NOTE(review): no upstream description was generated for this function. It presumably reports the same work-buffer size, in bytes, through `pBufferSize` — confirm against the cuSPARSE header.
pub fn cusparseZbsric02_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockSize: ::core::ffi::c_int,
info: bsric02Info_t,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` (named `pInputBuffer` in this binding) must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseSbsric02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pInputBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` (named `pInputBuffer` in this binding) must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseDbsric02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pInputBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` (named `pInputBuffer` in this binding) must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseCbsric02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pInputBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` (named `pInputBuffer` in this binding) must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
///
/// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
///
/// It is the user’s choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseZbsric02_analysis(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *const cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pInputBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
///
/// Function `bsric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
///
/// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
///
/// For example, suppose `A` is a real m-by-m matrix, where `m=mb*blockDim`. The example code referred to here (not included in this binding's documentation) solves the preconditioned system `M*y = x`, where `M` is the product of Cholesky factorization `L` and its transpose.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSbsric02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// This binding is the variant whose value array `bsrSortedVal` is declared as `*mut f64`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// NOTE(review): `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this declaration; confirm against the cuSPARSE documentation.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
///
/// Function `bsric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
///
/// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
///
/// For example, suppose `A` is a real m-by-m matrix, where `m=mb*blockDim`. The following code solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
///
/// NOTE(review): the example code that the previous paragraph refers to is not included in this comment.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDbsric02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// This binding is the variant whose value array `bsrSortedVal` is declared as `*mut cuComplex`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// NOTE(review): `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this declaration; confirm against the cuSPARSE documentation.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
///
/// Function `bsric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
///
/// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
///
/// For example, suppose `A` is a real m-by-m matrix, where `m=mb*blockDim`. The following code solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
///
/// NOTE(review): the example code that the previous paragraph refers to is not included in this comment.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCbsric02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
///
/// This binding is the variant whose value array `bsrSortedVal` is declared as `*mut cuDoubleComplex`.
///
/// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
///
/// NOTE(review): `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this declaration; confirm against the cuSPARSE documentation.
///
/// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
///
/// Function `bsric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
///
/// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
///
/// For example, suppose `A` is a real m-by-m matrix, where `m=mb*blockDim`. The following code solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
///
/// NOTE(review): the example code that the previous paragraph refers to is not included in this comment.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZbsric02(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedVal: *mut cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
info: bsric02Info_t,
policy: cusparseSolvePolicy_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Foreign declaration of the C function `cusparseSgtsv2_bufferSizeExt`.
///
/// This is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const f32`.
///
/// The documentation of [`cusparseSgtsv2`] states that it requires a buffer size returned by `gtsv2_bufferSizeExt()`.
///
/// NOTE(review): the required size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
pub fn cusparseSgtsv2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f32,
d: *const f32,
du: *const f32,
B: *const f32,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Foreign declaration of the C function `cusparseDgtsv2_bufferSizeExt`.
///
/// This is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const f64`.
///
/// The documentation of [`cusparseDgtsv2`] states that it requires a buffer size returned by `gtsv2_bufferSizeExt()`.
///
/// NOTE(review): the required size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
pub fn cusparseDgtsv2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f64,
d: *const f64,
du: *const f64,
B: *const f64,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Foreign declaration of the C function `cusparseCgtsv2_bufferSizeExt`.
///
/// This is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const cuComplex`.
///
/// The documentation of [`cusparseCgtsv2`] states that it requires a buffer size returned by `gtsv2_bufferSizeExt()`.
///
/// NOTE(review): the required size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
pub fn cusparseCgtsv2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
B: *const cuComplex,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Foreign declaration of the C function `cusparseZgtsv2_bufferSizeExt`.
///
/// This is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const cuDoubleComplex`.
///
/// The documentation of [`cusparseZgtsv2`] states that it requires a buffer size returned by `gtsv2_bufferSizeExt()`.
///
/// NOTE(review): the required size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
pub fn cusparseZgtsv2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
B: *const cuDoubleComplex,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const f32` and whose `B` argument is declared as `*mut f32`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparse<t>gtsv_nopivot()` or `cusparse<t>gtsv2_nopivot()` at the expense of some execution time.
///
/// This function requires a buffer size returned by `gtsv2_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSgtsv2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f32,
d: *const f32,
du: *const f32,
B: *mut f32,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const f64` and whose `B` argument is declared as `*mut f64`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparse<t>gtsv_nopivot()` or `cusparse<t>gtsv2_nopivot()` at the expense of some execution time.
///
/// This function requires a buffer size returned by `gtsv2_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDgtsv2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f64,
d: *const f64,
du: *const f64,
B: *mut f64,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const cuComplex` and whose `B` argument is declared as `*mut cuComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparse<t>gtsv_nopivot()` or `cusparse<t>gtsv2_nopivot()` at the expense of some execution time.
///
/// This function requires a buffer size returned by `gtsv2_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCgtsv2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
B: *mut cuComplex,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const cuDoubleComplex` and whose `B` argument is declared as `*mut cuDoubleComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparse<t>gtsv_nopivot()` or `cusparse<t>gtsv2_nopivot()` at the expense of some execution time.
///
/// This function requires a buffer size returned by `gtsv2_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZgtsv2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
B: *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2_nopivot`, which computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const f32`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// NOTE(review): the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSgtsv2_nopivot_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f32,
d: *const f32,
du: *const f32,
B: *const f32,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2_nopivot`, which computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const f64`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// NOTE(review): the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDgtsv2_nopivot_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f64,
d: *const f64,
du: *const f64,
B: *const f64,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2_nopivot`, which computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const cuComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// NOTE(review): the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCgtsv2_nopivot_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
B: *const cuComplex,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2_nopivot`, which computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `B` arguments are declared as `*const cuDoubleComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// NOTE(review): the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZgtsv2_nopivot_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
B: *const cuDoubleComplex,
ldb: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const f32` and whose `B` argument is declared as `*mut f32`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSgtsv2_nopivot(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f32,
d: *const f32,
du: *const f32,
B: *mut f32,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const f64` and whose `B` argument is declared as `*mut f64`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDgtsv2_nopivot(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const f64,
d: *const f64,
du: *const f64,
B: *mut f64,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const cuComplex` and whose `B` argument is declared as `*mut cuComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCgtsv2_nopivot(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
B: *mut cuComplex,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of a tridiagonal linear system with multiple right-hand sides.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const cuDoubleComplex` and whose `B` argument is declared as `*mut cuDoubleComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZgtsv2_nopivot(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
B: *mut cuDoubleComplex,
ldb: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2StridedBatch`, which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `x` arguments are declared as `*const f32`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// NOTE(review): `X` above presumably refers to the `x` parameter, and the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm both against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSgtsv2StridedBatch_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const f32,
d: *const f32,
du: *const f32,
x: *const f32,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2StridedBatch`, which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `x` arguments are declared as `*const f64`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// NOTE(review): `X` above presumably refers to the `x` parameter, and the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm both against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDgtsv2StridedBatch_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const f64,
d: *const f64,
du: *const f64,
x: *const f64,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2StridedBatch`, which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `x` arguments are declared as `*const cuComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// NOTE(review): `X` above presumably refers to the `x` parameter, and the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm both against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCgtsv2StridedBatch_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
x: *const cuComplex,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the size of the buffer used in `gtsv2StridedBatch`, which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`.
///
/// This binding is the variant whose `dl`, `d`, `du`, and `x` arguments are declared as `*const cuDoubleComplex`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// NOTE(review): `X` above presumably refers to the `x` parameter, and the size is presumably written, in bytes, through the `bufferSizeInBytes` pointer; confirm both against the cuSPARSE documentation.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZgtsv2StridedBatch_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
x: *const cuDoubleComplex,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
bufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const f32` and whose `x` argument is declared as `*mut f32`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// NOTE(review): `X` above presumably refers to the `x` parameter of this declaration; confirm against the cuSPARSE documentation.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseSgtsv2StridedBatch(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const f32,
d: *const f32,
du: *const f32,
x: *mut f32,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`.
///
/// This binding is the variant whose `dl`, `d`, and `du` arguments are declared as `*const f64` and whose `x` argument is declared as `*mut f64`.
///
/// The coefficient matrix `A` of each of these tridiagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// NOTE(review): `X` above presumably refers to the `x` parameter of this declaration; confirm against the cuSPARSE documentation.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDgtsv2StridedBatch(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const f64,
d: *const f64,
du: *const f64,
x: *mut f64,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * y(i) = x(i)`
///
/// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that the solution `Y` overwrites the right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// This declaration is the `cuComplex` variant: the diagonals `dl`, `d`, and `du` are read-only `*const cuComplex` pointers, and `x` is the `*mut cuComplex` right-hand side that is overwritten with the solution.
pub fn cusparseCgtsv2StridedBatch(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
x: *mut cuComplex,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * y(i) = x(i)`
///
/// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that the solution `Y` overwrites the right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
///
/// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
///
/// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// This declaration is the `cuDoubleComplex` variant: the diagonals `dl`, `d`, and `du` are read-only `*const cuDoubleComplex` pointers, and `x` is the `*mut cuDoubleComplex` right-hand side that is overwritten with the solution.
pub fn cusparseZgtsv2StridedBatch(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
x: *mut cuDoubleComplex,
batchCount: ::core::ffi::c_int,
batchStride: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseSgtsvInterleavedBatch`] (`f32` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gtsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `f32` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the diagonals (`dl`, `d`, `du`) and `x` are taken as `*const` pointers here.
pub fn cusparseSgtsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *const f32,
d: *const f32,
du: *const f32,
x: *const f32,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseDgtsvInterleavedBatch`] (`f64` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gtsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `f64` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the diagonals (`dl`, `d`, `du`) and `x` are taken as `*const` pointers here.
pub fn cusparseDgtsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *const f64,
d: *const f64,
du: *const f64,
x: *const f64,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseCgtsvInterleavedBatch`] (`cuComplex` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gtsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `cuComplex` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the diagonals (`dl`, `d`, `du`) and `x` are taken as `*const` pointers here.
pub fn cusparseCgtsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
x: *const cuComplex,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseZgtsvInterleavedBatch`] (`cuDoubleComplex` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gtsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `cuDoubleComplex` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the diagonals (`dl`, `d`, `du`) and `x` are taken as `*const` pointers here.
pub fn cusparseZgtsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
x: *const cuDoubleComplex,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The data layout is different from `gtsvStridedBatch`, which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
///
/// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas`, provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and the last algorithm is QR. From a stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From a performance perspective, LU with partial pivoting and QR are about 10% to 20% slower than cuThomas.
///
/// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However, such a transformation takes time comparable to the solver itself. To reach best performance, the user must prepare interleaved format explicitly.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This declaration is the `f32` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. Unlike the strided-batch variant, `dl`, `d`, and `du` are declared `*mut` here, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseSgtsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *mut f32,
d: *mut f32,
du: *mut f32,
x: *mut f32,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The data layout is different from `gtsvStridedBatch`, which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
///
/// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas`, provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and the last algorithm is QR. From a stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From a performance perspective, LU with partial pivoting and QR are about 10% to 20% slower than cuThomas.
///
/// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However, such a transformation takes time comparable to the solver itself. To reach best performance, the user must prepare interleaved format explicitly.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This declaration is the `f64` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. Unlike the strided-batch variant, `dl`, `d`, and `du` are declared `*mut` here, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseDgtsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *mut f64,
d: *mut f64,
du: *mut f64,
x: *mut f64,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The data layout is different from `gtsvStridedBatch`, which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
///
/// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas`, provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and the last algorithm is QR. From a stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From a performance perspective, LU with partial pivoting and QR are about 10% to 20% slower than cuThomas.
///
/// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However, such a transformation takes time comparable to the solver itself. To reach best performance, the user must prepare interleaved format explicitly.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This declaration is the `cuComplex` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. Unlike the strided-batch variant, `dl`, `d`, and `du` are declared `*mut` here, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseCgtsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *mut cuComplex,
d: *mut cuComplex,
du: *mut cuComplex,
x: *mut cuComplex,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// The data layout is different from `gtsvStridedBatch`, which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
///
/// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas`, provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and the last algorithm is QR. From a stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From a performance perspective, LU with partial pivoting and QR are about 10% to 20% slower than cuThomas.
///
/// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However, such a transformation takes time comparable to the solver itself. To reach best performance, the user must prepare interleaved format explicitly.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This declaration is the `cuDoubleComplex` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. Unlike the strided-batch variant, `dl`, `d`, and `du` are declared `*mut` here, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseZgtsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
dl: *mut cuDoubleComplex,
d: *mut cuDoubleComplex,
du: *mut cuDoubleComplex,
x: *mut cuDoubleComplex,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseSgpsvInterleavedBatch`] (`f32` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gpsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `f32` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the five diagonals (`ds`, `dl`, `d`, `du`, `dw`) and `x` are taken as `*const` pointers here.
pub fn cusparseSgpsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *const f32,
dl: *const f32,
d: *const f32,
du: *const f32,
dw: *const f32,
x: *const f32,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseDgpsvInterleavedBatch`] (`f64` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gpsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `f64` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the five diagonals (`ds`, `dl`, `d`, `du`, `dw`) and `x` are taken as `*const` pointers here.
pub fn cusparseDgpsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *const f64,
dl: *const f64,
d: *const f64,
du: *const f64,
dw: *const f64,
x: *const f64,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseCgpsvInterleavedBatch`] (`cuComplex` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gpsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `cuComplex` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the five diagonals (`ds`, `dl`, `d`, `du`, `dw`) and `x` are taken as `*const` pointers here.
pub fn cusparseCgpsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *const cuComplex,
dl: *const cuComplex,
d: *const cuComplex,
du: *const cuComplex,
dw: *const cuComplex,
x: *const cuComplex,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query paired with [`cusparseZgpsvInterleavedBatch`] (`cuDoubleComplex` variant).
///
/// The solver's documentation requires a buffer of the size returned by `gpsvInterleavedBatch_bufferSizeExt()`; this declaration is that query for `cuDoubleComplex` data. The result is written through `pBufferSizeInBytes`, presumably as a byte count — TODO confirm against the cuSPARSE documentation.
///
/// The remaining arguments mirror those of the solver, except that the five diagonals (`ds`, `dl`, `d`, `du`, `dw`) and `x` are taken as `*const` pointers here.
pub fn cusparseZgpsvInterleavedBatch_bufferSizeExt(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *const cuDoubleComplex,
dl: *const cuDoubleComplex,
d: *const cuDoubleComplex,
du: *const cuDoubleComplex,
dw: *const cuDoubleComplex,
x: *const cuDoubleComplex,
batchCount: ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple penta-diagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formulas:
///
/// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
///
/// The first two elements of `ds` are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
///
/// The last two elements of `dw` are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
///
/// The data layout is the same as `gtsvStridedBatch`.
///
/// NOTE(review): given this routine's name, the intended layout reference above is presumably `gtsvInterleavedBatch` — confirm against the cuSPARSE documentation.
///
/// The routine is numerically stable because it uses QR to solve the linear system.
///
/// This function requires a buffer size returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
///
/// This declaration is the `f32` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. All five diagonals are declared `*mut`, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseSgpsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *mut f32,
dl: *mut f32,
d: *mut f32,
du: *mut f32,
dw: *mut f32,
x: *mut f32,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple penta-diagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formulas:
///
/// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
///
/// The first two elements of `ds` are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
///
/// The last two elements of `dw` are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
///
/// The data layout is the same as `gtsvStridedBatch`.
///
/// NOTE(review): given this routine's name, the intended layout reference above is presumably `gtsvInterleavedBatch` — confirm against the cuSPARSE documentation.
///
/// The routine is numerically stable because it uses QR to solve the linear system.
///
/// This function requires a buffer size returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
///
/// This declaration is the `f64` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. All five diagonals are declared `*mut`, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseDgpsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *mut f64,
dl: *mut f64,
d: *mut f64,
du: *mut f64,
dw: *mut f64,
x: *mut f64,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple penta-diagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formulas:
///
/// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
///
/// The first two elements of `ds` are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
///
/// The last two elements of `dw` are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
///
/// The data layout is the same as `gtsvStridedBatch`.
///
/// NOTE(review): given this routine's name, the intended layout reference above is presumably `gtsvInterleavedBatch` — confirm against the cuSPARSE documentation.
///
/// The routine is numerically stable because it uses QR to solve the linear system.
///
/// This function requires a buffer size returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
///
/// This declaration is the `cuComplex` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. All five diagonals are declared `*mut`, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseCgpsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *mut cuComplex,
dl: *mut cuComplex,
d: *mut cuComplex,
du: *mut cuComplex,
dw: *mut cuComplex,
x: *mut cuComplex,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function computes the solution of multiple penta-diagonal linear systems for *i*=0,…,`batchCount`:
///
/// `A(i) * x(i) = b(i)`
///
/// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
///
/// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formulas:
///
/// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
///
/// The first two elements of `ds` are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
///
/// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
///
/// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
///
/// `d(i) = A(i,i)` for `i=1,2,...,m`
///
/// `du(i) = A(i,i+1)` for `i=1,2,...,m`
///
/// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
///
/// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
///
/// The last two elements of `dw` are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
///
/// The data layout is the same as `gtsvStridedBatch`.
///
/// NOTE(review): given this routine's name, the intended layout reference above is presumably `gtsvInterleavedBatch` — confirm against the cuSPARSE documentation.
///
/// The routine is numerically stable because it uses QR to solve the linear system.
///
/// This function requires a buffer size returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The function supports the following properties if `pBuffer != NULL`:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
///
/// This declaration is the `cuDoubleComplex` variant. The parameter `x` carries the right-hand side `B` on entry and the solution `X` on exit. All five diagonals are declared `*mut`, so the routine may modify them — TODO confirm against the cuSPARSE documentation.
pub fn cusparseZgpsvInterleavedBatch(
handle: cusparseHandle_t,
algo: ::core::ffi::c_int,
m: ::core::ffi::c_int,
ds: *mut cuDoubleComplex,
dl: *mut cuDoubleComplex,
d: *mut cuDoubleComplex,
du: *mut cuDoubleComplex,
dw: *mut cuDoubleComplex,
x: *mut cuDoubleComplex,
batchCount: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for the `f32` CSR `csrgeam2` routine, [`cusparseScsrgeam2`].
///
/// [`cusparseScsrgeam2`] takes an explicit `pBuffer`; this function presumably reports the size that buffer must have, written through `pBufferSizeInBytes` — TODO confirm against the cuSPARSE documentation.
///
/// The arguments mirror those of [`cusparseScsrgeam2`], except that the arrays of `C` (`csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC`) are taken as `*const` pointers here.
pub fn cusparseScsrgeam2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const f32,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const f32,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for the `f64` CSR `csrgeam2` routine (presumably `cusparseDcsrgeam2` — TODO confirm).
///
/// `csrgeam2()` needs an explicit buffer; this function presumably reports the size that buffer must have, written through `pBufferSizeInBytes` — TODO confirm against the cuSPARSE documentation.
///
/// The scalars `alpha` and `beta`, the matrix descriptors, and the CSR arrays of `A`, `B`, and `C` are all passed as read-only (`*const`) inputs in this declaration.
pub fn cusparseDcsrgeam2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f64,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const f64,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const f64,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for the `cuComplex` CSR `csrgeam2` routine (presumably `cusparseCcsrgeam2` — TODO confirm).
///
/// `csrgeam2()` needs an explicit buffer; this function presumably reports the size that buffer must have, written through `pBufferSizeInBytes` — TODO confirm against the cuSPARSE documentation.
///
/// The scalars `alpha` and `beta`, the matrix descriptors, and the CSR arrays of `A`, `B`, and `C` are all passed as read-only (`*const`) inputs in this declaration.
pub fn cusparseCcsrgeam2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuComplex,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const cuComplex,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const cuComplex,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *const cuComplex,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query for the `cuDoubleComplex` CSR `csrgeam2` routine (presumably `cusparseZcsrgeam2` — TODO confirm).
///
/// `csrgeam2()` needs an explicit buffer; this function presumably reports the size that buffer must have, written through `pBufferSizeInBytes` — TODO confirm against the cuSPARSE documentation.
///
/// The scalars `alpha` and `beta`, the matrix descriptors, and the CSR arrays of `A`, `B`, and `C` are all passed as read-only (`*const`) inputs in this declaration.
pub fn cusparseZcsrgeam2_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const cuDoubleComplex,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const cuDoubleComplex,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *const cuDoubleComplex,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// First step of the two-step `csrgeam2` procedure: determines `csrSortedRowPtrC` and the total number of nonzero elements of `C`.
///
/// As described in the documentation of [`cusparseScsrgeam2`], the user allocates the row-pointer array of `C` with `m+1` elements and calls this function; afterwards `nnzC` can be read from `*nnzTotalDevHostPtr` or computed as `csrRowPtrC(m)-csrRowPtrC(0)`.
///
/// Only sparsity-pattern inputs are passed for `A` and `B` (row pointers and column indices); no value arrays are taken. `workspace` is presumably the buffer sized by the `csrgeam2_bufferSizeExt` family — TODO confirm against the cuSPARSE documentation.
pub fn cusparseXcsrgeam2Nnz(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
workspace: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the following matrix-matrix operation:
///
/// $C = \alpha \ast A + \beta \ast B$
///
/// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrcolIndC` respectively), and $\alpha\text{~and~}\beta$ are scalars. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse\[S|D|C|Z\]csrgeam2()` to complete matrix `C`.
///
/// The general procedure is as follows (the accompanying code example is not reproduced in these generated bindings; see the cuSPARSE documentation):
///
/// Several comments on `csrgeam2()`:
///
/// * The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
/// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse\[S|D|C|Z\]geam()` only for each iteration.
/// * The pointers `alpha` and `beta` must be valid.
/// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
/// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs an explicit buffer where `csrgeam()` allocates the buffer internally.
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// This declaration is the `f32` variant: the arrays of `A` and `B` and the scalars `alpha` and `beta` are read-only `*const` inputs, while the three arrays of `C` and `pBuffer` are `*mut`.
pub fn cusparseScsrgeam2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f32,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const f32,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const f32,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f32,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Adds two scaled CSR sparse matrices into a third one (double-precision real, `f64`, variant).
///
/// This function performs the following matrix-matrix operation
///
/// $C = \alpha \ast A + \beta \ast B$
///
/// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrColIndC` respectively), and $\alpha$ and $\beta$ are scalars. In this binding those arrays are the `csrSortedVal*`, `csrSortedRowPtr*` and `csrSortedColInd*` parameters. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse\[S|D|C|Z\]csrgeam2()` to complete matrix `C`.
///
/// The general procedure is as follows:
///
/// 1. Allocate `csrRowPtrC` with `m+1` elements and call [`cusparseXcsrgeam2Nnz`] to fill it and obtain the total number of nonzero elements.
/// 2. Read `nnzC` from either `*nnzTotalDevHostPtr` or `csrRowPtrC(m)-csrRowPtrC(0)`.
/// 3. Allocate `csrValC` and `csrColIndC` with `nnzC` elements each.
/// 4. Call `cusparse\[S|D|C|Z\]csrgeam2()` to complete matrix `C`.
///
/// Several comments on `csrgeam2()`:
///
/// * Only the non-transposed combination shown above is available. The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
/// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse\[S|D|C|Z\]csrgeam2()` only for each iteration.
/// * The pointers `alpha` and `beta` must be valid.
/// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
/// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs explicit buffer where `csrgeam()` allocates the buffer internally. The explicit buffer is the `pBuffer` argument; NOTE(review): its size is presumably obtained from the matching `csrgeam2_bufferSizeExt` routine — confirm against the cuSPARSE documentation.
/// * This function requires temporary extra storage that is allocated internally. NOTE(review): this statement looks at odds with the explicit `pBuffer` argument described in the previous item — confirm against the cuSPARSE documentation.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side, so the caller must uphold the requirements listed above.
pub fn cusparseDcsrgeam2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const f64,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const f64,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const f64,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f64,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Adds two scaled CSR sparse matrices into a third one (complex, `cuComplex`, variant).
///
/// This function performs the following matrix-matrix operation
///
/// $C = \alpha \ast A + \beta \ast B$
///
/// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrColIndC` respectively), and $\alpha$ and $\beta$ are scalars. In this binding those arrays are the `csrSortedVal*`, `csrSortedRowPtr*` and `csrSortedColInd*` parameters. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse\[S|D|C|Z\]csrgeam2()` to complete matrix `C`.
///
/// The general procedure is as follows:
///
/// 1. Allocate `csrRowPtrC` with `m+1` elements and call [`cusparseXcsrgeam2Nnz`] to fill it and obtain the total number of nonzero elements.
/// 2. Read `nnzC` from either `*nnzTotalDevHostPtr` or `csrRowPtrC(m)-csrRowPtrC(0)`.
/// 3. Allocate `csrValC` and `csrColIndC` with `nnzC` elements each.
/// 4. Call `cusparse\[S|D|C|Z\]csrgeam2()` to complete matrix `C`.
///
/// Several comments on `csrgeam2()`:
///
/// * Only the non-transposed combination shown above is available. The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
/// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse\[S|D|C|Z\]csrgeam2()` only for each iteration.
/// * The pointers `alpha` and `beta` must be valid.
/// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
/// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs explicit buffer where `csrgeam()` allocates the buffer internally. The explicit buffer is the `pBuffer` argument; NOTE(review): its size is presumably obtained from the matching `csrgeam2_bufferSizeExt` routine — confirm against the cuSPARSE documentation.
/// * This function requires temporary extra storage that is allocated internally. NOTE(review): this statement looks at odds with the explicit `pBuffer` argument described in the previous item — confirm against the cuSPARSE documentation.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side, so the caller must uphold the requirements listed above.
pub fn cusparseCcsrgeam2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuComplex,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const cuComplex,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const cuComplex,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut cuComplex,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Adds two scaled CSR sparse matrices into a third one (double complex, `cuDoubleComplex`, variant).
///
/// This function performs the following matrix-matrix operation
///
/// $C = \alpha \ast A + \beta \ast B$
///
/// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrColIndC` respectively), and $\alpha$ and $\beta$ are scalars. In this binding those arrays are the `csrSortedVal*`, `csrSortedRowPtr*` and `csrSortedColInd*` parameters. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse\[S|D|C|Z\]csrgeam2()` to complete matrix `C`.
///
/// The general procedure is as follows:
///
/// 1. Allocate `csrRowPtrC` with `m+1` elements and call [`cusparseXcsrgeam2Nnz`] to fill it and obtain the total number of nonzero elements.
/// 2. Read `nnzC` from either `*nnzTotalDevHostPtr` or `csrRowPtrC(m)-csrRowPtrC(0)`.
/// 3. Allocate `csrValC` and `csrColIndC` with `nnzC` elements each.
/// 4. Call `cusparse\[S|D|C|Z\]csrgeam2()` to complete matrix `C`.
///
/// Several comments on `csrgeam2()`:
///
/// * Only the non-transposed combination shown above is available. The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
/// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
/// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse\[S|D|C|Z\]csrgeam2()` only for each iteration.
/// * The pointers `alpha` and `beta` must be valid.
/// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
/// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs explicit buffer where `csrgeam()` allocates the buffer internally. The explicit buffer is the `pBuffer` argument; NOTE(review): its size is presumably obtained from the matching `csrgeam2_bufferSizeExt` routine — confirm against the cuSPARSE documentation.
/// * This function requires temporary extra storage that is allocated internally. NOTE(review): this statement looks at odds with the explicit `pBuffer` argument described in the previous item — confirm against the cuSPARSE documentation.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side, so the caller must uphold the requirements listed above.
pub fn cusparseZcsrgeam2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
alpha: *const cuDoubleComplex,
descrA: cusparseMatDescr_t,
nnzA: ::core::ffi::c_int,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
beta: *const cuDoubleComplex,
descrB: cusparseMatDescr_t,
nnzB: ::core::ffi::c_int,
csrSortedValB: *const cuDoubleComplex,
csrSortedRowPtrB: *const ::core::ffi::c_int,
csrSortedColIndB: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut cuDoubleComplex,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Colors the adjacency graph of a CSR matrix (single-precision real, `f32`, variant).
///
/// This function performs the coloring of the adjacency graph associated with the matrix `A` stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
///
/// The matrix `A` passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric the user should pass `A + A^T` as a parameter to this routine.
///
/// NOTE(review): in this signature the number of colors, their assignment and the reordering appear to be returned through the `ncolors`, `coloring` and `reordering` output pointers, and `fractionToColor` presumably holds the percentage of nodes at which the approximate algorithm stops — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseScsrcolor(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
fractionToColor: *const f32,
ncolors: *mut ::core::ffi::c_int,
coloring: *mut ::core::ffi::c_int,
reordering: *mut ::core::ffi::c_int,
info: cusparseColorInfo_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Colors the adjacency graph of a CSR matrix (double-precision real, `f64`, variant).
///
/// This function performs the coloring of the adjacency graph associated with the matrix `A` stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
///
/// The matrix `A` passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric the user should pass `A + A^T` as a parameter to this routine.
///
/// NOTE(review): in this signature the number of colors, their assignment and the reordering appear to be returned through the `ncolors`, `coloring` and `reordering` output pointers, and `fractionToColor` presumably holds the percentage of nodes at which the approximate algorithm stops — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseDcsrcolor(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
fractionToColor: *const f64,
ncolors: *mut ::core::ffi::c_int,
coloring: *mut ::core::ffi::c_int,
reordering: *mut ::core::ffi::c_int,
info: cusparseColorInfo_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Colors the adjacency graph of a CSR matrix (complex, `cuComplex`, variant).
///
/// This function performs the coloring of the adjacency graph associated with the matrix `A` stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
///
/// The matrix `A` passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric the user should pass `A + A^T` as a parameter to this routine.
///
/// Note that `fractionToColor` points to a real `f32` value even though the matrix values are `cuComplex`.
///
/// NOTE(review): in this signature the number of colors, their assignment and the reordering appear to be returned through the `ncolors`, `coloring` and `reordering` output pointers, and `fractionToColor` presumably holds the percentage of nodes at which the approximate algorithm stops — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseCcsrcolor(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
fractionToColor: *const f32,
ncolors: *mut ::core::ffi::c_int,
coloring: *mut ::core::ffi::c_int,
reordering: *mut ::core::ffi::c_int,
info: cusparseColorInfo_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Colors the adjacency graph of a CSR matrix (double complex, `cuDoubleComplex`, variant).
///
/// This function performs the coloring of the adjacency graph associated with the matrix `A` stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
///
/// The matrix `A` passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric the user should pass `A + A^T` as a parameter to this routine.
///
/// Note that `fractionToColor` points to a real `f64` value even though the matrix values are `cuDoubleComplex`.
///
/// NOTE(review): in this signature the number of colors, their assignment and the reordering appear to be returned through the `ncolors`, `coloring` and `reordering` output pointers, and `fractionToColor` presumably holds the percentage of nodes at which the approximate algorithm stops — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseZcsrcolor(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
fractionToColor: *const f64,
ncolors: *mut ::core::ffi::c_int,
coloring: *mut ::core::ffi::c_int,
reordering: *mut ::core::ffi::c_int,
info: cusparseColorInfo_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Counts the nonzero elements of a dense matrix (single-precision real, `f32`, variant).
///
/// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
///
/// NOTE(review): judging by the parameter names, `dirA` presumably selects row versus column counting, `A`/`lda` describe the dense matrix and its leading dimension, and the results are written through `nnzPerRowCol` and `nnzTotalDevHostPtr` — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseSnnz(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
A: *const f32,
lda: ::core::ffi::c_int,
nnzPerRowCol: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Counts the nonzero elements of a dense matrix (double-precision real, `f64`, variant).
///
/// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
///
/// NOTE(review): judging by the parameter names, `dirA` presumably selects row versus column counting, `A`/`lda` describe the dense matrix and its leading dimension, and the results are written through `nnzPerRowCol` and `nnzTotalDevHostPtr` — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseDnnz(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
A: *const f64,
lda: ::core::ffi::c_int,
nnzPerRowCol: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Counts the nonzero elements of a dense matrix (complex, `cuComplex`, variant).
///
/// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
///
/// NOTE(review): judging by the parameter names, `dirA` presumably selects row versus column counting, `A`/`lda` describe the dense matrix and its leading dimension, and the results are written through `nnzPerRowCol` and `nnzTotalDevHostPtr` — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseCnnz(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
A: *const cuComplex,
lda: ::core::ffi::c_int,
nnzPerRowCol: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Counts the nonzero elements of a dense matrix (double complex, `cuDoubleComplex`, variant).
///
/// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
///
/// NOTE(review): judging by the parameter names, `dirA` presumably selects row versus column counting, `A`/`lda` describe the dense matrix and its leading dimension, and the results are written through `nnzPerRowCol` and `nnzTotalDevHostPtr` — confirm against the cuSPARSE documentation.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseZnnz(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
A: *const cuDoubleComplex,
lda: ::core::ffi::c_int,
nnzPerRowCol: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// First step of the CSR to compressed-CSR conversion: counts the entries that survive a threshold (single-precision real, `f32`, variant).
///
/// This function is the step one to convert from csr format to compressed csr format.
///
/// Given a sparse matrix `A` and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix `C`, defined by the threshold applied to the entries of `A`.
///
/// NOTE(review): the defining formula is missing from this generated comment; presumably `C(i,j) = A(i,j)` if `|A(i,j)| > threshold`, with the threshold passed as `tol` — confirm against the cuSPARSE documentation.
///
/// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseSnnz_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
descr: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzPerRow: *mut ::core::ffi::c_int,
nnzC: *mut ::core::ffi::c_int,
tol: f32,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// First step of the CSR to compressed-CSR conversion: counts the entries that survive a threshold (double-precision real, `f64`, variant).
///
/// This function is the step one to convert from csr format to compressed csr format.
///
/// Given a sparse matrix `A` and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix `C`, defined by the threshold applied to the entries of `A`.
///
/// NOTE(review): the defining formula is missing from this generated comment; presumably `C(i,j) = A(i,j)` if `|A(i,j)| > threshold`, with the threshold passed as `tol` — confirm against the cuSPARSE documentation.
///
/// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseDnnz_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
descr: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzPerRow: *mut ::core::ffi::c_int,
nnzC: *mut ::core::ffi::c_int,
tol: f64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// First step of the CSR to compressed-CSR conversion: counts the entries that survive a threshold (complex, `cuComplex`, variant).
///
/// This function is the step one to convert from csr format to compressed csr format.
///
/// Given a sparse matrix `A` and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix `C`, defined by the threshold applied to the entries of `A`.
///
/// NOTE(review): the defining formula is missing from this generated comment; presumably `C(i,j) = A(i,j)` if `|A(i,j)| > threshold`, with the threshold passed as `tol` — confirm against the cuSPARSE documentation.
///
/// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct. In this variant `tol` is passed by value as a `cuComplex`.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseCnnz_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
descr: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzPerRow: *mut ::core::ffi::c_int,
nnzC: *mut ::core::ffi::c_int,
tol: cuComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// First step of the CSR to compressed-CSR conversion: counts the entries that survive a threshold (double complex, `cuDoubleComplex`, variant).
///
/// This function is the step one to convert from csr format to compressed csr format.
///
/// Given a sparse matrix `A` and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix `C`, defined by the threshold applied to the entries of `A`.
///
/// NOTE(review): the defining formula is missing from this generated comment; presumably `C(i,j) = A(i,j)` if `|A(i,j)| > threshold`, with the threshold passed as `tol` — confirm against the cuSPARSE documentation.
///
/// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct. In this variant `tol` is passed by value as a `cuDoubleComplex`.
///
/// * This function requires temporary extra storage that is allocated internally.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseZnnz_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
descr: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzPerRow: *mut ::core::ffi::c_int,
nnzC: *mut ::core::ffi::c_int,
tol: cuDoubleComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseScsr2csr_compress` (single-precision real, `f32`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and by the sibling `cusparse\[S|D|C|Z\]nnz_compress()` routines (documented as "step one" of the CSR to compressed-CSR conversion), this is presumably the second step, which writes the compressed matrix `C` using the per-row counts in `nnzPerRow` and the threshold `tol` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` and `nnzPerRow` are passed as `*const` pointers; the arrays of `C` (`csrSortedValC`, `csrSortedColIndC`, `csrSortedRowPtrC`) are passed as `*mut` pointers.
///
/// Beware of the argument order: the column-index array comes before the row-pointer array here (for both `A` and `C`), unlike the neighbouring CSR routines.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseScsr2csr_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedColIndA: *const ::core::ffi::c_int,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
nnzPerRow: *const ::core::ffi::c_int,
csrSortedValC: *mut f32,
csrSortedColIndC: *mut ::core::ffi::c_int,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
tol: f32,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseDcsr2csr_compress` (double-precision real, `f64`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and by the sibling `cusparse\[S|D|C|Z\]nnz_compress()` routines (documented as "step one" of the CSR to compressed-CSR conversion), this is presumably the second step, which writes the compressed matrix `C` using the per-row counts in `nnzPerRow` and the threshold `tol` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` and `nnzPerRow` are passed as `*const` pointers; the arrays of `C` (`csrSortedValC`, `csrSortedColIndC`, `csrSortedRowPtrC`) are passed as `*mut` pointers.
///
/// Beware of the argument order: the column-index array comes before the row-pointer array here (for both `A` and `C`), unlike the neighbouring CSR routines.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseDcsr2csr_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedColIndA: *const ::core::ffi::c_int,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
nnzPerRow: *const ::core::ffi::c_int,
csrSortedValC: *mut f64,
csrSortedColIndC: *mut ::core::ffi::c_int,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
tol: f64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseCcsr2csr_compress` (complex, `cuComplex`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and by the sibling `cusparse\[S|D|C|Z\]nnz_compress()` routines (documented as "step one" of the CSR to compressed-CSR conversion), this is presumably the second step, which writes the compressed matrix `C` using the per-row counts in `nnzPerRow` and the threshold `tol` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` and `nnzPerRow` are passed as `*const` pointers; the arrays of `C` (`csrSortedValC`, `csrSortedColIndC`, `csrSortedRowPtrC`) are passed as `*mut` pointers. `tol` is passed by value as a `cuComplex`.
///
/// Beware of the argument order: the column-index array comes before the row-pointer array here (for both `A` and `C`), unlike the neighbouring CSR routines.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseCcsr2csr_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedColIndA: *const ::core::ffi::c_int,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
nnzPerRow: *const ::core::ffi::c_int,
csrSortedValC: *mut cuComplex,
csrSortedColIndC: *mut ::core::ffi::c_int,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
tol: cuComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseZcsr2csr_compress` (double complex, `cuDoubleComplex`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and by the sibling `cusparse\[S|D|C|Z\]nnz_compress()` routines (documented as "step one" of the CSR to compressed-CSR conversion), this is presumably the second step, which writes the compressed matrix `C` using the per-row counts in `nnzPerRow` and the threshold `tol` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` and `nnzPerRow` are passed as `*const` pointers; the arrays of `C` (`csrSortedValC`, `csrSortedColIndC`, `csrSortedRowPtrC`) are passed as `*mut` pointers. `tol` is passed by value as a `cuDoubleComplex`.
///
/// Beware of the argument order: the column-index array comes before the row-pointer array here (for both `A` and `C`), unlike the neighbouring CSR routines.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseZcsr2csr_compress(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedColIndA: *const ::core::ffi::c_int,
csrSortedRowPtrA: *const ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
nnzPerRow: *const ::core::ffi::c_int,
csrSortedValC: *mut cuDoubleComplex,
csrSortedColIndC: *mut ::core::ffi::c_int,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
tol: cuDoubleComplex,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Compresses COO row indices into CSR row pointers.
///
/// This function converts the array containing the uncompressed row indices (corresponding to COO format) into an array of compressed row pointers (corresponding to CSR format).
///
/// It can also be used to convert the array containing the uncompressed column indices (corresponding to COO format) into an array of column pointers (corresponding to CSC format).
///
/// The uncompressed indices are read through `cooRowInd` (a `*const` pointer) and the compressed pointers are written through `csrSortedRowPtr` (a `*mut` pointer).
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseXcoo2csr(
handle: cusparseHandle_t,
cooRowInd: *const ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
m: ::core::ffi::c_int,
csrSortedRowPtr: *mut ::core::ffi::c_int,
idxBase: cusparseIndexBase_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Expands CSR row pointers into COO row indices.
///
/// This function converts the array containing the compressed row pointers (corresponding to CSR format) into an array of uncompressed row indices (corresponding to COO format).
///
/// It can also be used to convert the array containing the compressed column pointers (corresponding to CSC format) into an array of uncompressed column indices (corresponding to COO format).
///
/// The compressed pointers are read through `csrSortedRowPtr` (a `*const` pointer) and the uncompressed indices are written through `cooRowInd` (a `*mut` pointer).
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseXcsr2coo(
handle: cusparseHandle_t,
csrSortedRowPtr: *const ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
m: ::core::ffi::c_int,
cooRowInd: *mut ::core::ffi::c_int,
idxBase: cusparseIndexBase_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseXcsr2bsrNnz`.
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and parameters, it presumably analyses the CSR sparsity pattern of `A` (`csrSortedRowPtrA`, `csrSortedColIndA`) for the block size `blockDim`, filling `bsrSortedRowPtrC` and reporting the total number of nonzero blocks through `nnzTotalDevHostPtr`, ahead of a `cusparse\[S|D|C|Z\]csr2bsr()` call — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` are passed as `*const` pointers; `bsrSortedRowPtrC` and `nnzTotalDevHostPtr` are passed as `*mut` pointers.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseXcsr2bsrNnz(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseScsr2bsr` (single-precision real, `f32`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and parameters, it presumably converts the CSR matrix `A` (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) into the BSR matrix `C` (`bsrSortedValC`, `bsrSortedRowPtrC`, `bsrSortedColIndC`) with block size `blockDim`, i.e. the reverse of `cusparse\[S|D|C|Z\]bsr2csr()` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` are passed as `*const` pointers; the arrays of `C` are passed as `*mut` pointers.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseScsr2bsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut f32,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseDcsr2bsr` (double-precision real, `f64`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and parameters, it presumably converts the CSR matrix `A` (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) into the BSR matrix `C` (`bsrSortedValC`, `bsrSortedRowPtrC`, `bsrSortedColIndC`) with block size `blockDim`, i.e. the reverse of `cusparse\[S|D|C|Z\]bsr2csr()` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` are passed as `*const` pointers; the arrays of `C` are passed as `*mut` pointers.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseDcsr2bsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut f64,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseCcsr2bsr` (complex, `cuComplex`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and parameters, it presumably converts the CSR matrix `A` (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) into the BSR matrix `C` (`bsrSortedValC`, `bsrSortedRowPtrC`, `bsrSortedColIndC`) with block size `blockDim`, i.e. the reverse of `cusparse\[S|D|C|Z\]bsr2csr()` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` are passed as `*const` pointers; the arrays of `C` are passed as `*mut` pointers.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseCcsr2bsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut cuComplex,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw binding to cuSPARSE `cusparseZcsr2bsr` (double complex, `cuDoubleComplex`, variant).
///
/// NOTE(review): no upstream description is attached to this binding. Judging by its name and parameters, it presumably converts the CSR matrix `A` (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) into the BSR matrix `C` (`bsrSortedValC`, `bsrSortedRowPtrC`, `bsrSortedColIndC`) with block size `blockDim`, i.e. the reverse of `cusparse\[S|D|C|Z\]bsr2csr()` — confirm against the cuSPARSE documentation.
///
/// The arrays of `A` are passed as `*const` pointers; the arrays of `C` are passed as `*mut` pointers.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side.
pub fn cusparseZcsr2bsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut cuDoubleComplex,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Converts a BSR sparse matrix into CSR format (single-precision real, `f32`, variant).
///
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format that is defined by arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`. In this binding those arrays are the `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA`, `csrSortedValC`, `csrSortedRowPtrC` and `csrSortedColIndC` parameters.
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// `csrRowPtrC` of `m+1` elements
///
/// `csrValC` of `nnz` elements
///
/// `csrColIndC` of `nnz` elements
///
/// The general procedure is to allocate the three output arrays with the sizes above and then call this function (the example listing is not included in this generated comment).
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
///
/// # Safety
///
/// This is a raw FFI declaration: none of the pointer arguments are validated on the Rust side, so the caller must provide output arrays of the sizes listed above.
pub fn cusparseSbsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f32,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// This is the `f64` variant. In this declaration the array parameters carry a `Sorted` infix: the arrays named above correspond to `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA` (read-only pointers) and `csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC` (mutable pointers).
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing. See the cuSPARSE documentation.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseDbsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f64,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// This is the `cuComplex` variant. In this declaration the array parameters carry a `Sorted` infix: the arrays named above correspond to `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA` (read-only pointers) and `csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC` (mutable pointers).
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing. See the cuSPARSE documentation.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseCbsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut cuComplex,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// This is the `cuDoubleComplex` variant. In this declaration the array parameters carry a `Sorted` infix: the arrays named above correspond to `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA` (read-only pointers) and `csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC` (mutable pointers).
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing. See the cuSPARSE documentation.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseZbsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
blockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut cuDoubleComplex,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseSgebsr2gebsc_bufferSize` (`f32` values).
///
/// The documentation attached to `cusparseSgebsr2gebsc` in this file states that the user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`. This declaration takes the same leading matrix arguments (`mb`, `nb`, `nnzb`, the three read-only `bsrSorted*` arrays, `rowBlockDim`, `colBlockDim`) plus `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseSgebsr2gebsc_bufferSize(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseDgebsr2gebsc_bufferSize` (`f64` values).
///
/// The documentation attached to `cusparseDgebsr2gebsc` in this file states that the user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`. This declaration takes the same leading matrix arguments (`mb`, `nb`, `nnzb`, the three read-only `bsrSorted*` arrays, `rowBlockDim`, `colBlockDim`) plus `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseDgebsr2gebsc_bufferSize(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseCgebsr2gebsc_bufferSize` (`cuComplex` values).
///
/// The documentation attached to `cusparseCgebsr2gebsc` in this file states that the user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`. This declaration takes the same leading matrix arguments (`mb`, `nb`, `nnzb`, the three read-only `bsrSorted*` arrays, `rowBlockDim`, `colBlockDim`) plus `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseCgebsr2gebsc_bufferSize(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseZgebsr2gebsc_bufferSize` (`cuDoubleComplex` values).
///
/// The documentation attached to `cusparseZgebsr2gebsc` in this file states that the user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`. This declaration takes the same leading matrix arguments (`mb`, `nb`, `nnzb`, the three read-only `bsrSorted*` arrays, `rowBlockDim`, `colBlockDim`) plus `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseZgebsr2gebsc_bufferSize(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseSgebsr2gebsc_bufferSizeExt` (`f32` values).
///
/// The parameter list matches `cusparseSgebsr2gebsc_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by `cusparseSgebsr2gebsc` through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseSgebsr2gebsc_bufferSizeExt(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseDgebsr2gebsc_bufferSizeExt` (`f64` values).
///
/// The parameter list matches `cusparseDgebsr2gebsc_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by `cusparseDgebsr2gebsc` through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseDgebsr2gebsc_bufferSizeExt(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseCgebsr2gebsc_bufferSizeExt` (`cuComplex` values).
///
/// The parameter list matches `cusparseCgebsr2gebsc_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by `cusparseCgebsr2gebsc` through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseCgebsr2gebsc_bufferSizeExt(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseZgebsr2gebsc_bufferSizeExt` (`cuDoubleComplex` values).
///
/// The parameter list matches `cusparseZgebsr2gebsc_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by `cusparseZgebsr2gebsc` through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseZgebsr2gebsc_bufferSizeExt(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
///
/// The sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
///
/// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
///
/// This is the `f32` variant. The source matrix is supplied through the read-only pointers `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; the `bscVal`, `bscRowInd`, and `bscColPtr` arrays are passed as mutable pointers. `copyValues` is a `cusparseAction_t`, `idxBase` a `cusparseIndexBase_t`, and `pBuffer` the user-allocated buffer mentioned above.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseSgebsr2gebsc(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const f32,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
bscVal: *mut f32,
bscRowInd: *mut ::core::ffi::c_int,
bscColPtr: *mut ::core::ffi::c_int,
copyValues: cusparseAction_t,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
///
/// The sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
///
/// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
///
/// This is the `f64` variant. The source matrix is supplied through the read-only pointers `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; the `bscVal`, `bscRowInd`, and `bscColPtr` arrays are passed as mutable pointers. `copyValues` is a `cusparseAction_t`, `idxBase` a `cusparseIndexBase_t`, and `pBuffer` the user-allocated buffer mentioned above.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseDgebsr2gebsc(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const f64,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
bscVal: *mut f64,
bscRowInd: *mut ::core::ffi::c_int,
bscColPtr: *mut ::core::ffi::c_int,
copyValues: cusparseAction_t,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
///
/// The sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
///
/// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
///
/// This is the `cuComplex` variant. The source matrix is supplied through the read-only pointers `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; the `bscVal`, `bscRowInd`, and `bscColPtr` arrays are passed as mutable pointers. `copyValues` is a `cusparseAction_t`, `idxBase` a `cusparseIndexBase_t`, and `pBuffer` the user-allocated buffer mentioned above.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseCgebsr2gebsc(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const cuComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
bscVal: *mut cuComplex,
bscRowInd: *mut ::core::ffi::c_int,
bscColPtr: *mut ::core::ffi::c_int,
copyValues: cusparseAction_t,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
///
/// The sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
///
/// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
///
/// This is the `cuDoubleComplex` variant. The source matrix is supplied through the read-only pointers `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd`; the `bscVal`, `bscRowInd`, and `bscColPtr` arrays are passed as mutable pointers. `copyValues` is a `cusparseAction_t`, `idxBase` a `cusparseIndexBase_t`, and `pBuffer` the user-allocated buffer mentioned above.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseZgebsr2gebsc(
handle: cusparseHandle_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
bsrSortedVal: *const cuDoubleComplex,
bsrSortedRowPtr: *const ::core::ffi::c_int,
bsrSortedColInd: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
bscVal: *mut cuDoubleComplex,
bscRowInd: *mut ::core::ffi::c_int,
bscColPtr: *mut ::core::ffi::c_int,
copyValues: cusparseAction_t,
idxBase: cusparseIndexBase_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// NOTE(review): this description does not match the declaration below. The text speaks of value arrays and a single `blockDim`, whereas this function takes no value arrays at all (only `bsrSortedRowPtrA`/`bsrSortedColIndA` as read-only pointers and `csrSortedRowPtrC`/`csrSortedColIndC` as mutable pointers) and takes separate `rowBlockDim` and `colBlockDim` arguments. The same text is attached to the `bsr2csr` declarations in this file, so it was presumably carried over from them; confirm the real contract (sizes, properties) against the cuSPARSE documentation before relying on the statements below.
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseXgebsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// This is the `f32` variant. In this declaration the arrays named above correspond to `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA` (read-only pointers) and `csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC` (mutable pointers).
///
/// NOTE(review): the text below speaks of a single `blockDim`, but this function takes separate `rowBlockDim` and `colBlockDim` arguments. The same text is attached to the `bsr2csr` declarations in this file, so it was presumably carried over from them; confirm the size formulas and properties for general (rectangular) blocks against the cuSPARSE documentation.
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseSgebsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f32,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// This is the `f64` variant. In this declaration the arrays named above correspond to `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA` (read-only pointers) and `csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC` (mutable pointers).
///
/// NOTE(review): the text below speaks of a single `blockDim`, but this function takes separate `rowBlockDim` and `colBlockDim` arguments. The same text is attached to the `bsr2csr` declarations in this file, so it was presumably carried over from them; confirm the size formulas and properties for general (rectangular) blocks against the cuSPARSE documentation.
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseDgebsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f64,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// This is the `cuComplex` variant. In this declaration the arrays named above correspond to `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA` (read-only pointers) and `csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC` (mutable pointers).
///
/// NOTE(review): the text below speaks of a single `blockDim`, but this function takes separate `rowBlockDim` and `colBlockDim` arguments. The same text is attached to the `bsr2csr` declarations in this file, so it was presumably carried over from them; confirm the size formulas and properties for general (rectangular) blocks against the cuSPARSE documentation.
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseCgebsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut cuComplex,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (that is defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
///
/// This is the `cuDoubleComplex` variant. In this declaration the arrays named above correspond to `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA` (read-only pointers) and `csrSortedValC`, `csrSortedRowPtrC`, `csrSortedColIndC` (mutable pointers).
///
/// NOTE(review): the text below speaks of a single `blockDim`, but this function takes separate `rowBlockDim` and `colBlockDim` arguments. The same text is attached to the `bsr2csr` declarations in this file, so it was presumably carried over from them; confirm the size formulas and properties for general (rectangular) blocks against the cuSPARSE documentation.
///
/// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
///
/// * `csrRowPtrC` of `m+1` elements
/// * `csrValC` of `nnz` elements
/// * `csrColIndC` of `nnz` elements
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing.
///
/// Properties of the routine:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
pub fn cusparseZgebsr2csr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut cuDoubleComplex,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseScsr2gebsr_bufferSize` (`f32` values).
///
/// The documentation attached to `cusparseScsr2gebsr` in this file states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`. This declaration takes the CSR description of `A` (`m`, `n`, `descrA`, and the three read-only `csrSorted*` arrays), the block dimensions `rowBlockDim` and `colBlockDim`, and `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseScsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseDcsr2gebsr_bufferSize` (`f64` values).
///
/// The documentation attached to `cusparseDcsr2gebsr` in this file states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`. This declaration takes the CSR description of `A` (`m`, `n`, `descrA`, and the three read-only `csrSorted*` arrays), the block dimensions `rowBlockDim` and `colBlockDim`, and `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseDcsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseCcsr2gebsr_bufferSize` (`cuComplex` values).
///
/// The `csr2gebsr` documentation in this file (see `cusparseScsr2gebsr`) states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`. This declaration takes the CSR description of `A` (`m`, `n`, `descrA`, and the three read-only `csrSorted*` arrays), the block dimensions `rowBlockDim` and `colBlockDim`, and `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseCcsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseZcsr2gebsr_bufferSize` (`cuDoubleComplex` values).
///
/// The `csr2gebsr` documentation in this file (see `cusparseScsr2gebsr`) states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`. This declaration takes the CSR description of `A` (`m`, `n`, `descrA`, and the three read-only `csrSorted*` arrays), the block dimensions `rowBlockDim` and `colBlockDim`, and `pBufferSizeInBytes`, a mutable pointer to `c_int`.
///
/// NOTE(review): presumably the required size in bytes is written to `*pBufferSizeInBytes`; confirm against the cuSPARSE documentation.
pub fn cusparseZcsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseScsr2gebsr_bufferSizeExt` (`f32` values).
///
/// The parameter list matches `cusparseScsr2gebsr_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by `cusparseScsr2gebsr` through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseScsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseDcsr2gebsr_bufferSizeExt` (`f64` values).
///
/// The parameter list matches `cusparseDcsr2gebsr_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by `cusparseDcsr2gebsr` through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseDcsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseCcsr2gebsr_bufferSizeExt` (`cuComplex` values).
///
/// The parameter list matches `cusparseCcsr2gebsr_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by the `cuComplex` `csr2gebsr` conversion through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseCcsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseZcsr2gebsr_bufferSizeExt` (`cuDoubleComplex` values).
///
/// The parameter list matches `cusparseZcsr2gebsr_bufferSize` except for the last argument: here the out-pointer is `pBufferSize: *mut size_t` rather than a pointer to `c_int`.
///
/// NOTE(review): presumably this reports the workspace size needed by the `cuDoubleComplex` `csr2gebsr` conversion through `*pBufferSize`; confirm the semantics and units against the cuSPARSE documentation.
pub fn cusparseZcsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of the cuSPARSE routine `cusparseXcsr2gebsrNnz`.
///
/// According to the `csr2gebsr` documentation in this file (see `cusparseScsr2gebsr`), this is the first step of the CSR-to-general-BSR conversion: the user allocates `bsrRowPtrC` of `mb+1` elements and calls this function to determine the number of nonzero block columns per block row. Afterwards `nnzb` can be gathered from either `*nnzTotalDevHostPtr` or `bsrRowPtrC\[mb\]-bsrRowPtrC\[0\]`.
///
/// In this declaration the row-pointer output is the mutable pointer `bsrSortedRowPtrC`; the CSR structure of `A` is given by the read-only pointers `csrSortedRowPtrA` and `csrSortedColIndA` (no value array is taken).
///
/// The same documentation lists these properties for this routine:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// NOTE(review): the role and required size of `pBuffer` are not described here; presumably it is the buffer sized via `csr2gebsr_bufferSize()` — confirm against the cuSPARSE documentation.
pub fn cusparseXcsr2gebsrNnz(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix `A` in CSR format (that is defined by arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (that is defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
///
/// This is the `f32` variant. In this declaration the arrays named above correspond to `csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA` (read-only pointers) and `bsrSortedValC`, `bsrSortedRowPtrC`, `bsrSortedColIndC` (mutable pointers).
///
/// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
///
/// The block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC\[mb\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally function `cusparse\[S|D|C|Z\]csr2gebsr()` is called to complete the conversion.
///
/// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing. See the cuSPARSE documentation.
///
/// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// The routine `cusparse<t>csr2gebsr()` has the following properties:
///
/// * The routine requires no extra storage if `pBuffer != NULL`.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseScsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut f32,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function converts a sparse matrix `A` in CSR format (that is defined by arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (that is defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
///
/// This is the `f64` variant. In this declaration the arrays named above correspond to `csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA` (read-only pointers) and `bsrSortedValC`, `bsrSortedRowPtrC`, `bsrSortedColIndC` (mutable pointers).
///
/// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
///
/// The block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC\[mb\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally function `cusparse\[S|D|C|Z\]csr2gebsr()` is called to complete the conversion.
///
/// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
///
/// The general procedure is as follows:
///
/// NOTE(review): the sentence above introduces nothing in this generated comment; the example it refers to appears to be missing. See the cuSPARSE documentation.
///
/// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// The routine `cusparse<t>csr2gebsr()` has the following properties:
///
/// * The routine requires no extra storage if `pBuffer != NULL`.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseDcsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut f64,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Converts a sparse matrix `A` in CSR format (defined by the arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
///
/// This is the `cuComplex` member of the `cusparse\[S|D|C|Z\]csr2gebsr()` family: `csrSortedValA` and `bsrSortedValC` point to `cuComplex` elements. The arrays named above appear in this signature with a `Sorted` infix (`csrSortedValA`, `bsrSortedRowPtrC`, ...).
///
/// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
///
/// Each block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC\[mb\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally function `cusparse\[S|D|C|Z\]csr2gebsr()` is called to complete the conversion.
///
/// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
///
/// The general procedure is as follows:
///
/// 1. Call `csr2gebsr_bufferSize()` and allocate `pBuffer` with the reported size.
/// 2. Allocate `bsrRowPtrC` with `mb+1` elements and call [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row; read `nnzb` from `*nnzTotalDevHostPtr` or from `bsrRowPtrC\[mb\]-bsrRowPtrC\[0\]`.
/// 3. Allocate `bsrValC` with `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` with `nnzb` integers.
/// 4. Call `cusparse\[S|D|C|Z\]csr2gebsr()` to complete the conversion.
///
/// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// The routine `cusparse<t>csr2gebsr()` has the following properties:
///
/// * The routine requires no extra storage if `pBuffer != NULL`.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseCcsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut cuComplex,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Converts a sparse matrix `A` in CSR format (defined by the arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
///
/// This is the `cuDoubleComplex` member of the `cusparse\[S|D|C|Z\]csr2gebsr()` family: `csrSortedValA` and `bsrSortedValC` point to `cuDoubleComplex` elements. The arrays named above appear in this signature with a `Sorted` infix (`csrSortedValA`, `bsrSortedRowPtrC`, ...).
///
/// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
///
/// Each block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC\[mb\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally function `cusparse\[S|D|C|Z\]csr2gebsr()` is called to complete the conversion.
///
/// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
///
/// The general procedure is as follows:
///
/// 1. Call `csr2gebsr_bufferSize()` and allocate `pBuffer` with the reported size.
/// 2. Allocate `bsrRowPtrC` with `mb+1` elements and call [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row; read `nnzb` from `*nnzTotalDevHostPtr` or from `bsrRowPtrC\[mb\]-bsrRowPtrC\[0\]`.
/// 3. Allocate `bsrValC` with `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` with `nnzb` integers.
/// 4. Call `cusparse\[S|D|C|Z\]csr2gebsr()` to complete the conversion.
///
/// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
///
/// The routine `cusparse<t>csr2gebsr()` has the following properties:
///
/// * The routine requires no extra storage if `pBuffer != NULL`.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
pub fn cusparseZcsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const cuDoubleComplex,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut cuDoubleComplex,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDim: ::core::ffi::c_int,
colBlockDim: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Queries the size of the work buffer required by [`cusparseSgebsr2gebsr`], whose value array holds `f32` elements.
///
/// The documentation of `cusparse\[S|D|C|Z\]gebsr2gebsr()` requires the caller to call `gebsr2gebsr_bufferSize()`, allocate a buffer of the reported size, and pass it as `pBuffer`. The arguments describe the input matrix `A` in general BSR format (`mb`, `nb`, `nnzb`, `descrA`, the three `bsrSorted*A` arrays, and its block shape `rowBlockDimA*colBlockDimA`) together with the requested output block shape `rowBlockDimC*colBlockDimC`.
///
/// The result is written through `pBufferSizeInBytes`, a pointer to `c_int`. [`cusparseSgebsr2gebsr_bufferSizeExt`] takes the same inputs but reports through a `size_t` pointer.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseSgebsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Queries the size of the work buffer required by [`cusparseDgebsr2gebsr`], whose value array holds `f64` elements.
///
/// The documentation of `cusparse\[S|D|C|Z\]gebsr2gebsr()` requires the caller to call `gebsr2gebsr_bufferSize()`, allocate a buffer of the reported size, and pass it as `pBuffer`. The arguments describe the input matrix `A` in general BSR format (`mb`, `nb`, `nnzb`, `descrA`, the three `bsrSorted*A` arrays, and its block shape `rowBlockDimA*colBlockDimA`) together with the requested output block shape `rowBlockDimC*colBlockDimC`.
///
/// The result is written through `pBufferSizeInBytes`, a pointer to `c_int`. [`cusparseDgebsr2gebsr_bufferSizeExt`] takes the same inputs but reports through a `size_t` pointer.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseDgebsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Queries the size of the work buffer required by [`cusparseCgebsr2gebsr`], whose value array holds `cuComplex` elements.
///
/// The documentation of `cusparse\[S|D|C|Z\]gebsr2gebsr()` requires the caller to call `gebsr2gebsr_bufferSize()`, allocate a buffer of the reported size, and pass it as `pBuffer`. The arguments describe the input matrix `A` in general BSR format (`mb`, `nb`, `nnzb`, `descrA`, the three `bsrSorted*A` arrays, and its block shape `rowBlockDimA*colBlockDimA`) together with the requested output block shape `rowBlockDimC*colBlockDimC`.
///
/// The result is written through `pBufferSizeInBytes`, a pointer to `c_int`. [`cusparseCgebsr2gebsr_bufferSizeExt`] takes the same inputs but reports through a `size_t` pointer.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseCgebsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Queries the size of the work buffer required by [`cusparseZgebsr2gebsr`], whose value array holds `cuDoubleComplex` elements.
///
/// The documentation of `cusparse\[S|D|C|Z\]gebsr2gebsr()` requires the caller to call `gebsr2gebsr_bufferSize()`, allocate a buffer of the reported size, and pass it as `pBuffer`. The arguments describe the input matrix `A` in general BSR format (`mb`, `nb`, `nnzb`, `descrA`, the three `bsrSorted*A` arrays, and its block shape `rowBlockDimA*colBlockDimA`) together with the requested output block shape `rowBlockDimC*colBlockDimC`.
///
/// The result is written through `pBufferSizeInBytes`, a pointer to `c_int`. [`cusparseZgebsr2gebsr_bufferSizeExt`] takes the same inputs but reports through a `size_t` pointer.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseZgebsr2gebsr_bufferSize(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSizeInBytes: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` flavour of [`cusparseSgebsr2gebsr_bufferSize`] for `f32` values: the input parameters are the same, but the result is written through `pBufferSize`, a pointer to `size_t`, instead of a pointer to `c_int`.
///
/// NOTE(review): presumably the reported size is in bytes and is the one expected by [`cusparseSgebsr2gebsr`] for `pBuffer` — confirm against the cuSPARSE reference.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseSgebsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` flavour of [`cusparseDgebsr2gebsr_bufferSize`] for `f64` values: the input parameters are the same, but the result is written through `pBufferSize`, a pointer to `size_t`, instead of a pointer to `c_int`.
///
/// NOTE(review): presumably the reported size is in bytes and is the one expected by [`cusparseDgebsr2gebsr`] for `pBuffer` — confirm against the cuSPARSE reference.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseDgebsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` flavour of [`cusparseCgebsr2gebsr_bufferSize`] for `cuComplex` values: the input parameters are the same, but the result is written through `pBufferSize`, a pointer to `size_t`, instead of a pointer to `c_int`.
///
/// NOTE(review): presumably the reported size is in bytes and is the one expected by [`cusparseCgebsr2gebsr`] for `pBuffer` — confirm against the cuSPARSE reference.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseCgebsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `size_t` flavour of [`cusparseZgebsr2gebsr_bufferSize`] for `cuDoubleComplex` values: the input parameters are the same, but the result is written through `pBufferSize`, a pointer to `size_t`, instead of a pointer to `c_int`.
///
/// NOTE(review): presumably the reported size is in bytes and is the one expected by [`cusparseZgebsr2gebsr`] for `pBuffer` — confirm against the cuSPARSE reference.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseZgebsr2gebsr_bufferSizeExt(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Determines the number of nonzero block columns per block row of the output matrix `C` of a general-BSR to general-BSR conversion.
///
/// Per the documentation of `cusparse\[S|D|C|Z\]gebsr2gebsr()`, this is the first step of the conversion: the caller allocates `bsrRowPtrC` of `mc+1` elements, where `mc(=(m+rowBlockDimC-1)/rowBlockDimC)` is the number of block rows of `C`, calls this function, and then gathers `nnzc` from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC\[mc\]-bsrRowPtrC\[0\])`. That count sizes the `bsrValC` and `bsrColIndC` arrays passed to the conversion itself.
///
/// Only the index arrays of `A` (`bsrSortedRowPtrA`, `bsrSortedColIndA`) are taken; there is no value-array parameter, and all four typed `gebsr2gebsr` variants refer to this one function.
///
/// NOTE(review): `pBuffer` is presumably the buffer sized by `gebsr2gebsr_bufferSize()` — confirm against the cuSPARSE reference.
///
/// This is a raw FFI declaration: every pointer argument is handed to the C library unchecked.
pub fn cusparseXgebsr2gebsrNnz(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Converts a sparse matrix in general BSR format, defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`, into a sparse matrix in another general BSR format, defined by the arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
///
/// This is the `f32` member of the `cusparse\[S|D|C|Z\]gebsr2gebsr()` family: `bsrSortedValA` and `bsrSortedValC` point to `f32` elements. The arrays named above appear in this signature with a `Sorted` infix (`bsrSortedValA`, `bsrSortedRowPtrC`, ...).
///
/// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]csr2gebsr()`.
///
/// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]gebsr2csr()`.
///
/// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks. The matrix `C` is also general BSR format with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC\[mc\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse\[S|D|C|Z\]gebsr2gebsr()` is called to complete the conversion.
///
/// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
///
/// The general procedure is as follows:
///
/// 1. Call `gebsr2gebsr_bufferSize()` and allocate `pBuffer` with the reported size.
/// 2. Allocate `bsrRowPtrC` with `mc+1` elements and call [`cusparseXgebsr2gebsrNnz`]; read `nnzc` from `*nnzTotalDevHostPtr` or from `bsrRowPtrC\[mc\]-bsrRowPtrC\[0\]`.
/// 3. Allocate `bsrValC` with `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` with `nnzc` integers.
/// 4. Call `cusparse\[S|D|C|Z\]gebsr2gebsr()` to complete the conversion.
///
/// The routines have the following properties:
///
/// * The routines require no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routines do **not** support CUDA graph capture.
pub fn cusparseSgebsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f32,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut f32,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Converts a sparse matrix in general BSR format, defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`, into a sparse matrix in another general BSR format, defined by the arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
///
/// This is the `f64` member of the `cusparse\[S|D|C|Z\]gebsr2gebsr()` family: `bsrSortedValA` and `bsrSortedValC` point to `f64` elements. The arrays named above appear in this signature with a `Sorted` infix (`bsrSortedValA`, `bsrSortedRowPtrC`, ...).
///
/// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]csr2gebsr()`.
///
/// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]gebsr2csr()`.
///
/// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks. The matrix `C` is also general BSR format with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC\[mc\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse\[S|D|C|Z\]gebsr2gebsr()` is called to complete the conversion.
///
/// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
///
/// The general procedure is as follows:
///
/// 1. Call `gebsr2gebsr_bufferSize()` and allocate `pBuffer` with the reported size.
/// 2. Allocate `bsrRowPtrC` with `mc+1` elements and call [`cusparseXgebsr2gebsrNnz`]; read `nnzc` from `*nnzTotalDevHostPtr` or from `bsrRowPtrC\[mc\]-bsrRowPtrC\[0\]`.
/// 3. Allocate `bsrValC` with `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` with `nnzc` integers.
/// 4. Call `cusparse\[S|D|C|Z\]gebsr2gebsr()` to complete the conversion.
///
/// The routines have the following properties:
///
/// * The routines require no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routines do **not** support CUDA graph capture.
pub fn cusparseDgebsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const f64,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut f64,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Converts a sparse matrix in general BSR format, defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`, into a sparse matrix in another general BSR format, defined by the arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
///
/// This is the `cuComplex` member of the `cusparse\[S|D|C|Z\]gebsr2gebsr()` family: `bsrSortedValA` and `bsrSortedValC` point to `cuComplex` elements. The arrays named above appear in this signature with a `Sorted` infix (`bsrSortedValA`, `bsrSortedRowPtrC`, ...).
///
/// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]csr2gebsr()`.
///
/// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]gebsr2csr()`.
///
/// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks. The matrix `C` is also general BSR format with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC\[mc\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse\[S|D|C|Z\]gebsr2gebsr()` is called to complete the conversion.
///
/// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
///
/// The general procedure is as follows:
///
/// 1. Call `gebsr2gebsr_bufferSize()` and allocate `pBuffer` with the reported size.
/// 2. Allocate `bsrRowPtrC` with `mc+1` elements and call [`cusparseXgebsr2gebsrNnz`]; read `nnzc` from `*nnzTotalDevHostPtr` or from `bsrRowPtrC\[mc\]-bsrRowPtrC\[0\]`.
/// 3. Allocate `bsrValC` with `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` with `nnzc` integers.
/// 4. Call `cusparse\[S|D|C|Z\]gebsr2gebsr()` to complete the conversion.
///
/// The routines have the following properties:
///
/// * The routines require no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routines do **not** support CUDA graph capture.
pub fn cusparseCgebsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut cuComplex,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Converts a sparse matrix in general BSR format, defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`, into a sparse matrix in another general BSR format, defined by the arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
///
/// This is the `cuDoubleComplex` member of the `cusparse\[S|D|C|Z\]gebsr2gebsr()` family: `bsrSortedValA` and `bsrSortedValC` point to `cuDoubleComplex` elements. The arrays named above appear in this signature with a `Sorted` infix (`bsrSortedValA`, `bsrSortedRowPtrC`, ...).
///
/// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]csr2gebsr()`.
///
/// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse\[S|D|C|Z\]gebsr2gebsr()` is the same as `cusparse\[S|D|C|Z\]gebsr2csr()`.
///
/// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA\[mb\] - bsrRowPtrA\[0\])` nonzero blocks. The matrix `C` is also general BSR format with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
///
/// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC\[mc\]-bsrRowPtrC\[0\])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse\[S|D|C|Z\]gebsr2gebsr()` is called to complete the conversion.
///
/// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
///
/// The general procedure is as follows:
///
/// 1. Call `gebsr2gebsr_bufferSize()` and allocate `pBuffer` with the reported size.
/// 2. Allocate `bsrRowPtrC` with `mc+1` elements and call [`cusparseXgebsr2gebsrNnz`]; read `nnzc` from `*nnzTotalDevHostPtr` or from `bsrRowPtrC\[mc\]-bsrRowPtrC\[0\]`.
/// 3. Allocate `bsrValC` with `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` with `nnzc` integers.
/// 4. Call `cusparse\[S|D|C|Z\]gebsr2gebsr()` to complete the conversion.
///
/// The routines have the following properties:
///
/// * The routines require no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routines do **not** support CUDA graph capture.
pub fn cusparseZgebsr2gebsr(
handle: cusparseHandle_t,
dirA: cusparseDirection_t,
mb: ::core::ffi::c_int,
nb: ::core::ffi::c_int,
nnzb: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
bsrSortedValA: *const cuDoubleComplex,
bsrSortedRowPtrA: *const ::core::ffi::c_int,
bsrSortedColIndA: *const ::core::ffi::c_int,
rowBlockDimA: ::core::ffi::c_int,
colBlockDimA: ::core::ffi::c_int,
descrC: cusparseMatDescr_t,
bsrSortedValC: *mut cuDoubleComplex,
bsrSortedRowPtrC: *mut ::core::ffi::c_int,
bsrSortedColIndC: *mut ::core::ffi::c_int,
rowBlockDimC: ::core::ffi::c_int,
colBlockDimC: ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Creates an identity map. The output parameter `p` represents that map as `p = 0:1:(n-1)`, i.e. the sequence 0, 1, ..., `n-1`.
///
/// This function is typically used with `coosort`, `csrsort`, `cscsort`, whose permutation argument `P` must be initialised to such a map when the caller wants to reorder a value array afterwards.
///
/// The routine has the following properties:
///
/// * The routine requires no extra storage.
/// * The routine supports asynchronous execution.
/// * The routine supports CUDA graph capture.
///
/// The binding carries `#[deprecated]`, so using it produces a deprecation warning.
#[deprecated]
pub fn cusparseCreateIdentityPermutation(
handle: cusparseHandle_t,
n: ::core::ffi::c_int,
p: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query companion of the COO sort routines, judging by its name; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// NOTE(review): no upstream description is attached to this binding. By analogy with the `csrsort_bufferSizeExt()`/`csrsort()` pair described on [`cusparseXcsrsort`], the reported size is presumably the one expected for `pBuffer` by [`cusparseXcoosortByRow`] and [`cusparseXcoosortByColumn`] — confirm against the cuSPARSE reference.
///
/// Both index arrays (`cooRowsA`, `cooColsA`) are `*const` here, unlike in the sort routines where they are `*mut`.
pub fn cusparseXcoosort_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
cooRowsA: *const ::core::ffi::c_int,
cooColsA: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// COO sort routine, row-ordered variant judging by its name.
///
/// NOTE(review): no upstream description is attached to this binding. Presumably it sorts the COO index arrays by row in place and uses `P` as a permutation in the same way [`cusparseXcsrsort`] documents (initialise to `0:1:(nnz-1)`, then gather values with `P`) — confirm against the cuSPARSE reference.
///
/// Signature notes: `cooRowsA`, `cooColsA`, and `P` are all `*mut`, so the library may write to each of them. `pBuffer` is an untyped (`*mut c_void`) buffer, presumably sized via [`cusparseXcoosort_bufferSizeExt`].
pub fn cusparseXcoosortByRow(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
cooRowsA: *mut ::core::ffi::c_int,
cooColsA: *mut ::core::ffi::c_int,
P: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// COO sort routine, column-ordered variant judging by its name.
///
/// NOTE(review): no upstream description is attached to this binding. Presumably it sorts the COO index arrays by column in place and uses `P` as a permutation in the same way [`cusparseXcsrsort`] documents (initialise to `0:1:(nnz-1)`, then gather values with `P`) — confirm against the cuSPARSE reference.
///
/// Signature notes: `cooRowsA`, `cooColsA`, and `P` are all `*mut`, so the library may write to each of them. `pBuffer` is an untyped (`*mut c_void`) buffer, presumably sized via [`cusparseXcoosort_bufferSizeExt`].
pub fn cusparseXcoosortByColumn(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
cooRowsA: *mut ::core::ffi::c_int,
cooColsA: *mut ::core::ffi::c_int,
P: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Queries the buffer size required by [`cusparseXcsrsort`]; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// The documentation of [`cusparseXcsrsort`] states that `csrsort()` requires the buffer size returned by `csrsort_bufferSizeExt()`.
///
/// Both index arrays (`csrRowPtrA`, `csrColIndA`) are `*const` here; no matrix descriptor is taken.
pub fn cusparseXcsrsort_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
csrRowPtrA: *const ::core::ffi::c_int,
csrColIndA: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Sorts a matrix in CSR format. The sort is stable and in-place.
///
/// The matrix type is regarded as [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] implicitly. In other words, any symmetric property is ignored.
///
/// This function `csrsort()` requires buffer size returned by `csrsort_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The parameter `P` is both input and output. If the user wants to compute sorted `csrVal`, `P` must be set as `0:1:(nnz-1)` before `csrsort()`, and after `csrsort()`, the new sorted value array satisfies `csrVal_sorted = csrVal(P)`.
///
/// The general procedure is as follows:
///
/// 1. Call `csrsort_bufferSizeExt()` and allocate `pBuffer` with the reported size, at an address that is a multiple of 128 bytes.
/// 2. If sorted values are needed, set `P` to `0:1:(nnz-1)` (the identity map that [`cusparseCreateIdentityPermutation`] produces).
/// 3. Call `csrsort()`.
/// 4. Gather the values with the resulting permutation: `csrVal_sorted = csrVal(P)`.
///
/// The routine has the following properties:
///
/// * The routine requires no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseXcsrsort(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrRowPtrA: *const ::core::ffi::c_int,
csrColIndA: *mut ::core::ffi::c_int,
P: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Queries the buffer size required by [`cusparseXcscsort`]; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// The documentation of [`cusparseXcscsort`] states that `cscsort()` requires the buffer size returned by `cscsort_bufferSizeExt()`.
///
/// Both index arrays (`cscColPtrA`, `cscRowIndA`) are `*const` here; no matrix descriptor is taken.
pub fn cusparseXcscsort_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
cscColPtrA: *const ::core::ffi::c_int,
cscRowIndA: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Sorts a matrix in CSC format. The sort is stable and in-place.
///
/// The matrix type is regarded as [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] implicitly. In other words, any symmetric property is ignored.
///
/// This function `cscsort()` requires buffer size returned by `cscsort_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
///
/// The parameter `P` is both input and output. If the user wants to compute sorted `cscVal`, `P` must be set as `0:1:(nnz-1)` before `cscsort()`, and after `cscsort()`, the new sorted value array satisfies `cscVal_sorted = cscVal(P)`.
///
/// The general procedure is as follows:
///
/// 1. Call `cscsort_bufferSizeExt()` and allocate `pBuffer` with the reported size, at an address that is a multiple of 128 bytes.
/// 2. If sorted values are needed, set `P` to `0:1:(nnz-1)` (the identity map that [`cusparseCreateIdentityPermutation`] produces).
/// 3. Call `cscsort()`.
/// 4. Gather the values with the resulting permutation: `cscVal_sorted = cscVal(P)`.
///
/// The routine has the following properties:
///
/// * The routine requires no extra storage if `pBuffer != NULL`
/// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
/// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
pub fn cusparseXcscsort(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
cscColPtrA: *const ::core::ffi::c_int,
cscRowIndA: *mut ::core::ffi::c_int,
P: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query of the `csru2csr` family for `f32` values; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// NOTE(review): no upstream description is attached to this binding. The name suggests it sizes the `pBuffer` passed to [`cusparseScsru2csr`] and [`cusparseScsr2csru`] — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are declared `*mut` even in this query, while `csrRowPtr` is `*const`; `info` is a `csru2csrInfo_t`.
pub fn cusparseScsru2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
csrVal: *mut f32,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query of the `csru2csr` family for `f64` values; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// NOTE(review): no upstream description is attached to this binding. The name suggests it sizes the `pBuffer` passed to [`cusparseDcsru2csr`] and [`cusparseDcsr2csru`] — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are declared `*mut` even in this query, while `csrRowPtr` is `*const`; `info` is a `csru2csrInfo_t`.
pub fn cusparseDcsru2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
csrVal: *mut f64,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query of the `csru2csr` family for `cuComplex` values; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// NOTE(review): no upstream description is attached to this binding. The name suggests it sizes the `pBuffer` passed to [`cusparseCcsru2csr`] and [`cusparseCcsr2csru`] — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are declared `*mut` even in this query, while `csrRowPtr` is `*const`; `info` is a `csru2csrInfo_t`.
pub fn cusparseCcsru2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
csrVal: *mut cuComplex,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query of the `csru2csr` family for `cuDoubleComplex` values; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// NOTE(review): no upstream description is attached to this binding. The name suggests it sizes the `pBuffer` passed to [`cusparseZcsru2csr`] and [`cusparseZcsr2csru`] — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are declared `*mut` even in this query, while `csrRowPtr` is `*const`; `info` is a `csru2csrInfo_t`.
pub fn cusparseZcsru2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
csrVal: *mut cuDoubleComplex,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `f32` member of the `csru2csr` family.
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it converts a CSR matrix with unsorted column indices ("csru") into sorted CSR in place — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, `info` is a `csru2csrInfo_t`, and `pBuffer` is an untyped (`*mut c_void`) buffer, presumably sized via [`cusparseScsru2csr_bufferSizeExt`].
pub fn cusparseScsru2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut f32,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `f64` member of the `csru2csr` family.
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it converts a CSR matrix with unsorted column indices ("csru") into sorted CSR in place — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, `info` is a `csru2csrInfo_t`, and `pBuffer` is an untyped (`*mut c_void`) buffer, presumably sized via [`cusparseDcsru2csr_bufferSizeExt`].
pub fn cusparseDcsru2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut f64,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `cuComplex` member of the `csru2csr` family.
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it converts a CSR matrix with unsorted column indices ("csru") into sorted CSR in place — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, `info` is a `csru2csrInfo_t`, and `pBuffer` is an untyped (`*mut c_void`) buffer, presumably sized via [`cusparseCcsru2csr_bufferSizeExt`].
pub fn cusparseCcsru2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut cuComplex,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `cuDoubleComplex` member of the `csru2csr` family.
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it converts a CSR matrix with unsorted column indices ("csru") into sorted CSR in place — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, `info` is a `csru2csrInfo_t`, and `pBuffer` is an untyped (`*mut c_void`) buffer, presumably sized via [`cusparseZcsru2csr_bufferSizeExt`].
pub fn cusparseZcsru2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut cuDoubleComplex,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `f32` member of the `csr2csru` family; its parameter list is identical to that of [`cusparseScsru2csr`].
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it is the reverse of [`cusparseScsru2csr`] (sorted CSR back to the unsorted ordering), presumably relying on state kept in `info` — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, and `pBuffer` is an untyped (`*mut c_void`) buffer.
pub fn cusparseScsr2csru(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut f32,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `f64` member of the `csr2csru` family; its parameter list is identical to that of [`cusparseDcsru2csr`].
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it is the reverse of [`cusparseDcsru2csr`] (sorted CSR back to the unsorted ordering), presumably relying on state kept in `info` — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, and `pBuffer` is an untyped (`*mut c_void`) buffer.
pub fn cusparseDcsr2csru(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut f64,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `cuComplex` member of the `csr2csru` family; its parameter list is identical to that of [`cusparseCcsru2csr`].
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it is the reverse of [`cusparseCcsru2csr`] (sorted CSR back to the unsorted ordering), presumably relying on state kept in `info` — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, and `pBuffer` is an untyped (`*mut c_void`) buffer.
pub fn cusparseCcsr2csru(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut cuComplex,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// `cuDoubleComplex` member of the `csr2csru` family; its parameter list is identical to that of [`cusparseZcsru2csr`].
///
/// NOTE(review): no upstream description is attached to this binding. Judging by the name, it is the reverse of [`cusparseZcsru2csr`] (sorted CSR back to the unsorted ordering), presumably relying on state kept in `info` — confirm against the cuSPARSE reference.
///
/// Signature notes: `csrVal` and `csrColInd` are `*mut` (so the library may write to them), `csrRowPtr` is `*const`, and `pBuffer` is an untyped (`*mut c_void`) buffer.
pub fn cusparseZcsr2csru(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrVal: *mut cuDoubleComplex,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *mut ::core::ffi::c_int,
info: csru2csrInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query of the `pruneDense2csr` family for `f32` values; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// NOTE(review): no upstream description is attached to this binding. The name suggests it sizes the `pBuffer` passed to [`cusparseSpruneDense2csrNnz`] and [`cusparseSpruneDense2csr`], which presumably turn a dense `m*n` matrix `A` (leading dimension `lda`) into CSR while dropping entries according to `threshold` — confirm against the cuSPARSE reference.
///
/// Signature notes: `threshold` is passed by pointer, not by value, and all three `csrSorted*C` arrays are `*const` in this query.
pub fn cusparseSpruneDense2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
threshold: *const f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Buffer-size query of the `pruneDense2csr` family for `f64` values; the result is written through `pBufferSizeInBytes` (a `size_t` pointer).
///
/// NOTE(review): no upstream description is attached to this binding. The name suggests it sizes the `pBuffer` passed to [`cusparseDpruneDense2csrNnz`] and [`cusparseDpruneDense2csr`], which presumably turn a dense `m*n` matrix `A` (leading dimension `lda`) into CSR while dropping entries according to `threshold` — confirm against the cuSPARSE reference.
///
/// Signature notes: `threshold` is passed by pointer, not by value, and all three `csrSorted*C` arrays are `*const` in this query.
pub fn cusparseDpruneDense2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
threshold: *const f64,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneDense2csrNnz` (`f32` variant).
///
/// `A` and `threshold` are read-only pointers (`threshold` is passed by pointer, not by value); `csrRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseSpruneDense2csrNnz(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
threshold: *const f32,
descrC: cusparseMatDescr_t,
csrRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneDense2csrNnz` (`f64` variant).
///
/// `A` and `threshold` are read-only pointers (`threshold` is passed by pointer, not by value); `csrSortedRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseDpruneDense2csrNnz(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
threshold: *const f64,
descrC: cusparseMatDescr_t,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneDense2csr` (`f32` variant).
///
/// `csrSortedValC`, `csrSortedColIndC` and `pBuffer` are mutable pointers. `csrSortedRowPtrC` is read-only in this call, whereas the row-pointer argument of `cusparseSpruneDense2csrNnz` is mutable.
///
/// NOTE(review): the name suggests this prunes the dense matrix `A` against `threshold` into a CSR matrix `C`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseSpruneDense2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
threshold: *const f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneDense2csr` (`f64` variant).
///
/// `csrSortedValC`, `csrSortedColIndC` and `pBuffer` are mutable pointers. `csrSortedRowPtrC` is read-only in this call, whereas the row-pointer argument of `cusparseDpruneDense2csrNnz` is mutable.
///
/// NOTE(review): the name suggests this prunes the dense matrix `A` against `threshold` into a CSR matrix `C`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseDpruneDense2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
threshold: *const f64,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneCsr2csr_bufferSizeExt` (`f32` variant).
///
/// All arrays of both `A` and `C`, as well as `threshold`, are `*const` pointers here; the only `*mut` pointer is `pBufferSizeInBytes`.
///
/// NOTE(review): presumably the value written to `pBufferSizeInBytes` is the size in bytes of the `pBuffer` workspace expected by the other `cusparseSpruneCsr2csr*` calls; confirm against the cuSPARSE reference.
pub fn cusparseSpruneCsr2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
threshold: *const f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneCsr2csr_bufferSizeExt` (`f64` variant).
///
/// All arrays of both `A` and `C`, as well as `threshold`, are `*const` pointers here; the only `*mut` pointer is `pBufferSizeInBytes`.
///
/// NOTE(review): presumably the value written to `pBufferSizeInBytes` is the size in bytes of the `pBuffer` workspace expected by the other `cusparseDpruneCsr2csr*` calls; confirm against the cuSPARSE reference.
pub fn cusparseDpruneCsr2csr_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
threshold: *const f64,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneCsr2csrNnz` (`f32` variant).
///
/// The arrays of `A` and `threshold` are read-only pointers (`threshold` is passed by pointer, not by value); `csrSortedRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseSpruneCsr2csrNnz(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
threshold: *const f32,
descrC: cusparseMatDescr_t,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneCsr2csrNnz` (`f64` variant).
///
/// The arrays of `A` and `threshold` are read-only pointers (`threshold` is passed by pointer, not by value); `csrSortedRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseDpruneCsr2csrNnz(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
threshold: *const f64,
descrC: cusparseMatDescr_t,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneCsr2csr` (`f32` variant).
///
/// The arrays of `A` and `threshold` are read-only. For `C`, `csrSortedValC` and `csrSortedColIndC` are mutable pointers while `csrSortedRowPtrC` is read-only in this call (it is mutable in `cusparseSpruneCsr2csrNnz`).
///
/// NOTE(review): the name suggests this prunes CSR matrix `A` against `threshold` into CSR matrix `C`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseSpruneCsr2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
threshold: *const f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneCsr2csr` (`f64` variant).
///
/// The arrays of `A` and `threshold` are read-only. For `C`, `csrSortedValC` and `csrSortedColIndC` are mutable pointers while `csrSortedRowPtrC` is read-only in this call (it is mutable in `cusparseDpruneCsr2csrNnz`).
///
/// NOTE(review): the name suggests this prunes CSR matrix `A` against `threshold` into CSR matrix `C`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseDpruneCsr2csr(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
threshold: *const f64,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneDense2csrByPercentage_bufferSizeExt` (`f32` variant).
///
/// Compared with `cusparseSpruneDense2csr_bufferSizeExt`, the pointer `threshold` is replaced by a by-value `percentage: f32` and an extra `info: pruneInfo_t` argument is taken. All array arguments are `*const`; the only `*mut` pointer is `pBufferSizeInBytes`.
///
/// NOTE(review): presumably `pBufferSizeInBytes` receives the workspace size in bytes for the matching `ByPercentage` calls; confirm against the cuSPARSE reference.
pub fn cusparseSpruneDense2csrByPercentage_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
info: pruneInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneDense2csrByPercentage_bufferSizeExt` (`f64` variant).
///
/// Note that `percentage` is an `f32` passed by value even though the matrix data is `f64`. All array arguments are `*const`; the only `*mut` pointer is `pBufferSizeInBytes`.
///
/// NOTE(review): presumably `pBufferSizeInBytes` receives the workspace size in bytes for the matching `ByPercentage` calls; confirm against the cuSPARSE reference.
pub fn cusparseDpruneDense2csrByPercentage_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
info: pruneInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneDense2csrNnzByPercentage` (`f32` variant).
///
/// `A` is read-only and `percentage` is passed by value; `csrRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers. An `info: pruneInfo_t` argument is also required.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseSpruneDense2csrNnzByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneDense2csrNnzByPercentage` (`f64` variant).
///
/// `A` is read-only and `percentage` is an `f32` passed by value (even though the matrix data is `f64`); `csrRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers. An `info: pruneInfo_t` argument is also required.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseDpruneDense2csrNnzByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneDense2csrByPercentage` (`f32` variant).
///
/// `csrSortedValC`, `csrSortedColIndC` and `pBuffer` are mutable pointers; `A` and `csrSortedRowPtrC` are read-only. `percentage` is passed by value and an `info: pruneInfo_t` argument is required.
///
/// NOTE(review): the name suggests this prunes the dense matrix `A` into CSR matrix `C` according to `percentage`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseSpruneDense2csrByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f32,
lda: ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneDense2csrByPercentage` (`f64` variant).
///
/// `csrSortedValC`, `csrSortedColIndC` and `pBuffer` are mutable pointers; `A` and `csrSortedRowPtrC` are read-only. `percentage` is an `f32` passed by value (even though the matrix data is `f64`) and an `info: pruneInfo_t` argument is required.
///
/// NOTE(review): the name suggests this prunes the dense matrix `A` into CSR matrix `C` according to `percentage`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseDpruneDense2csrByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
A: *const f64,
lda: ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneCsr2csrByPercentage_bufferSizeExt` (`f32` variant).
///
/// All arrays of both `A` and `C` are `*const` pointers, `percentage` is passed by value, and an `info: pruneInfo_t` argument is required; the only `*mut` pointer is `pBufferSizeInBytes`.
///
/// NOTE(review): presumably `pBufferSizeInBytes` receives the workspace size in bytes for the matching `ByPercentage` calls; confirm against the cuSPARSE reference.
pub fn cusparseSpruneCsr2csrByPercentage_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
info: pruneInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneCsr2csrByPercentage_bufferSizeExt` (`f64` variant).
///
/// All arrays of both `A` and `C` are `*const` pointers, `percentage` is an `f32` passed by value (even though the matrix data is `f64`), and an `info: pruneInfo_t` argument is required; the only `*mut` pointer is `pBufferSizeInBytes`.
///
/// NOTE(review): presumably `pBufferSizeInBytes` receives the workspace size in bytes for the matching `ByPercentage` calls; confirm against the cuSPARSE reference.
pub fn cusparseDpruneCsr2csrByPercentage_bufferSizeExt(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *const f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *const ::core::ffi::c_int,
info: pruneInfo_t,
pBufferSizeInBytes: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneCsr2csrNnzByPercentage` (`f32` variant).
///
/// The arrays of `A` are read-only and `percentage` is passed by value; `csrSortedRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers. An `info: pruneInfo_t` argument is also required.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseSpruneCsr2csrNnzByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneCsr2csrNnzByPercentage` (`f64` variant).
///
/// The arrays of `A` are read-only and `percentage` is an `f32` passed by value (even though the matrix data is `f64`); `csrSortedRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer` are mutable pointers. An `info: pruneInfo_t` argument is also required.
///
/// NOTE(review): the `DevHostPtr` suffix suggests `nnzTotalDevHostPtr` may refer to either device or host memory; confirm against the cuSPARSE reference.
pub fn cusparseDpruneCsr2csrNnzByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedRowPtrC: *mut ::core::ffi::c_int,
nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseSpruneCsr2csrByPercentage` (`f32` variant).
///
/// The arrays of `A` and `csrSortedRowPtrC` are read-only; `csrSortedValC`, `csrSortedColIndC` and `pBuffer` are mutable pointers. `percentage` is passed by value and an `info: pruneInfo_t` argument is required.
///
/// NOTE(review): the name suggests this prunes CSR matrix `A` into CSR matrix `C` according to `percentage`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseSpruneCsr2csrByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f32,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f32,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Raw FFI declaration of cuSPARSE `cusparseDpruneCsr2csrByPercentage` (`f64` variant).
///
/// The arrays of `A` and `csrSortedRowPtrC` are read-only; `csrSortedValC`, `csrSortedColIndC` and `pBuffer` are mutable pointers. `percentage` is an `f32` passed by value (even though the matrix data is `f64`) and an `info: pruneInfo_t` argument is required.
///
/// NOTE(review): the name suggests this prunes CSR matrix `A` into CSR matrix `C` according to `percentage`; confirm the exact contract against the cuSPARSE reference.
pub fn cusparseDpruneCsr2csrByPercentage(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnzA: ::core::ffi::c_int,
descrA: cusparseMatDescr_t,
csrSortedValA: *const f64,
csrSortedRowPtrA: *const ::core::ffi::c_int,
csrSortedColIndA: *const ::core::ffi::c_int,
percentage: f32,
descrC: cusparseMatDescr_t,
csrSortedValC: *mut f64,
csrSortedRowPtrC: *const ::core::ffi::c_int,
csrSortedColIndC: *mut ::core::ffi::c_int,
info: pruneInfo_t,
pBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as the `alg` argument of [`cusparseCsr2cscEx2`] and
/// [`cusparseCsr2cscEx2_bufferSize`].
///
/// Stored as a `u32`; the single variant has the value `1`.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseCsr2CscAlg_t {
    /// Default algorithm.
    CUSPARSE_CSR2CSC_ALG_DEFAULT = 1,
}

impl cusparseCsr2CscAlg_t {
    /// Second name for the same discriminant value as
    /// [`cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT`], exposed as an associated
    /// constant because a Rust enum cannot carry two variants with one value.
    pub const CUSPARSE_CSR2CSC_ALG1: Self = Self::CUSPARSE_CSR2CSC_ALG_DEFAULT;
}
unsafe extern "C" {
/// This function converts a sparse matrix in CSR format (that is defined by the three arrays `csrVal`, `csrRowPtr`, and `csrColInd`) into a sparse matrix in CSC format (that is defined by arrays `cscVal`, `cscRowInd`, and `cscColPtr`). The resulting matrix can also be seen as the transpose of the original sparse matrix. Notice that this routine can also be used to convert a matrix in CSC format into a matrix in CSR format.
///
/// The routine requires extra storage proportional to the number of nonzero values `nnz`. It always produces the same output matrix.
///
/// It is executed asynchronously with respect to the host, and it may return control to the application on the host before the result is ready.
///
/// The function [`cusparseCsr2cscEx2_bufferSize`] returns the size of the workspace needed by [`cusparseCsr2cscEx2`]. User needs to allocate a buffer of this size and give that buffer to [`cusparseCsr2cscEx2`] as an argument.
///
/// If `nnz == 0`, then `csrColInd`, `csrVal`, `cscVal`, and `cscRowInd` could have `NULL` value. In this case, `cscColPtr` is set to `idxBase` for all values.
///
/// If `m == 0` or `n == 0`, the pointers are not checked and the routine returns [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`].
///
/// [`cusparseCsr2cscEx2`] supports the following data types:
///
/// | `X`/`Y` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_8I` |
/// | `cudaDataType_t::CUDA_R_16F` |
/// | `cudaDataType_t::CUDA_R_16BF` |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_16F` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_16BF` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// [`cusparseCsr2cscEx2`] supports the following algorithms ([`cusparseCsr2CscAlg_t`]):
///
/// | Algorithm | Notes |
/// | --- | --- |
/// | [`cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT`], `CUSPARSE_CSR2CSC_ALG1` | Default algorithm |
///
/// | Action | Notes |
/// | --- | --- |
/// | [`cusparseAction_t::CUSPARSE_ACTION_SYMBOLIC`] | Compute the “structure” of the CSC output matrix (offset, row indices) |
/// | [`cusparseAction_t::CUSPARSE_ACTION_NUMERIC`] | Compute the “structure” of the CSC output matrix and copy the values |
///
/// [`cusparseCsr2cscEx2`] has the following properties:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
///
/// NOTE(review): the "no extra storage" property above conflicts with the workspace requirement described earlier in this comment (`buffer` sized by [`cusparseCsr2cscEx2_bufferSize`]); confirm against the cuSPARSE reference.
///
/// [`cusparseCsr2cscEx2`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `m`: Number of rows of the CSR input matrix; number of columns of the CSC output matrix.
/// - `n`: Number of columns of the CSR input matrix; number of rows of the CSC output matrix.
/// - `nnz`: Number of nonzero elements of the CSR and CSC matrices.
/// - `csrVal`: Value array of size `nnz` of the CSR matrix; of same type as `valType`.
/// - `csrRowPtr`: Integer array of size `m + 1` that contains the CSR row offsets.
/// - `csrColInd`: Integer array of size `nnz` that contains the CSR column indices.
/// - `cscVal`: Value array of size `nnz` of the CSC matrix; of same type as `valType`.
/// - `cscColPtr`: Integer array of size `n + 1` that contains the CSC column offsets.
/// - `cscRowInd`: Integer array of size `nnz` that contains the CSC row indices.
/// - `valType`: Value type for both CSR and CSC matrices.
/// - `copyValues`: [`cusparseAction_t::CUSPARSE_ACTION_SYMBOLIC`] or [`cusparseAction_t::CUSPARSE_ACTION_NUMERIC`].
/// - `idxBase`: Index base [`cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO`] or [`cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ONE`].
/// - `alg`: Algorithm implementation. See [`cusparseCsr2CscAlg_t`] for possible values.
/// - `buffer`: Pointer to workspace buffer.
pub fn cusparseCsr2cscEx2(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
csrVal: *const ::core::ffi::c_void,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *const ::core::ffi::c_int,
cscVal: *mut ::core::ffi::c_void,
cscColPtr: *mut ::core::ffi::c_int,
cscRowInd: *mut ::core::ffi::c_int,
valType: cudaDataType,
copyValues: cusparseAction_t,
idxBase: cusparseIndexBase_t,
alg: cusparseCsr2CscAlg_t,
buffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for [`cusparseCsr2cscEx2`].
///
/// The parameter list is the same as that of [`cusparseCsr2cscEx2`] except for the last argument: instead of a `buffer` pointer this function takes `bufferSize`, a `*mut size_t` output. The documentation of [`cusparseCsr2cscEx2`] describes the result as the size of the workspace that the caller must allocate and pass to the conversion routine.
pub fn cusparseCsr2cscEx2_bufferSize(
handle: cusparseHandle_t,
m: ::core::ffi::c_int,
n: ::core::ffi::c_int,
nnz: ::core::ffi::c_int,
csrVal: *const ::core::ffi::c_void,
csrRowPtr: *const ::core::ffi::c_int,
csrColInd: *const ::core::ffi::c_int,
cscVal: *mut ::core::ffi::c_void,
cscColPtr: *mut ::core::ffi::c_int,
cscRowInd: *mut ::core::ffi::c_int,
valType: cudaDataType,
copyValues: cusparseAction_t,
idxBase: cusparseIndexBase_t,
alg: cusparseCsr2CscAlg_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
/// This type indicates the format of the sparse matrix.
/// See cuSPARSE Storage Formats for their description.
///
/// The discriminants are not contiguous: no variant is defined for the values `0` and `4`.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseFormat_t {
/// The matrix is stored in Compressed Sparse Row (CSR) format.
CUSPARSE_FORMAT_CSR = 1,
/// The matrix is stored in Compressed Sparse Column (CSC) format.
CUSPARSE_FORMAT_CSC = 2,
/// The matrix is stored in Coordinate (COO) format organized in *Structure of Arrays (SoA)* layout.
CUSPARSE_FORMAT_COO = 3,
/// The matrix is stored in Blocked-Ellpack (Blocked-ELL) format.
CUSPARSE_FORMAT_BLOCKED_ELL = 5,
/// The matrix is stored in Block Sparse Row (BSR) format.
CUSPARSE_FORMAT_BSR = 6,
/// The matrix is stored in Sliced-Ellpack format (going by the variant name; confirm against the cuSPARSE reference).
CUSPARSE_FORMAT_SLICED_ELLPACK = 7,
}
/// This type indicates the memory layout of a dense matrix.
///
/// Stored as a `u32`. There is no variant with the value `0`, so a zero-initialised value is not a valid `cusparseOrder_t`.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseOrder_t {
/// The matrix is stored in column-major.
CUSPARSE_ORDER_COL = 1,
/// The matrix is stored in row-major.
CUSPARSE_ORDER_ROW = 2,
}
/// This type indicates the index type for representing the sparse matrix indices.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseIndexType_t {
/// Presumably a 16-bit unsigned integer index, going by the `16U` suffix; confirm the range against the cuSPARSE reference.
CUSPARSE_INDEX_16U = 1,
/// 32-bit signed integer \[0, 2^31 - 1\].
CUSPARSE_INDEX_32I = 2,
/// 64-bit signed integer \[0, 2^63 - 1\].
CUSPARSE_INDEX_64I = 3,
}
/// Opaque sparse vector descriptor type.
///
/// Its only field is a private zero-length byte array, so it exposes no contents on the Rust side; it is referred to through the pointer aliases [`cusparseSpVecDescr_t`] and [`cusparseConstSpVecDescr_t`].
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpVecDescr {
_unused: [u8; 0],
}
/// Opaque dense vector descriptor type.
///
/// Its only field is a private zero-length byte array, so it exposes no contents on the Rust side; it is referred to through the pointer aliases [`cusparseDnVecDescr_t`] and [`cusparseConstDnVecDescr_t`].
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseDnVecDescr {
_unused: [u8; 0],
}
/// Opaque sparse matrix descriptor type.
///
/// Its only field is a private zero-length byte array, so it exposes no contents on the Rust side; it is referred to through the pointer aliases [`cusparseSpMatDescr_t`] and [`cusparseConstSpMatDescr_t`].
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpMatDescr {
_unused: [u8; 0],
}
/// Opaque dense matrix descriptor type.
///
/// Its only field is a private zero-length byte array, so it exposes no contents on the Rust side; it is referred to through the pointer aliases [`cusparseDnMatDescr_t`] and [`cusparseConstDnMatDescr_t`].
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseDnMatDescr {
_unused: [u8; 0],
}
/// Mutable handle to a sparse vector descriptor (`*mut` [`cusparseSpVecDescr`]).
pub type cusparseSpVecDescr_t = *mut cusparseSpVecDescr;
/// Mutable handle to a dense vector descriptor (`*mut` [`cusparseDnVecDescr`]).
pub type cusparseDnVecDescr_t = *mut cusparseDnVecDescr;
/// Mutable handle to a sparse matrix descriptor (`*mut` [`cusparseSpMatDescr`]).
pub type cusparseSpMatDescr_t = *mut cusparseSpMatDescr;
/// Mutable handle to a dense matrix descriptor (`*mut` [`cusparseDnMatDescr`]).
pub type cusparseDnMatDescr_t = *mut cusparseDnMatDescr;
/// Read-only handle to a sparse vector descriptor (`*const` [`cusparseSpVecDescr`]).
pub type cusparseConstSpVecDescr_t = *const cusparseSpVecDescr;
/// Read-only handle to a dense vector descriptor (`*const` [`cusparseDnVecDescr`]).
pub type cusparseConstDnVecDescr_t = *const cusparseDnVecDescr;
/// Read-only handle to a sparse matrix descriptor (`*const` [`cusparseSpMatDescr`]).
pub type cusparseConstSpMatDescr_t = *const cusparseSpMatDescr;
/// Read-only handle to a dense matrix descriptor (`*const` [`cusparseDnMatDescr`]).
pub type cusparseConstDnMatDescr_t = *const cusparseDnMatDescr;
unsafe extern "C" {
/// This function initializes the sparse vector descriptor `spVecDescr`.
///
/// [`cusparseCreateSpVec`] has the following constraints:
///
/// * `indices` and `values` must be aligned to the size of the datatypes specified by `idxType` and `valueType`, respectively. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spVecDescr`: Sparse vector descriptor. Passed as a pointer to the handle, i.e. this is an output argument.
/// - `size`: Size of the sparse vector.
/// - `nnz`: Number of non-zero entries of the sparse vector.
/// - `indices`: Indices of the sparse vector. Array with `nnz` elements.
/// - `values`: Values of the sparse vector. Array with `nnz` elements.
/// - `idxType`: Enumerator specifying the data type of `indices`.
/// - `idxBase`: Enumerator specifying the index base of `indices`.
/// - `valueType`: Enumerator specifying the datatype of `values`.
pub fn cusparseCreateSpVec(
spVecDescr: *mut cusparseSpVecDescr_t,
size: i64,
nnz: i64,
indices: *mut ::core::ffi::c_void,
values: *mut ::core::ffi::c_void,
idxType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Read-only counterpart of [`cusparseCreateSpVec`].
///
/// The parameter list matches [`cusparseCreateSpVec`] except that `indices` and `values` are `*const` pointers and the handle written through `spVecDescr` is a [`cusparseConstSpVecDescr_t`].
pub fn cusparseCreateConstSpVec(
spVecDescr: *mut cusparseConstSpVecDescr_t,
size: i64,
nnz: i64,
indices: *const ::core::ffi::c_void,
values: *const ::core::ffi::c_void,
idxType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function releases the host memory allocated for the sparse vector descriptor `spVecDescr`.
///
/// The handle is taken as [`cusparseConstSpVecDescr_t`] (a `*const` pointer), so both mutable and read-only descriptor handles can be passed.
///
/// # Parameters
///
/// - `spVecDescr`: Sparse vector descriptor.
pub fn cusparseDestroySpVec(
spVecDescr: cusparseConstSpVecDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the fields of the sparse vector descriptor `spVecDescr`.
///
/// Every argument after `spVecDescr` is a `*mut` output pointer. This variant takes the mutable handle type and yields `*mut` data pointers; [`cusparseConstSpVecGet`] is the declaration for read-only handles.
///
/// # Parameters
///
/// - `spVecDescr`: Sparse vector descriptor.
/// - `size`: Size of the sparse vector.
/// - `nnz`: Number of non-zero entries of the sparse vector.
/// - `indices`: Indices of the sparse vector. Array with `nnz` elements.
/// - `values`: Values of the sparse vector. Array with `nnz` elements.
/// - `idxType`: Enumerator specifying the data type of `indices`.
/// - `idxBase`: Enumerator specifying the index base of `indices`.
/// - `valueType`: Enumerator specifying the datatype of `values`.
pub fn cusparseSpVecGet(
spVecDescr: cusparseSpVecDescr_t,
size: *mut i64,
nnz: *mut i64,
indices: *mut *mut ::core::ffi::c_void,
values: *mut *mut ::core::ffi::c_void,
idxType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Read-only counterpart of [`cusparseSpVecGet`].
///
/// Takes a [`cusparseConstSpVecDescr_t`] and reports `indices` and `values` as `*const` data pointers; the remaining output arguments have the same types as in [`cusparseSpVecGet`].
pub fn cusparseConstSpVecGet(
spVecDescr: cusparseConstSpVecDescr_t,
size: *mut i64,
nnz: *mut i64,
indices: *mut *const ::core::ffi::c_void,
values: *mut *const ::core::ffi::c_void,
idxType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `idxBase` field of the sparse vector descriptor `spVecDescr`.
///
/// The value is written through the `idxBase` output pointer; the function's own return value is a `cusparseStatus_t`.
///
/// # Parameters
///
/// - `spVecDescr`: Sparse vector descriptor.
/// - `idxBase`: Enumerator specifying the index base of `indices`.
pub fn cusparseSpVecGetIndexBase(
spVecDescr: cusparseConstSpVecDescr_t,
idxBase: *mut cusparseIndexBase_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `values` field of the sparse vector descriptor `spVecDescr`.
///
/// This variant requires the mutable handle type and yields a `*mut` data pointer; [`cusparseConstSpVecGetValues`] is the declaration for read-only handles.
///
/// # Parameters
///
/// - `spVecDescr`: Sparse vector descriptor.
/// - `values`: Values of the sparse vector. Array with `nnz` elements.
pub fn cusparseSpVecGetValues(
spVecDescr: cusparseSpVecDescr_t,
values: *mut *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Read-only counterpart of [`cusparseSpVecGetValues`].
///
/// Takes a [`cusparseConstSpVecDescr_t`] and writes a `*const` data pointer through `values`.
pub fn cusparseConstSpVecGetValues(
spVecDescr: cusparseConstSpVecDescr_t,
values: *mut *const ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `values` field of the sparse vector descriptor `spVecDescr`.
///
/// [`cusparseSpVecSetValues`] has the following constraints:
///
/// * `values` must be aligned to the size of the datatype specified in `spVecDescr`. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spVecDescr`: Sparse vector descriptor.
/// - `values`: Values of the sparse vector. Array with `nnz` elements.
pub fn cusparseSpVecSetValues(
spVecDescr: cusparseSpVecDescr_t,
values: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the dense vector descriptor `dnVecDescr`.
///
/// [`cusparseCreateDnVec`] has the following constraints:
///
/// * `values` must be aligned to the size of the datatype specified by `valueType`. Refer to [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `dnVecDescr`: Dense vector descriptor. Passed as a pointer to the handle, i.e. this is an output argument.
/// - `size`: Size of the dense vector.
/// - `values`: Values of the dense vector. Array with `size` elements.
/// - `valueType`: Enumerator specifying the datatype of `values`.
pub fn cusparseCreateDnVec(
dnVecDescr: *mut cusparseDnVecDescr_t,
size: i64,
values: *mut ::core::ffi::c_void,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Read-only counterpart of [`cusparseCreateDnVec`].
///
/// The parameter list matches [`cusparseCreateDnVec`] except that `values` is a `*const` pointer and the handle written through `dnVecDescr` is a [`cusparseConstDnVecDescr_t`].
pub fn cusparseCreateConstDnVec(
dnVecDescr: *mut cusparseConstDnVecDescr_t,
size: i64,
values: *const ::core::ffi::c_void,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function releases the host memory allocated for the dense vector descriptor `dnVecDescr`.
///
/// The handle is taken as [`cusparseConstDnVecDescr_t`] (a `*const` pointer), so both mutable and read-only descriptor handles can be passed.
///
/// # Parameters
///
/// - `dnVecDescr`: Dense vector descriptor.
pub fn cusparseDestroyDnVec(
dnVecDescr: cusparseConstDnVecDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the fields of the dense vector descriptor `dnVecDescr`.
///
/// Every argument after `dnVecDescr` is a `*mut` output pointer. [`cusparseConstDnVecGet`] is the declaration for read-only handles.
///
/// # Parameters
///
/// - `dnVecDescr`: Dense vector descriptor.
/// - `size`: Size of the dense vector.
/// - `values`: Values of the dense vector. Array with `size` elements.
/// - `valueType`: Enumerator specifying the datatype of `values`.
pub fn cusparseDnVecGet(
dnVecDescr: cusparseDnVecDescr_t,
size: *mut i64,
values: *mut *mut ::core::ffi::c_void,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Read-only counterpart of [`cusparseDnVecGet`].
///
/// Takes a [`cusparseConstDnVecDescr_t`] and reports `values` as a `*const` data pointer; `size` and `valueType` have the same types as in [`cusparseDnVecGet`].
pub fn cusparseConstDnVecGet(
dnVecDescr: cusparseConstDnVecDescr_t,
size: *mut i64,
values: *mut *const ::core::ffi::c_void,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `values` field of the dense vector descriptor `dnVecDescr`.
///
/// This variant requires the mutable handle type and yields a `*mut` data pointer; [`cusparseConstDnVecGetValues`] is the declaration for read-only handles.
///
/// # Parameters
///
/// - `dnVecDescr`: Dense vector descriptor.
/// - `values`: Values of the dense vector.
pub fn cusparseDnVecGetValues(
dnVecDescr: cusparseDnVecDescr_t,
values: *mut *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Read-only counterpart of [`cusparseDnVecGetValues`].
///
/// Takes a [`cusparseConstDnVecDescr_t`] and writes a `*const` data pointer through `values`.
pub fn cusparseConstDnVecGetValues(
dnVecDescr: cusparseConstDnVecDescr_t,
values: *mut *const ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `values` field of the dense vector descriptor `dnVecDescr`.
///
/// [`cusparseDnVecSetValues`] has the following constraints:
///
/// * `values` must be aligned to the size of the datatype specified in `dnVecDescr`. Refer to [`cudaDataType_t`] for the description of the datatypes.
///
/// Only the mutable handle type [`cusparseDnVecDescr_t`] is accepted.
///
/// # Parameters
///
/// - `dnVecDescr`: Dense vector descriptor.
/// - `values`: Values of the dense vector. Array with `size` elements.
pub fn cusparseDnVecSetValues(
dnVecDescr: cusparseDnVecDescr_t,
values: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function releases the host memory allocated for the sparse matrix descriptor `spMatDescr`.
///
/// The handle is taken as [`cusparseConstSpMatDescr_t`] (a `*const` pointer), so both mutable and read-only descriptor handles can be passed.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
pub fn cusparseDestroySpMat(
spMatDescr: cusparseConstSpMatDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `format` field of the sparse matrix descriptor `spMatDescr`.
///
/// The value is written through the `format` output pointer as a [`cusparseFormat_t`]; the function's own return value is a `cusparseStatus_t`.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `format`: Storage format of the sparse matrix.
pub fn cusparseSpMatGetFormat(
spMatDescr: cusparseConstSpMatDescr_t,
format: *mut cusparseFormat_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `idxBase` field of the sparse matrix descriptor `spMatDescr`.
///
/// The value is written through the `idxBase` output pointer; the function's own return value is a `cusparseStatus_t`.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `idxBase`: Index base of the sparse matrix.
pub fn cusparseSpMatGetIndexBase(
spMatDescr: cusparseConstSpMatDescr_t,
idxBase: *mut cusparseIndexBase_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `values` field of the sparse matrix descriptor `spMatDescr`.
///
/// This variant requires the mutable handle type and yields a `*mut` data pointer; [`cusparseConstSpMatGetValues`] is the declaration for read-only handles.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `values`: Values of the sparse matrix. Array with `nnz` elements.
pub fn cusparseSpMatGetValues(
spMatDescr: cusparseSpMatDescr_t,
values: *mut *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Read-only counterpart of [`cusparseSpMatGetValues`].
///
/// Takes a [`cusparseConstSpMatDescr_t`] and writes a `*const` data pointer through `values`.
pub fn cusparseConstSpMatGetValues(
spMatDescr: cusparseConstSpMatDescr_t,
values: *mut *const ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `values` field of the sparse matrix descriptor `spMatDescr`.
///
/// [`cusparseSpMatSetValues`] has the following constraints:
///
/// * `values` must be aligned to the size of its corresponding datatype specified in `spMatDescr`. See [`cudaDataType_t`] for the description of the datatypes.
///
/// Only the mutable handle type [`cusparseSpMatDescr_t`] is accepted.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `values`: Values of the sparse matrix. Array with `nnz` elements.
pub fn cusparseSpMatSetValues(
spMatDescr: cusparseSpMatDescr_t,
values: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the sizes of the sparse matrix `spMatDescr`.
///
/// `rows`, `cols` and `nnz` are output pointers to `i64` values.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of non-zero entries of the sparse matrix.
pub fn cusparseSpMatGetSize(
spMatDescr: cusparseConstSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
nnz: *mut i64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `batchCount` field of the sparse matrix descriptor `spMatDescr`.
///
/// Only the batch count is reported (through the `batchCount` output pointer); this declaration has no output for the batch strides.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `batchCount`: Number of batches of the sparse matrix.
pub fn cusparseSpMatGetStridedBatch(
spMatDescr: cusparseConstSpMatDescr_t,
batchCount: *mut ::core::ffi::c_int,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `batchCount` and the `batchStride` fields of the sparse matrix descriptor `spMatDescr`.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `batchCount`: Number of batches of the sparse matrix.
/// - `batchStride`: Address offset between consecutive batches.
pub fn cusparseCooSetStridedBatch(
spMatDescr: cusparseSpMatDescr_t,
batchCount: ::core::ffi::c_int,
batchStride: i64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `batchCount` field and the batch strides (`offsetsBatchStride`, `columnsValuesBatchStride`) of the sparse matrix descriptor `spMatDescr`.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `batchCount`: Number of batches of the sparse matrix.
/// - `offsetsBatchStride`: Address offset between consecutive batches for the row offset array.
/// - `columnsValuesBatchStride`: Address offset between consecutive batches for the column and value arrays.
pub fn cusparseCsrSetStridedBatch(
spMatDescr: cusparseSpMatDescr_t,
batchCount: ::core::ffi::c_int,
offsetsBatchStride: i64,
columnsValuesBatchStride: i64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `batchCount` field and the batch strides (`offsetsBatchStride`, `columnsBatchStride`, `ValuesBatchStride`) of the sparse matrix descriptor `spMatDescr`.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `batchCount`: Number of batches of the sparse matrix.
/// - `offsetsBatchStride`: Address offset between consecutive batches for the row offset array.
/// - `columnsBatchStride`: Address offset between consecutive batches for the column array.
/// - `ValuesBatchStride`: Address offset between consecutive batches for the values array (by analogy with the two strides above; confirm against the cuSPARSE reference).
pub fn cusparseBsrSetStridedBatch(
spMatDescr: cusparseSpMatDescr_t,
batchCount: ::core::ffi::c_int,
offsetsBatchStride: i64,
columnsBatchStride: i64,
ValuesBatchStride: i64,
) -> cusparseStatus_t;
}
/// Attribute selector used by [`cusparseSpMatGetAttribute`] and [`cusparseSpMatSetAttribute`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpMatAttribute_t {
/// Indicates if the lower or upper part of a matrix is stored in sparse storage.
CUSPARSE_SPMAT_FILL_MODE = 0,
/// Indicates if the matrix diagonal entries are unity.
CUSPARSE_SPMAT_DIAG_TYPE = 1,
}
unsafe extern "C" {
/// The function gets the attributes of the sparse matrix descriptor `spMatDescr`.
///
/// | Attribute | Meaning | Possible Values |
/// | --- | --- | --- |
/// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_FILL_MODE`] | Indicates if the lower or upper part of a matrix is stored in sparse storage | [`cusparseFillMode_t::CUSPARSE_FILL_MODE_LOWER`], [`cusparseFillMode_t::CUSPARSE_FILL_MODE_UPPER`] |
/// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_DIAG_TYPE`] | Indicates if the matrix diagonal entries are unity | [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_NON_UNIT`], [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] |
///
/// `data` is an untyped output pointer; the type of the value stored there depends on `attribute` (see the "Possible Values" column).
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `attribute`: Attribute enumerator.
/// - `data`: Attribute value.
/// - `dataSize`: Size of the attribute in bytes for safety.
pub fn cusparseSpMatGetAttribute(
spMatDescr: cusparseConstSpMatDescr_t,
attribute: cusparseSpMatAttribute_t,
data: *mut ::core::ffi::c_void,
dataSize: size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function sets the attributes of the sparse matrix descriptor `spMatDescr`.
///
/// | Attribute | Meaning | Possible Values |
/// | --- | --- | --- |
/// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_FILL_MODE`] | Indicates if the lower or upper part of a matrix is stored in sparse storage | [`cusparseFillMode_t::CUSPARSE_FILL_MODE_LOWER`], [`cusparseFillMode_t::CUSPARSE_FILL_MODE_UPPER`] |
/// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_DIAG_TYPE`] | Indicates if the matrix diagonal entries are unity | [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_NON_UNIT`], [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] |
///
/// Note that `data` is declared as a `*mut` pointer even though it carries the value to set.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `attribute`: Attribute enumerator.
/// - `data`: Attribute value.
/// - `dataSize`: Size of the attribute in bytes for safety.
pub fn cusparseSpMatSetAttribute(
spMatDescr: cusparseSpMatDescr_t,
attribute: cusparseSpMatAttribute_t,
data: *mut ::core::ffi::c_void,
dataSize: size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the sparse matrix descriptor `spMatDescr` in the CSR format.
///
/// [`cusparseCreateCsr`] has the following constraints:
///
/// * `csrRowOffsets`, `csrColInd`, and `csrValues` must be aligned to the size of the datatypes specified by `csrRowOffsetsType`, `csrColIndType`, and `valueType`, respectively. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor (output; the initialized descriptor is written through this pointer).
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of non-zero entries of the sparse matrix.
/// - `csrRowOffsets`: Row offsets of the sparse matrix. Array with `rows + 1` elements.
/// - `csrColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
/// - `csrValues`: Values of the sparse matrix. Array with `nnz` elements.
/// - `csrRowOffsetsType`: Data type of `csrRowOffsets`.
/// - `csrColIndType`: Data type of `csrColInd`.
/// - `idxBase`: Index base of `csrRowOffsets` and `csrColInd`.
/// - `valueType`: Datatype of `csrValues`.
pub fn cusparseCreateCsr(
spMatDescr: *mut cusparseSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
csrRowOffsets: *mut ::core::ffi::c_void,
csrColInd: *mut ::core::ffi::c_void,
csrValues: *mut ::core::ffi::c_void,
csrRowOffsetsType: cusparseIndexType_t,
csrColIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCreateCsr`].
///
/// The signature mirrors [`cusparseCreateCsr`], except that the CSR arrays are
/// taken as `*const` pointers and the descriptor written through `spMatDescr`
/// is a [`cusparseConstSpMatDescr_t`].
///
/// # Parameters
///
/// Same names and order as [`cusparseCreateCsr`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseCreateConstCsr(
spMatDescr: *mut cusparseConstSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
csrRowOffsets: *const ::core::ffi::c_void,
csrColInd: *const ::core::ffi::c_void,
csrValues: *const ::core::ffi::c_void,
csrRowOffsetsType: cusparseIndexType_t,
csrColIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the sparse matrix descriptor `spMatDescr` in the CSC format.
///
/// [`cusparseCreateCsc`] has the following constraints:
///
/// * `cscColOffsets`, `cscRowInd`, and `cscValues` must be aligned to the size of the datatypes specified by `cscColOffsetsType`, `cscRowIndType`, and `valueType`, respectively. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor (output; the initialized descriptor is written through this pointer).
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of non-zero entries of the sparse matrix.
/// - `cscColOffsets`: Column offsets of the sparse matrix. Array with `cols + 1` elements.
/// - `cscRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
/// - `cscValues`: Values of the sparse matrix. Array with `nnz` elements.
/// - `cscColOffsetsType`: Data type of `cscColOffsets`.
/// - `cscRowIndType`: Data type of `cscRowInd`.
/// - `idxBase`: Index base of `cscColOffsets` and `cscRowInd`.
/// - `valueType`: Datatype of `cscValues`.
pub fn cusparseCreateCsc(
spMatDescr: *mut cusparseSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
cscColOffsets: *mut ::core::ffi::c_void,
cscRowInd: *mut ::core::ffi::c_void,
cscValues: *mut ::core::ffi::c_void,
cscColOffsetsType: cusparseIndexType_t,
cscRowIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCreateCsc`].
///
/// The signature mirrors [`cusparseCreateCsc`], except that the CSC arrays are
/// taken as `*const` pointers and the descriptor written through `spMatDescr`
/// is a [`cusparseConstSpMatDescr_t`].
///
/// # Parameters
///
/// Same names and order as [`cusparseCreateCsc`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseCreateConstCsc(
spMatDescr: *mut cusparseConstSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
cscColOffsets: *const ::core::ffi::c_void,
cscRowInd: *const ::core::ffi::c_void,
cscValues: *const ::core::ffi::c_void,
cscColOffsetsType: cusparseIndexType_t,
cscRowIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in CSR format.
///
/// Every parameter other than `spMatDescr` is a `*mut` out-pointer through
/// which the corresponding field is returned.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of non-zero entries of the sparse matrix.
/// - `csrRowOffsets`: Row offsets of the sparse matrix. Array with `rows + 1` elements.
/// - `csrColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
/// - `csrValues`: Values of the sparse matrix. Array with `nnz` elements.
/// - `csrRowOffsetsType`: Data type of `csrRowOffsets`.
/// - `csrColIndType`: Data type of `csrColInd`.
/// - `idxBase`: Index base of `csrRowOffsets` and `csrColInd`.
/// - `valueType`: Datatype of `csrValues`.
pub fn cusparseCsrGet(
spMatDescr: cusparseSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
nnz: *mut i64,
csrRowOffsets: *mut *mut ::core::ffi::c_void,
csrColInd: *mut *mut ::core::ffi::c_void,
csrValues: *mut *mut ::core::ffi::c_void,
csrRowOffsetsType: *mut cusparseIndexType_t,
csrColIndType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCsrGet`].
///
/// The signature mirrors [`cusparseCsrGet`], except that it takes a
/// [`cusparseConstSpMatDescr_t`] and hands back the CSR array pointers as
/// `*const` instead of `*mut`.
///
/// # Parameters
///
/// Same names and order as [`cusparseCsrGet`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseConstCsrGet(
spMatDescr: cusparseConstSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
nnz: *mut i64,
csrRowOffsets: *mut *const ::core::ffi::c_void,
csrColInd: *mut *const ::core::ffi::c_void,
csrValues: *mut *const ::core::ffi::c_void,
csrRowOffsetsType: *mut cusparseIndexType_t,
csrColIndType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in CSC format.
///
/// Every parameter other than `spMatDescr` is a `*mut` out-pointer through
/// which the corresponding field is returned.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of non-zero entries of the sparse matrix.
/// - `cscColOffsets`: Column offsets of the sparse matrix. Array with `cols + 1` elements.
/// - `cscRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
/// - `cscValues`: Values of the sparse matrix. Array with `nnz` elements.
/// - `cscColOffsetsType`: Data type of `cscColOffsets`.
/// - `cscRowIndType`: Data type of `cscRowInd`.
/// - `idxBase`: Index base of `cscColOffsets` and `cscRowInd`.
/// - `valueType`: Datatype of `cscValues`.
pub fn cusparseCscGet(
spMatDescr: cusparseSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
nnz: *mut i64,
cscColOffsets: *mut *mut ::core::ffi::c_void,
cscRowInd: *mut *mut ::core::ffi::c_void,
cscValues: *mut *mut ::core::ffi::c_void,
cscColOffsetsType: *mut cusparseIndexType_t,
cscRowIndType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCscGet`].
///
/// The signature mirrors [`cusparseCscGet`], except that it takes a
/// [`cusparseConstSpMatDescr_t`] and hands back the CSC array pointers as
/// `*const` instead of `*mut`.
///
/// # Parameters
///
/// Same names and order as [`cusparseCscGet`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseConstCscGet(
spMatDescr: cusparseConstSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
nnz: *mut i64,
cscColOffsets: *mut *const ::core::ffi::c_void,
cscRowInd: *mut *const ::core::ffi::c_void,
cscValues: *mut *const ::core::ffi::c_void,
cscColOffsetsType: *mut cusparseIndexType_t,
cscRowIndType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the CSR array pointers (`csrRowOffsets`, `csrColInd`, `csrValues`) of the sparse matrix descriptor `spMatDescr`.
///
/// [`cusparseCsrSetPointers`] has the following constraints:
///
/// * `csrRowOffsets`, `csrColInd`, and `csrValues` must be aligned to the size of their corresponding datatypes specified in `spMatDescr`. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `csrRowOffsets`: Row offsets of the sparse matrix. Array with `rows + 1` elements.
/// - `csrColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
/// - `csrValues`: Values of the sparse matrix. Array with `nnz` elements.
pub fn cusparseCsrSetPointers(
spMatDescr: cusparseSpMatDescr_t,
csrRowOffsets: *mut ::core::ffi::c_void,
csrColInd: *mut ::core::ffi::c_void,
csrValues: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the CSC array pointers (`cscColOffsets`, `cscRowInd`, `cscValues`) of the sparse matrix descriptor `spMatDescr`.
///
/// [`cusparseCscSetPointers`] has the following constraints:
///
/// * `cscColOffsets`, `cscRowInd`, and `cscValues` must be aligned to the size of their corresponding datatypes specified in `spMatDescr`. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `cscColOffsets`: Column offsets of the sparse matrix. Array with `cols + 1` elements.
/// - `cscRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
/// - `cscValues`: Values of the sparse matrix. Array with `nnz` elements.
pub fn cusparseCscSetPointers(
spMatDescr: cusparseSpMatDescr_t,
cscColOffsets: *mut ::core::ffi::c_void,
cscRowInd: *mut ::core::ffi::c_void,
cscValues: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the sparse matrix descriptor `spMatDescr` for the Block Compressed Row (BSR) format.
///
/// [`cusparseCreateBsr`] has the following constraints:
///
/// * `bsrRowOffsets`, `bsrColInd`, and `bsrValues` must be aligned to the size of the datatypes specified by `bsrRowOffsetsType`, `bsrColIndType`, and `valueType`, respectively. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor (output; the initialized descriptor is written through this pointer).
/// - `brows`: Number of block rows of the sparse matrix.
/// - `bcols`: Number of block columns of the sparse matrix.
/// - `bnnz`: Number of blocks of the sparse matrix.
/// - `rowBlockSize`: Number of rows of each block.
/// - `colBlockSize`: Number of columns of each block.
/// - `bsrRowOffsets`: Block row offsets of the sparse matrix. Array of size `brows + 1`.
/// - `bsrColInd`: Block column indices of the sparse matrix. Array of size `bnnz`.
/// - `bsrValues`: Values of the sparse matrix. Array of size `bnnz * rowBlockSize * colBlockSize`.
/// - `bsrRowOffsetsType`: Data type of `bsrRowOffsets`.
/// - `bsrColIndType`: Data type of `bsrColInd`.
/// - `idxBase`: Base index of `bsrRowOffsets` and `bsrColInd`.
/// - `valueType`: Datatype of `bsrValues`.
/// - `order`: Enumerator specifying the memory layout of values in each block.
pub fn cusparseCreateBsr(
spMatDescr: *mut cusparseSpMatDescr_t,
brows: i64,
bcols: i64,
bnnz: i64,
rowBlockSize: i64,
colBlockSize: i64,
bsrRowOffsets: *mut ::core::ffi::c_void,
bsrColInd: *mut ::core::ffi::c_void,
bsrValues: *mut ::core::ffi::c_void,
bsrRowOffsetsType: cusparseIndexType_t,
bsrColIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
order: cusparseOrder_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCreateBsr`].
///
/// The signature mirrors [`cusparseCreateBsr`], except that the BSR arrays are
/// taken as `*const` pointers and the descriptor written through `spMatDescr`
/// is a [`cusparseConstSpMatDescr_t`].
///
/// # Parameters
///
/// Same order as [`cusparseCreateBsr`]; see that function for the
/// per-parameter descriptions. Note that the fifth and sixth parameters are
/// named `rowBlockDim` / `colBlockDim` here, in the positions where
/// [`cusparseCreateBsr`] has `rowBlockSize` / `colBlockSize`.
pub fn cusparseCreateConstBsr(
spMatDescr: *mut cusparseConstSpMatDescr_t,
brows: i64,
bcols: i64,
bnnz: i64,
rowBlockDim: i64,
colBlockDim: i64,
bsrRowOffsets: *const ::core::ffi::c_void,
bsrColInd: *const ::core::ffi::c_void,
bsrValues: *const ::core::ffi::c_void,
bsrRowOffsetsType: cusparseIndexType_t,
bsrColIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
order: cusparseOrder_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the sparse matrix descriptor `spMatDescr` in the COO format (Structure of Arrays layout).
///
/// [`cusparseCreateCoo`] has the following constraints:
///
/// * `cooRowInd`, `cooColInd`, and `cooValues` must be aligned to the size of the datatypes specified by `cooIdxType`, `cooIdxType`, and `valueType`, respectively. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor (output; the initialized descriptor is written through this pointer).
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of non-zero entries of the sparse matrix.
/// - `cooRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
/// - `cooColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
/// - `cooValues`: Values of the sparse matrix. Array with `nnz` elements.
/// - `cooIdxType`: Data type of `cooRowInd` and `cooColInd`.
/// - `idxBase`: Index base of `cooRowInd` and `cooColInd`.
/// - `valueType`: Datatype of `cooValues`.
pub fn cusparseCreateCoo(
spMatDescr: *mut cusparseSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
cooRowInd: *mut ::core::ffi::c_void,
cooColInd: *mut ::core::ffi::c_void,
cooValues: *mut ::core::ffi::c_void,
cooIdxType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCreateCoo`].
///
/// The signature mirrors [`cusparseCreateCoo`], except that the COO arrays are
/// taken as `*const` pointers and the descriptor written through `spMatDescr`
/// is a [`cusparseConstSpMatDescr_t`].
///
/// # Parameters
///
/// Same names and order as [`cusparseCreateCoo`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseCreateConstCoo(
spMatDescr: *mut cusparseConstSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
cooRowInd: *const ::core::ffi::c_void,
cooColInd: *const ::core::ffi::c_void,
cooValues: *const ::core::ffi::c_void,
cooIdxType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in COO format (Array of Structures layout).
///
/// Every parameter other than `spMatDescr` is a `*mut` out-pointer through
/// which the corresponding field is returned.
///
/// NOTE(review): the "Array of Structures" wording is inherited from the
/// upstream description; the signature returns separate row, column and value
/// pointers, as in [`cusparseCreateCoo`] (documented as Structure of Arrays) —
/// confirm which layout is meant.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of non-zero entries of the sparse matrix.
/// - `cooRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
/// - `cooColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
/// - `cooValues`: Values of the sparse matrix. Array with `nnz` elements.
/// - `idxType`: Data type of `cooRowInd` and `cooColInd`.
/// - `idxBase`: Index base of `cooRowInd` and `cooColInd`.
/// - `valueType`: Datatype of `cooValues`.
pub fn cusparseCooGet(
spMatDescr: cusparseSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
nnz: *mut i64,
cooRowInd: *mut *mut ::core::ffi::c_void,
cooColInd: *mut *mut ::core::ffi::c_void,
cooValues: *mut *mut ::core::ffi::c_void,
idxType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCooGet`].
///
/// The signature mirrors [`cusparseCooGet`], except that it takes a
/// [`cusparseConstSpMatDescr_t`] and hands back the COO array pointers as
/// `*const` instead of `*mut`.
///
/// # Parameters
///
/// Same names and order as [`cusparseCooGet`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseConstCooGet(
spMatDescr: cusparseConstSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
nnz: *mut i64,
cooRowInd: *mut *const ::core::ffi::c_void,
cooColInd: *mut *const ::core::ffi::c_void,
cooValues: *mut *const ::core::ffi::c_void,
idxType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the COO array pointers (`cooRows`, `cooColumns`, `cooValues`) of the sparse matrix descriptor `spMatDescr`.
///
/// [`cusparseCooSetPointers`] has the following constraints:
///
/// * `cooRows`, `cooColumns`, and `cooValues` must be aligned to the size of their corresponding datatypes specified in `spMatDescr`. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `cooRows`: Row indices of the sparse matrix. Array with `nnz` elements.
/// - `cooColumns`: Column indices of the sparse matrix. Array with `nnz` elements.
/// - `cooValues`: Values of the sparse matrix. Array with `nnz` elements.
pub fn cusparseCooSetPointers(
spMatDescr: cusparseSpMatDescr_t,
cooRows: *mut ::core::ffi::c_void,
cooColumns: *mut ::core::ffi::c_void,
cooValues: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the sparse matrix descriptor `spMatDescr` for the Blocked-Ellpack (ELL) format.
///
/// Blocked-ELL column indices (`ellColInd`) are in the range `[0, cols / ellBlockSize - 1]`. The array can contain `-1` values for indicating empty blocks.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor (output; the initialized descriptor is written through this pointer).
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `ellBlockSize`: Size of the ELL-Block.
/// - `ellCols`: Actual number of columns of the Blocked-Ellpack format (`ellValue` columns).
/// - `ellColInd`: Blocked-ELL Column indices. Array with `[ellCols / ellBlockSize][rows / ellBlockSize]` elements.
/// - `ellValue`: Values of the sparse matrix. Array with `rows * ellCols` elements.
/// - `ellIdxType`: Data type of `ellColInd`.
/// - `idxBase`: Index base of `ellColInd`.
/// - `valueType`: Data type of `ellValue`.
pub fn cusparseCreateBlockedEll(
spMatDescr: *mut cusparseSpMatDescr_t,
rows: i64,
cols: i64,
ellBlockSize: i64,
ellCols: i64,
ellColInd: *mut ::core::ffi::c_void,
ellValue: *mut ::core::ffi::c_void,
ellIdxType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCreateBlockedEll`].
///
/// The signature mirrors [`cusparseCreateBlockedEll`], except that `ellColInd`
/// and `ellValue` are taken as `*const` pointers and the descriptor written
/// through `spMatDescr` is a [`cusparseConstSpMatDescr_t`].
///
/// # Parameters
///
/// Same names and order as [`cusparseCreateBlockedEll`]; see that function for
/// the per-parameter descriptions.
pub fn cusparseCreateConstBlockedEll(
spMatDescr: *mut cusparseConstSpMatDescr_t,
rows: i64,
cols: i64,
ellBlockSize: i64,
ellCols: i64,
ellColInd: *const ::core::ffi::c_void,
ellValue: *const ::core::ffi::c_void,
ellIdxType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in Blocked-Ellpack (ELL) format.
///
/// Every parameter other than `spMatDescr` is a `*mut` out-pointer through
/// which the corresponding field is returned.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor.
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `ellBlockSize`: Size of the ELL-Block.
/// - `ellCols`: Actual number of columns of the Blocked-Ellpack format.
/// - `ellColInd`: Column indices for the ELL-Block. Array with `[cols / ellBlockSize][rows / ellBlockSize]` elements.
///   NOTE(review): [`cusparseCreateBlockedEll`] documents this size with `ellCols` rather than `cols` — confirm which is correct.
/// - `ellValue`: Values of the sparse matrix. Array with `rows * ellCols` elements.
/// - `ellIdxType`: Data type of `ellColInd`.
/// - `idxBase`: Index base of `ellColInd`.
/// - `valueType`: Datatype of `ellValue`.
pub fn cusparseBlockedEllGet(
spMatDescr: cusparseSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
ellBlockSize: *mut i64,
ellCols: *mut i64,
ellColInd: *mut *mut ::core::ffi::c_void,
ellValue: *mut *mut ::core::ffi::c_void,
ellIdxType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseBlockedEllGet`].
///
/// The signature mirrors [`cusparseBlockedEllGet`], except that it takes a
/// [`cusparseConstSpMatDescr_t`] and hands back `ellColInd` and `ellValue` as
/// `*const` pointers instead of `*mut`.
///
/// # Parameters
///
/// Same names and order as [`cusparseBlockedEllGet`]; see that function for
/// the per-parameter descriptions.
pub fn cusparseConstBlockedEllGet(
spMatDescr: cusparseConstSpMatDescr_t,
rows: *mut i64,
cols: *mut i64,
ellBlockSize: *mut i64,
ellCols: *mut i64,
ellColInd: *mut *const ::core::ffi::c_void,
ellValue: *mut *const ::core::ffi::c_void,
ellIdxType: *mut cusparseIndexType_t,
idxBase: *mut cusparseIndexBase_t,
valueType: *mut cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function initializes the sparse matrix descriptor `spMatDescr` for the Sliced Ellpack (SELL) format.
///
/// [`cusparseCreateSlicedEll`] has the following constraints:
///
/// * `sellSliceOffsets`, `sellColInd`, and `sellValues` must be aligned to the size of the datatypes specified by `sellSliceOffsetsType`, `sellColIndType`, and `valueType`, respectively. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `spMatDescr`: Sparse matrix descriptor (output; the initialized descriptor is written through this pointer).
/// - `rows`: Number of rows of the sparse matrix.
/// - `cols`: Number of columns of the sparse matrix.
/// - `nnz`: Number of nonzero elements in the sparse matrix.
/// - `sellValuesSize`: Total number of elements in `sellValues` array (nonzero and padding).
/// - `sliceSize`: The number of rows per slice.
/// - `sellSliceOffsets`: Slice offsets of the sparse matrix. Array of size $\left \lceil{\frac{rows}{sliceSize}}\right \rceil + 1$.
/// - `sellColInd`: Column indices of the sparse matrix. Array of size `sellValuesSize`.
/// - `sellValues`: Values of the sparse matrix. Array of size `sellValuesSize`.
/// - `sellSliceOffsetsType`: Data type of `sellSliceOffsets`.
/// - `sellColIndType`: Data type of `sellColInd`.
/// - `idxBase`: Index base of `sellColInd`.
/// - `valueType`: Data type of `sellValues`.
pub fn cusparseCreateSlicedEll(
spMatDescr: *mut cusparseSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
sellValuesSize: i64,
sliceSize: i64,
sellSliceOffsets: *mut ::core::ffi::c_void,
sellColInd: *mut ::core::ffi::c_void,
sellValues: *mut ::core::ffi::c_void,
sellSliceOffsetsType: cusparseIndexType_t,
sellColIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCreateSlicedEll`].
///
/// The signature mirrors [`cusparseCreateSlicedEll`], except that the SELL
/// arrays are taken as `*const` pointers and the descriptor written through
/// `spMatDescr` is a [`cusparseConstSpMatDescr_t`].
///
/// # Parameters
///
/// Same names and order as [`cusparseCreateSlicedEll`]; see that function for
/// the per-parameter descriptions.
pub fn cusparseCreateConstSlicedEll(
spMatDescr: *mut cusparseConstSpMatDescr_t,
rows: i64,
cols: i64,
nnz: i64,
sellValuesSize: i64,
sliceSize: i64,
sellSliceOffsets: *const ::core::ffi::c_void,
sellColInd: *const ::core::ffi::c_void,
sellValues: *const ::core::ffi::c_void,
sellSliceOffsetsType: cusparseIndexType_t,
sellColIndType: cusparseIndexType_t,
idxBase: cusparseIndexBase_t,
valueType: cudaDataType,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function initializes the dense matrix descriptor `dnMatDescr`.
///
/// [`cusparseCreateDnMat`] has the following constraints:
///
/// * `values` must be aligned to the size of the datatype specified by `valueType`. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `dnMatDescr`: Dense matrix descriptor (output; the initialized descriptor is written through this pointer).
/// - `rows`: Number of rows of the dense matrix.
/// - `cols`: Number of columns of the dense matrix.
/// - `ld`: Leading dimension of the dense matrix.
/// - `values`: Values of the dense matrix. Array with `size` elements.
///   NOTE(review): `size` is not a parameter of this function; presumably `ld * cols` for column-major layout and `ld * rows` otherwise — confirm against the upstream documentation.
/// - `valueType`: Enumerator specifying the datatype of `values`.
/// - `order`: Enumerator specifying the memory layout of the dense matrix.
pub fn cusparseCreateDnMat(
dnMatDescr: *mut cusparseDnMatDescr_t,
rows: i64,
cols: i64,
ld: i64,
values: *mut ::core::ffi::c_void,
valueType: cudaDataType,
order: cusparseOrder_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseCreateDnMat`].
///
/// The signature mirrors [`cusparseCreateDnMat`], except that `values` is
/// taken as a `*const` pointer and the descriptor written through `dnMatDescr`
/// is a [`cusparseConstDnMatDescr_t`].
///
/// # Parameters
///
/// Same names and order as [`cusparseCreateDnMat`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseCreateConstDnMat(
dnMatDescr: *mut cusparseConstDnMatDescr_t,
rows: i64,
cols: i64,
ld: i64,
values: *const ::core::ffi::c_void,
valueType: cudaDataType,
order: cusparseOrder_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function releases the host memory allocated for the dense matrix descriptor `dnMatDescr`.
///
/// # Parameters
///
/// - `dnMatDescr`: Dense matrix descriptor. The parameter is declared as a
///   [`cusparseConstDnMatDescr_t`].
pub fn cusparseDestroyDnMat(
dnMatDescr: cusparseConstDnMatDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the fields of the dense matrix descriptor `dnMatDescr`.
///
/// Every parameter other than `dnMatDescr` is a `*mut` out-pointer through
/// which the corresponding field is returned.
///
/// # Parameters
///
/// - `dnMatDescr`: Dense matrix descriptor.
/// - `rows`: Number of rows of the dense matrix.
/// - `cols`: Number of columns of the dense matrix.
/// - `ld`: Leading dimension of the dense matrix.
/// - `values`: Values of the dense matrix. Array with `ld * cols` elements.
/// - `type_`: Enumerator specifying the datatype of `values`.
/// - `order`: Enumerator specifying the memory layout of the dense matrix.
pub fn cusparseDnMatGet(
dnMatDescr: cusparseDnMatDescr_t,
rows: *mut i64,
cols: *mut i64,
ld: *mut i64,
values: *mut *mut ::core::ffi::c_void,
type_: *mut cudaDataType,
order: *mut cusparseOrder_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseDnMatGet`].
///
/// The signature mirrors [`cusparseDnMatGet`], except that it takes a
/// [`cusparseConstDnMatDescr_t`] and hands back `values` as a `*const`
/// pointer instead of `*mut`.
///
/// # Parameters
///
/// Same names and order as [`cusparseDnMatGet`]; see that function for the
/// per-parameter descriptions.
pub fn cusparseConstDnMatGet(
dnMatDescr: cusparseConstDnMatDescr_t,
rows: *mut i64,
cols: *mut i64,
ld: *mut i64,
values: *mut *const ::core::ffi::c_void,
type_: *mut cudaDataType,
order: *mut cusparseOrder_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function returns the `values` field of the dense matrix descriptor `dnMatDescr`.
///
/// # Parameters
///
/// - `dnMatDescr`: Dense matrix descriptor.
/// - `values`: Values of the dense matrix (output; the pointer is returned through this parameter). Array with `ld * cols` elements.
pub fn cusparseDnMatGetValues(
dnMatDescr: cusparseDnMatDescr_t,
values: *mut *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Const-descriptor counterpart of [`cusparseDnMatGetValues`].
///
/// The signature mirrors [`cusparseDnMatGetValues`], except that it takes a
/// [`cusparseConstDnMatDescr_t`] and hands back `values` as a `*const`
/// pointer instead of `*mut`.
///
/// # Parameters
///
/// Same names and order as [`cusparseDnMatGetValues`]; see that function for
/// the per-parameter descriptions.
pub fn cusparseConstDnMatGetValues(
dnMatDescr: cusparseConstDnMatDescr_t,
values: *mut *const ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function sets the `values` field of the dense matrix descriptor `dnMatDescr`.
///
/// [`cusparseDnMatSetValues`] has the following constraints:
///
/// * `values` must be aligned to the size of the datatype specified in `dnMatDescr`. See [`cudaDataType_t`] for the description of the datatypes.
///
/// # Parameters
///
/// - `dnMatDescr`: Dense matrix descriptor.
/// - `values`: New values pointer of the dense matrix. Array with `ld * cols` elements.
pub fn cusparseDnMatSetValues(
dnMatDescr: cusparseDnMatDescr_t,
values: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function sets the number of batches and the batch stride of the dense matrix descriptor `dnMatDescr`.
///
/// # Parameters
///
/// - `dnMatDescr`: Dense matrix descriptor.
/// - `batchCount`: Number of batches of the dense matrix.
/// - `batchStride`: Address offset between a matrix and the next one in the batch.
///   Requires `batchStride ≥ ld * cols` if the matrix uses column-major layout, `batchStride ≥ ld * rows` otherwise.
pub fn cusparseDnMatSetStridedBatch(
dnMatDescr: cusparseDnMatDescr_t,
batchCount: ::core::ffi::c_int,
batchStride: i64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function returns the number of batches and the batch stride of the dense matrix descriptor `dnMatDescr`.
///
/// Both values are returned through the `*mut` out-pointer parameters.
///
/// # Parameters
///
/// - `dnMatDescr`: Dense matrix descriptor.
/// - `batchCount`: Number of batches of the dense matrix (output).
/// - `batchStride`: Address offset between a matrix and the next one in the batch (output).
pub fn cusparseDnMatGetStridedBatch(
dnMatDescr: cusparseConstDnMatDescr_t,
batchCount: *mut ::core::ffi::c_int,
batchStride: *mut i64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function computes the sum of a sparse vector `vecX` and a dense vector `vecY`.
///
/// This binding is marked `#[deprecated]`.
///
/// In other words,
///
/// ```text
/// for i=0 to n-1
///     Y[i] = beta * Y[i]
/// for i=0 to nnz-1
///     Y[X_indices[i]] += alpha * X_values[i]
/// ```
///
/// [`cusparseAxpby`] supports the following index type for representing the sparse vector `vecX`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// [`cusparseAxpby`] supports the following data types:
///
/// Uniform-precision computation:
///
/// | `X`/`Y`/`compute` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// Mixed-precision computation:
///
/// | `X`/`Y` | `compute` |
/// | --- | --- |
/// | `cudaDataType_t::CUDA_R_16F` | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_16BF` | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_C_16F` \[DEPRECATED\] | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_16BF` \[DEPRECATED\] | `cudaDataType_t::CUDA_C_32F` |
///
/// [`cusparseAxpby`] has the following constraints:
///
/// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
///
/// [`cusparseAxpby`] has the following properties:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
/// * The routine allows `indices` of `vecX` to be unsorted
///
/// [`cusparseAxpby`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseAxpby](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/axpby) for a code example.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `alpha`: $\alpha$ scalar used for multiplication of compute type.
/// - `vecX`: Sparse vector `X`.
/// - `beta`: $\beta$ scalar used for multiplication of compute type.
/// - `vecY`: Dense vector `Y`.
#[deprecated]
pub fn cusparseAxpby(
handle: cusparseHandle_t,
alpha: *const ::core::ffi::c_void,
vecX: cusparseConstSpVecDescr_t,
beta: *const ::core::ffi::c_void,
vecY: cusparseDnVecDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function gathers the elements of the dense vector `vecY` into the sparse vector `vecX`.
///
/// In other words,
///
/// ```text
/// for i=0 to nnz-1
///     X_values[i] = Y[X_indices[i]]
/// ```
///
/// [`cusparseGather`] supports the following index type for representing the sparse vector `vecX`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// [`cusparseGather`] supports the following data types:
///
/// | `X`/`Y` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_16F` |
/// | `cudaDataType_t::CUDA_R_16BF` |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_16F` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_16BF` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// [`cusparseGather`] has the following constraints:
///
/// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
///
/// [`cusparseGather`] has the following properties:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
/// * The routine allows `indices` of `vecX` to be unsorted
///
/// [`cusparseGather`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseGather](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gather) for a code example.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `vecY`: Dense vector `Y`.
/// - `vecX`: Sparse vector `X`.
pub fn cusparseGather(
handle: cusparseHandle_t,
vecY: cusparseConstDnVecDescr_t,
vecX: cusparseSpVecDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function scatters the elements of the sparse vector `vecX` into the dense vector `vecY`.
///
/// In other words,
///
/// ```text
/// for i=0 to nnz-1
///     Y[X_indices[i]] = X_values[i]
/// ```
///
/// [`cusparseScatter`] supports the following index type for representing the sparse vector `vecX`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// [`cusparseScatter`] supports the following data types:
///
/// | `X`/`Y` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_8I` |
/// | `cudaDataType_t::CUDA_R_16F` |
/// | `cudaDataType_t::CUDA_R_16BF` |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_16F` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_16BF` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// [`cusparseScatter`] has the following constraints:
///
/// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
///
/// [`cusparseScatter`] has the following properties:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
/// * The routine allows `indices` of `vecX` to be unsorted
///
/// [`cusparseScatter`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseScatter](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/scatter) for a code example.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `vecX`: Sparse vector `X`.
/// - `vecY`: Dense vector `Y`.
pub fn cusparseScatter(
handle: cusparseHandle_t,
vecX: cusparseConstSpVecDescr_t,
vecY: cusparseDnVecDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function applies the Givens rotation matrix
///
/// $$
/// G = \begin{bmatrix} c & s \\ -s & c \end{bmatrix}
/// $$
///
/// to a sparse vector `vecX` and a dense vector `vecY`
///
/// In other words,
///
/// ```text
/// for i=0 to nnz-1
///     Y[X_indices[i]] = c * Y[X_indices[i]] - s * X_values[i]
///     X_values[i]     = c * X_values[i]     + s * Y[X_indices[i]]
/// ```
///
/// [`cusparseRot`] supports the following index type for representing the sparse vector `vecX`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// [`cusparseRot`] supports the following data types:
///
/// Uniform-precision computation:
///
/// | `X`/`Y`/`compute` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// Mixed-precision computation:
///
/// | `X`/`Y` | `compute` | Notes |
/// | --- | --- | --- |
/// | `cudaDataType_t::CUDA_R_16F` | `cudaDataType_t::CUDA_R_32F` | |
/// | `cudaDataType_t::CUDA_R_16BF` | `cudaDataType_t::CUDA_R_32F` | |
/// | `cudaDataType_t::CUDA_C_16F` | `cudaDataType_t::CUDA_C_32F` | \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_16BF` | `cudaDataType_t::CUDA_C_32F` | \[DEPRECATED\] |
///
/// NOTE(review): the mixed-precision table was reconstructed from a layout whose merged
/// cells had collapsed; confirm the rows against the upstream cuSPARSE documentation.
///
/// [`cusparseRot`] has the following constraints:
///
/// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
///
/// [`cusparseRot`] has the following properties:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
///
/// [`cusparseRot`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseRot](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/rot) for a code example.
///
/// This binding is marked `#[deprecated]`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `c_coeff`: Pointer to the cosine element `c` of the rotation matrix.
/// - `s_coeff`: Pointer to the sine element `s` of the rotation matrix.
/// - `vecX`: Sparse vector `X` (updated in place, hence the non-const descriptor).
/// - `vecY`: Dense vector `Y` (updated in place).
#[deprecated]
pub fn cusparseRot(
handle: cusparseHandle_t,
c_coeff: *const ::core::ffi::c_void,
s_coeff: *const ::core::ffi::c_void,
vecX: cusparseSpVecDescr_t,
vecY: cusparseDnVecDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the workspace needed by [`cusparseSpVV`].
///
/// The arguments mirror those of [`cusparseSpVV`], except that the workspace pointer is
/// replaced by the `bufferSize` output and `result` is taken as a const pointer.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opX`: Operation `op(X)` that is non-transpose or conjugate transpose.
/// - `vecX`: Sparse vector `X`.
/// - `vecY`: Dense vector `Y`.
/// - `result`: The resulting dot product.
/// - `computeType`: Datatype in which the computation is executed.
/// - `bufferSize`: Output pointer receiving the workspace size, in bytes, that must be
///   passed to [`cusparseSpVV`] as `externalBuffer`.
pub fn cusparseSpVV_bufferSize(
handle: cusparseHandle_t,
opX: cusparseOperation_t,
vecX: cusparseConstSpVecDescr_t,
vecY: cusparseConstDnVecDescr_t,
result: *const ::core::ffi::c_void,
computeType: cudaDataType,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function computes the inner dot product of a sparse vector `vecX` and a dense vector `vecY`
///
/// $$
/// \text{result} = \operatorname{op}(X) \cdot Y
/// $$
///
/// where
/// $$
/// \operatorname{op}(X) =
/// \begin{cases}
/// X & \text{if } op(X) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// \bar{X} & \text{if } op(X) = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The function [`cusparseSpVV_bufferSize`] returns the size of the workspace needed by [`cusparseSpVV`]
///
/// [`cusparseSpVV`] supports the following index type for representing the sparse vector `vecX`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// The data types combinations currently supported for [`cusparseSpVV`] are listed below:
///
/// Uniform-precision computation:
///
/// | `X`/`Y`/`computeType` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// Mixed-precision computation:
///
/// | `X`/`Y` | `computeType`/`result` | Notes |
/// | --- | --- | --- |
/// | `cudaDataType_t::CUDA_R_8I` | `cudaDataType_t::CUDA_R_32I` | |
/// | `cudaDataType_t::CUDA_R_8I` | `cudaDataType_t::CUDA_R_32F` | |
/// | `cudaDataType_t::CUDA_R_16F` | `cudaDataType_t::CUDA_R_32F` | |
/// | `cudaDataType_t::CUDA_R_16BF` | `cudaDataType_t::CUDA_R_32F` | |
/// | `cudaDataType_t::CUDA_C_16F` | `cudaDataType_t::CUDA_C_32F` | \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_16BF` | `cudaDataType_t::CUDA_C_32F` | \[DEPRECATED\] |
///
/// [`cusparseSpVV`] has the following constraints:
///
/// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
///
/// [`cusparseSpVV`] has the following properties:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
/// * The routine allows `indices` of `vecX` to be unsorted
///
/// [`cusparseSpVV`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseSpVV](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spvv) for a code example.
///
/// This binding is marked `#[deprecated]`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opX`: Operation `op(X)` that is non-transpose or conjugate transpose.
/// - `vecX`: Sparse vector `X`.
/// - `vecY`: Dense vector `Y`.
/// - `result`: The resulting dot product.
/// - `computeType`: Datatype in which the computation is executed.
/// - `externalBuffer`: Pointer to a workspace buffer of at least `bufferSize` bytes.
#[deprecated]
pub fn cusparseSpVV(
handle: cusparseHandle_t,
opX: cusparseOperation_t,
vecX: cusparseConstSpVecDescr_t,
vecY: cusparseConstDnVecDescr_t,
result: *mut ::core::ffi::c_void,
computeType: cudaDataType,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as `alg` to [`cusparseSparseToDense_bufferSize`] and
/// [`cusparseSparseToDense`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSparseToDenseAlg_t {
/// Default algorithm (the only one declared here).
CUSPARSE_SPARSETODENSE_ALG_DEFAULT = 0,
}
unsafe extern "C" {
/// Returns the size of the workspace needed by [`cusparseSparseToDense`].
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B`.
/// - `alg`: Algorithm for the computation.
/// - `bufferSize`: Output pointer receiving the required workspace size
///   (presumably in bytes, as for the other `*_bufferSize` routines; confirm).
pub fn cusparseSparseToDense_bufferSize(
handle: cusparseHandle_t,
matA: cusparseConstSpMatDescr_t,
matB: cusparseDnMatDescr_t,
alg: cusparseSparseToDenseAlg_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function converts the sparse matrix `matA` in CSR, CSC, or COO format into its dense representation `matB`. Blocked-ELL is not currently supported.
///
/// The function [`cusparseSparseToDense_bufferSize`] returns the size of the workspace needed by [`cusparseSparseToDense`].
///
/// [`cusparseSparseToDense`] supports the following index type for representing the sparse matrix `matA`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// [`cusparseSparseToDense`] supports the following data types:
///
/// | `A`/`B` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_8I` |
/// | `cudaDataType_t::CUDA_R_16F` |
/// | `cudaDataType_t::CUDA_R_16BF` |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_16F` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_16BF` \[DEPRECATED\] |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// [`cusparseSparseToDense`] supports the following algorithm:
///
/// | Algorithm | Notes |
/// | --- | --- |
/// | [`cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT`] | Default algorithm |
///
/// [`cusparseSparseToDense`] has the following properties:
///
/// * The routine requires no extra storage
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run
/// * The routine allows the indices of `matA` to be unsorted
///
/// [`cusparseSparseToDense`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseSparseToDense](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/sparse2dense_csr) for a code example.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B`.
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer whose size is reported by
///   [`cusparseSparseToDense_bufferSize`].
pub fn cusparseSparseToDense(
handle: cusparseHandle_t,
matA: cusparseConstSpMatDescr_t,
matB: cusparseDnMatDescr_t,
alg: cusparseSparseToDenseAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as `alg` to [`cusparseDenseToSparse_bufferSize`],
/// [`cusparseDenseToSparse_analysis`] and [`cusparseDenseToSparse_convert`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseDenseToSparseAlg_t {
/// Default algorithm (the only one declared here).
CUSPARSE_DENSETOSPARSE_ALG_DEFAULT = 0,
}
unsafe extern "C" {
/// Workspace-size query for the dense-to-sparse conversion of `matA` into `matB`.
///
/// NOTE(review): no upstream description accompanies this binding; the summary is inferred
/// from the name and from the sibling [`cusparseSparseToDense_bufferSize`]. Confirm against
/// the cuSPARSE `cusparseDenseToSparse` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `matA`: Dense matrix `A` (input, const descriptor).
/// - `matB`: Sparse matrix `B` (non-const descriptor).
/// - `alg`: Algorithm for the computation.
/// - `bufferSize`: Output pointer receiving the required workspace size
///   (presumably in bytes; confirm).
pub fn cusparseDenseToSparse_bufferSize(
handle: cusparseHandle_t,
matA: cusparseConstDnMatDescr_t,
matB: cusparseSpMatDescr_t,
alg: cusparseDenseToSparseAlg_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Analysis step of the dense-to-sparse conversion of `matA` into `matB`.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably this step
/// inspects `matA` and records the resulting non-zero count in `matB` so the caller can
/// allocate the sparse arrays before [`cusparseDenseToSparse_convert`]; confirm against the
/// cuSPARSE `cusparseDenseToSparse` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `matA`: Dense matrix `A` (input, const descriptor).
/// - `matB`: Sparse matrix `B` (non-const descriptor).
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer (presumably sized via
///   [`cusparseDenseToSparse_bufferSize`]; confirm).
pub fn cusparseDenseToSparse_analysis(
handle: cusparseHandle_t,
matA: cusparseConstDnMatDescr_t,
matB: cusparseSpMatDescr_t,
alg: cusparseDenseToSparseAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Conversion step of the dense-to-sparse conversion of `matA` into `matB`.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably this step
/// fills the index and value arrays of `matB` after [`cusparseDenseToSparse_analysis`] has
/// run with the same arguments; confirm against the cuSPARSE `cusparseDenseToSparse`
/// documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `matA`: Dense matrix `A` (input, const descriptor).
/// - `matB`: Sparse matrix `B` (non-const descriptor).
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer (presumably the same buffer given to
///   the analysis step; confirm).
pub fn cusparseDenseToSparse_convert(
handle: cusparseHandle_t,
matA: cusparseConstDnMatDescr_t,
matB: cusparseSpMatDescr_t,
alg: cusparseDenseToSparseAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as `alg` to [`cusparseSpMV`], [`cusparseSpMV_bufferSize`] and
/// [`cusparseSpMV_preprocess`].
///
/// The numeric values are not in declaration order (`CUSPARSE_SPMV_COO_ALG1` is `1` while
/// `CUSPARSE_SPMV_CSR_ALG1` is `2`); they must be kept exactly as declared.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpMVAlg_t {
/// Default algorithm for any sparse matrix format.
CUSPARSE_SPMV_ALG_DEFAULT = 0,
/// Default algorithm for CSR/CSC sparse matrix format. May produce slightly different
/// results during different runs with the same input parameters.
CUSPARSE_SPMV_CSR_ALG1 = 2,
/// Provides deterministic (bit-wise) results for each run. If
/// `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to `CUSPARSE_SPMV_CSR_ALG1`.
CUSPARSE_SPMV_CSR_ALG2 = 3,
/// Default algorithm for COO sparse matrix format. May produce slightly different
/// results during different runs with the same input parameters.
CUSPARSE_SPMV_COO_ALG1 = 1,
/// Provides deterministic (bit-wise) results for each run. If
/// `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to `CUSPARSE_SPMV_COO_ALG1`.
CUSPARSE_SPMV_COO_ALG2 = 4,
/// Default algorithm for Sliced Ellpack sparse matrix format. Provides deterministic
/// (bit-wise) results for each run.
CUSPARSE_SPMV_SELL_ALG1 = 5,
/// Default algorithm for BSR sparse matrix format. Provides deterministic (bit-wise)
/// results for each run. Supports only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`.
CUSPARSE_SPMV_BSR_ALG1 = 6,
}
unsafe extern "C" {
/// This function performs the multiplication of a sparse matrix `matA` and a dense vector `vecX`
///
/// $$
/// Y = \alpha \operatorname{op}(A) \cdot X + \beta Y
/// $$
///
/// where
///
/// * `op(A)` is a sparse matrix of size $m \times k$
/// * `X` is a dense vector of size $k$
/// * `Y` is a dense vector of size $m$
/// * $\alpha$ and $\beta$ are scalars
///
/// Also, for matrix `A`:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } op(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } op(A) = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } op(A) = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The function [`cusparseSpMV_bufferSize`] returns the size of the workspace needed by [`cusparseSpMV_preprocess`] and [`cusparseSpMV`]
///
/// The sparse matrix formats currently supported are listed below:
///
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_COO`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSC`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`]
/// * `cusparseFormat_t::CUSPARSE_FORMAT_SLICED_ELL`
///
/// [`cusparseSpMV`] supports the following index type for representing the sparse matrix `matA`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// [`cusparseSpMV`] supports the following data types:
///
/// Uniform-precision computation:
///
/// | `A`/`X`/ `Y`/`computeType` |
/// | --- |
/// | `cudaDataType_t::CUDA_R_32F` |
/// | `cudaDataType_t::CUDA_R_64F` |
/// | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_C_64F` |
///
/// Mixed-precision computation:
///
/// | `A`/`X` | `Y` | `computeType` | Notes |
/// | --- | --- | --- | --- |
/// | `cudaDataType_t::CUDA_R_8I` | `cudaDataType_t::CUDA_R_32I` | `cudaDataType_t::CUDA_R_32I` | |
/// | `cudaDataType_t::CUDA_R_8I` | `cudaDataType_t::CUDA_R_32F` | `cudaDataType_t::CUDA_R_32F` | |
/// | `cudaDataType_t::CUDA_R_16F` | | | |
/// | `cudaDataType_t::CUDA_R_16BF` | | | |
/// | `cudaDataType_t::CUDA_R_16F` | `cudaDataType_t::CUDA_R_16F` | | |
/// | `cudaDataType_t::CUDA_R_16BF` | `cudaDataType_t::CUDA_R_16BF` | | |
/// | `cudaDataType_t::CUDA_C_32F` | `cudaDataType_t::CUDA_C_32F` | `cudaDataType_t::CUDA_C_32F` | |
/// | `cudaDataType_t::CUDA_C_16F` | `cudaDataType_t::CUDA_C_16F` | \[DEPRECATED\] | |
/// | `cudaDataType_t::CUDA_C_16BF` | `cudaDataType_t::CUDA_C_16BF` | \[DEPRECATED\] | |
///
/// NOTE(review): the empty and shifted cells in the table above look like merged cells that
/// were lost when the upstream table was flattened; consult the cuSPARSE documentation for
/// the exact `Y`/`computeType` of those rows.
///
/// | `A` | `X`/`Y`/`computeType` |
/// | --- | --- |
/// | `cudaDataType_t::CUDA_R_32F` | `cudaDataType_t::CUDA_R_64F` |
///
/// Mixed Regular/Complex computation:
///
/// | `A` | `X`/`Y`/`computeType` |
/// | --- | --- |
/// | `cudaDataType_t::CUDA_R_32F` | `cudaDataType_t::CUDA_C_32F` |
/// | `cudaDataType_t::CUDA_R_64F` | `cudaDataType_t::CUDA_C_64F` |
///
/// NOTE: `cudaDataType_t::CUDA_R_16F`, `cudaDataType_t::CUDA_R_16BF`, `cudaDataType_t::CUDA_C_16F`, and `cudaDataType_t::CUDA_C_16BF` data types always imply mixed-precision computation.
///
/// [`cusparseSpMV`] supports the following algorithms:
///
/// | Algorithm | Notes |
/// | --- | --- |
/// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_ALG_DEFAULT`] | Default algorithm for any sparse matrix format. |
/// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG1`] | Default algorithm for COO sparse matrix format. May produce slightly different results during different runs with the same input parameters. |
/// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`] | Provides deterministic (bit-wise) results for each run. If `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG1`]. |
/// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG1`] | Default algorithm for CSR/CSC sparse matrix format. May produce slightly different results during different runs with the same input parameters. |
/// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG2`] | Provides deterministic (bit-wise) results for each run. If `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG1`]. |
/// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_SELL_ALG1`] | Default algorithm for Sliced Ellpack sparse matrix format. Provides deterministic (bit-wise) results for each run. |
/// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_BSR_ALG1`] | Default algorithm for BSR sparse matrix format. Provides deterministic (bit-wise) results for each run. Supports only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`. Supports both row-major and column-major block layouts in `A`. |
///
/// Calling [`cusparseSpMV_preprocess`] is optional.
/// It may accelerate subsequent calls to [`cusparseSpMV`].
/// It is useful when [`cusparseSpMV`] is called multiple times with the same sparsity pattern (`matA`).
///
/// Calling [`cusparseSpMV_preprocess`] with `buffer` makes that buffer “active” for `matA` SpMV calls.
/// Subsequent calls to [`cusparseSpMV`] with `matA` and the active buffer
/// must use the same values for all parameters as the call to [`cusparseSpMV_preprocess`].
/// The exceptions are: `alpha`, `beta`, `vecX`, `vecY`, and the values (but not indices) of `matA` may be different.
/// Importantly, the buffer contents must be unmodified since the call to [`cusparseSpMV_preprocess`].
/// When [`cusparseSpMV`] is called with `matA` and its active buffer, it may read acceleration data from the buffer.
///
/// Calling [`cusparseSpMV_preprocess`] again with `matA` and a new buffer will make the new buffer active,
/// forgetting about the previously-active buffer and making it inactive.
/// For [`cusparseSpMV`], there can only be one active buffer per sparse matrix at a time.
/// To get the effect of multiple active buffers for a single sparse matrix,
/// create multiple matrix handles that all point to the same index and value buffers,
/// and call [`cusparseSpMV_preprocess`] once per handle with different workspace buffers.
///
/// Calling [`cusparseSpMV`] with an inactive buffer is always permitted.
/// However, there may be no acceleration from the preprocessing in that case.
///
/// For the purposes of thread safety,
/// [`cusparseSpMV_preprocess`] is writing to `matA` internal state.
///
/// **Performance notes:**
///
/// * [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG1`] and [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG1`] provide higher performance than [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`] and [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG2`].
/// * In general, `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` is 3x faster than `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`.
/// * Using [`cusparseSpMV_preprocess`] helps improve performance of [`cusparseSpMV`] in CSR. It is beneficial when we need to run [`cusparseSpMV`] multiple times with a same matrix ([`cusparseSpMV_preprocess`] is executed only once).
///
/// [`cusparseSpMV`] has the following properties:
///
/// * The routine requires extra storage for CSR/CSC format (all algorithms) and for COO format with [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`] algorithm.
/// * Provides deterministic (bit-wise) results for each run only for [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`], [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG2`] and [`cusparseSpMVAlg_t::CUSPARSE_SPMV_BSR_ALG1`] algorithms, and `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`.
/// * The routine supports asynchronous execution.
/// * compute-sanitizer could report false race conditions for this routine when `beta == 0`. This is for optimization purposes and does not affect the correctness of the computation.
/// * The routine allows the indices of `matA` to be unsorted.
///
/// [`cusparseSpMV`] supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseSpMV CSR](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmv_csr) and [cusparseSpMV COO](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmv_coo) for a code example.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `vecX`: Dense vector `X`.
/// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
/// - `vecY`: Dense vector `Y`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer of at least `bufferSize` bytes.
pub fn cusparseSpMV(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
vecX: cusparseConstDnVecDescr_t,
beta: *const ::core::ffi::c_void,
vecY: cusparseDnVecDescr_t,
computeType: cudaDataType,
alg: cusparseSpMVAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Returns the size of the workspace needed by [`cusparseSpMV_preprocess`] and
/// [`cusparseSpMV`].
///
/// The arguments mirror those of [`cusparseSpMV`], with the workspace pointer replaced by
/// the `bufferSize` output.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `vecX`: Dense vector `X`.
/// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
/// - `vecY`: Dense vector `Y`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `bufferSize`: Output pointer receiving the workspace size, in bytes, that must be
///   passed to [`cusparseSpMV`] as `externalBuffer`.
pub fn cusparseSpMV_bufferSize(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
vecX: cusparseConstDnVecDescr_t,
beta: *const ::core::ffi::c_void,
vecY: cusparseDnVecDescr_t,
computeType: cudaDataType,
alg: cusparseSpMVAlg_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Optional preprocessing step that may accelerate subsequent calls to [`cusparseSpMV`].
///
/// It is useful when [`cusparseSpMV`] is called multiple times with the same sparsity
/// pattern (`matA`). The call makes `externalBuffer` the "active" buffer for `matA`:
///
/// * Subsequent [`cusparseSpMV`] calls with `matA` and the active buffer must use the same
///   values for all parameters, except that `alpha`, `beta`, `vecX`, `vecY`, and the values
///   (but not indices) of `matA` may be different.
/// * The buffer contents must be unmodified since this call.
/// * Calling this function again with `matA` and a new buffer makes the new buffer active
///   and the previous one inactive; there can only be one active buffer per sparse matrix.
///
/// For the purposes of thread safety, this function is writing to `matA` internal state,
/// even though `matA` is passed as a const descriptor.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `vecX`: Dense vector `X`.
/// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
/// - `vecY`: Dense vector `Y`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer whose size is reported by
///   [`cusparseSpMV_bufferSize`].
pub fn cusparseSpMV_preprocess(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
vecX: cusparseConstDnVecDescr_t,
beta: *const ::core::ffi::c_void,
vecY: cusparseDnVecDescr_t,
computeType: cudaDataType,
alg: cusparseSpMVAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as `alg` to [`cusparseSpSV_bufferSize`],
/// [`cusparseSpSV_analysis`] and [`cusparseSpSV_solve`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSVAlg_t {
/// Default algorithm (the only one declared here).
CUSPARSE_SPSV_ALG_DEFAULT = 0,
}
/// Selects which part of the matrix [`cusparseSpSV_updateMatrix`] updates (its `updatePart`
/// argument).
///
/// NOTE(review): variant meanings below are inferred from the names; confirm against the
/// cuSPARSE `cusparseSpSV_updateMatrix` documentation.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSVUpdate_t {
/// Presumably: `newValues` replaces the values of the whole matrix.
CUSPARSE_SPSV_UPDATE_GENERAL = 0,
/// Presumably: `newValues` replaces only the diagonal values.
CUSPARSE_SPSV_UPDATE_DIAGONAL = 1,
}
/// Opaque SpSV descriptor type.
///
/// The zero-length private field makes this a placeholder with no accessible contents;
/// it is only ever used behind a pointer (see [`cusparseSpSVDescr_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpSVDescr {
_unused: [u8; 0],
}
/// Raw pointer handle to an opaque [`cusparseSpSVDescr`], as taken by the `cusparseSpSV_*`
/// functions.
pub type cusparseSpSVDescr_t = *mut cusparseSpSVDescr;
unsafe extern "C" {
/// Creates an SpSV descriptor and returns its handle through `descr`.
///
/// NOTE(review): behavior inferred from the name and the out-pointer signature; confirm
/// against the cuSPARSE documentation. Presumably the handle must later be released with
/// [`cusparseSpSV_destroyDescr`].
///
/// # Parameters
///
/// - `descr`: Output pointer receiving the new [`cusparseSpSVDescr_t`] handle.
pub fn cusparseSpSV_createDescr(descr: *mut cusparseSpSVDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Destroys an SpSV descriptor.
///
/// NOTE(review): behavior inferred from the name; presumably `descr` must come from
/// [`cusparseSpSV_createDescr`] and must not be used afterwards. Confirm against the
/// cuSPARSE documentation.
///
/// # Parameters
///
/// - `descr`: The SpSV descriptor handle to destroy.
pub fn cusparseSpSV_destroyDescr(descr: cusparseSpSVDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for the SpSV routines.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably the reported
/// size applies to the `externalBuffer` of [`cusparseSpSV_analysis`]; confirm against the
/// cuSPARSE `cusparseSpSV` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `alpha`: Pointer to the scalar `alpha` (presumably of type `computeType`).
/// - `matA`: Sparse matrix `A`.
/// - `vecX`: Dense vector `X` (const descriptor).
/// - `vecY`: Dense vector `Y` (non-const descriptor).
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `spsvDescr`: SpSV descriptor handle.
/// - `bufferSize`: Output pointer receiving the required workspace size
///   (presumably in bytes; confirm).
pub fn cusparseSpSV_bufferSize(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
vecX: cusparseConstDnVecDescr_t,
vecY: cusparseDnVecDescr_t,
computeType: cudaDataType,
alg: cusparseSpSVAlg_t,
spsvDescr: cusparseSpSVDescr_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Analysis phase of SpSV.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably it must run
/// before [`cusparseSpSV_solve`] with the same `spsvDescr`, and `externalBuffer` must stay
/// valid until the solve has completed (the solve itself takes no buffer argument);
/// confirm against the cuSPARSE `cusparseSpSV` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `alpha`: Pointer to the scalar `alpha` (presumably of type `computeType`).
/// - `matA`: Sparse matrix `A`.
/// - `vecX`: Dense vector `X` (const descriptor).
/// - `vecY`: Dense vector `Y` (non-const descriptor).
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `spsvDescr`: SpSV descriptor handle.
/// - `externalBuffer`: Pointer to a workspace buffer (presumably sized via
///   [`cusparseSpSV_bufferSize`]; confirm).
pub fn cusparseSpSV_analysis(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
vecX: cusparseConstDnVecDescr_t,
vecY: cusparseDnVecDescr_t,
computeType: cudaDataType,
alg: cusparseSpSVAlg_t,
spsvDescr: cusparseSpSVDescr_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Solve phase of SpSV.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably it solves the
/// sparse triangular system `op(A) * Y = alpha * X` for `Y`, using the state stored in
/// `spsvDescr` by [`cusparseSpSV_analysis`]; confirm against the cuSPARSE `cusparseSpSV`
/// documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `alpha`: Pointer to the scalar `alpha` (presumably of type `computeType`).
/// - `matA`: Sparse matrix `A`.
/// - `vecX`: Dense vector `X` (const descriptor).
/// - `vecY`: Dense vector `Y` (non-const descriptor).
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `spsvDescr`: SpSV descriptor handle.
pub fn cusparseSpSV_solve(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
vecX: cusparseConstDnVecDescr_t,
vecY: cusparseDnVecDescr_t,
computeType: cudaDataType,
alg: cusparseSpSVAlg_t,
spsvDescr: cusparseSpSVDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Updates matrix values associated with an SpSV descriptor.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably it lets the
/// caller change the matrix values (all of them or only the diagonal, per `updatePart`)
/// without repeating [`cusparseSpSV_analysis`]; confirm against the cuSPARSE
/// `cusparseSpSV_updateMatrix` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `spsvDescr`: SpSV descriptor handle.
/// - `newValues`: Pointer to the new values (layout and element type not visible here).
/// - `updatePart`: Which part of the matrix to update; see [`cusparseSpSVUpdate_t`].
pub fn cusparseSpSV_updateMatrix(
handle: cusparseHandle_t,
spsvDescr: cusparseSpSVDescr_t,
newValues: *mut ::core::ffi::c_void,
updatePart: cusparseSpSVUpdate_t,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as `alg` to [`cusparseSpSM_bufferSize`],
/// [`cusparseSpSM_analysis`] and [`cusparseSpSM_solve`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSMAlg_t {
/// Default algorithm (the only one declared here).
CUSPARSE_SPSM_ALG_DEFAULT = 0,
}
/// Selects which part of the matrix [`cusparseSpSM_updateMatrix`] updates (its `updatePart`
/// argument).
///
/// NOTE(review): variant meanings below are inferred from the names; confirm against the
/// cuSPARSE `cusparseSpSM_updateMatrix` documentation.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSMUpdate_t {
/// Presumably: `newValues` replaces the values of the whole matrix.
CUSPARSE_SPSM_UPDATE_GENERAL = 0,
/// Presumably: `newValues` replaces only the diagonal values.
CUSPARSE_SPSM_UPDATE_DIAGONAL = 1,
}
/// Opaque SpSM descriptor type.
///
/// The zero-length private field makes this a placeholder with no accessible contents;
/// it is only ever used behind a pointer (see [`cusparseSpSMDescr_t`]).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpSMDescr {
_unused: [u8; 0],
}
/// Raw pointer handle to an opaque [`cusparseSpSMDescr`], as taken by the `cusparseSpSM_*`
/// functions.
pub type cusparseSpSMDescr_t = *mut cusparseSpSMDescr;
unsafe extern "C" {
/// Creates an SpSM descriptor and returns its handle through `descr`.
///
/// NOTE(review): behavior inferred from the name and the out-pointer signature; confirm
/// against the cuSPARSE documentation. Presumably the handle must later be released with
/// [`cusparseSpSM_destroyDescr`].
///
/// # Parameters
///
/// - `descr`: Output pointer receiving the new [`cusparseSpSMDescr_t`] handle.
pub fn cusparseSpSM_createDescr(descr: *mut cusparseSpSMDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Destroys an SpSM descriptor.
///
/// NOTE(review): behavior inferred from the name; presumably `descr` must come from
/// [`cusparseSpSM_createDescr`] and must not be used afterwards. Confirm against the
/// cuSPARSE documentation.
///
/// # Parameters
///
/// - `descr`: The SpSM descriptor handle to destroy.
pub fn cusparseSpSM_destroyDescr(descr: cusparseSpSMDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Workspace-size query for the SpSM routines.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably the reported
/// size applies to the `externalBuffer` of [`cusparseSpSM_analysis`]; confirm against the
/// cuSPARSE `cusparseSpSM` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Pointer to the scalar `alpha` (presumably of type `computeType`).
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B` (const descriptor).
/// - `matC`: Dense matrix `C` (non-const descriptor).
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `spsmDescr`: SpSM descriptor handle.
/// - `bufferSize`: Output pointer receiving the required workspace size
///   (presumably in bytes; confirm).
pub fn cusparseSpSM_bufferSize(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstDnMatDescr_t,
matC: cusparseDnMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpSMAlg_t,
spsmDescr: cusparseSpSMDescr_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Analysis phase of SpSM.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably it must run
/// before [`cusparseSpSM_solve`] with the same `spsmDescr`, and `externalBuffer` must stay
/// valid until the solve has completed (the solve itself takes no buffer argument);
/// confirm against the cuSPARSE `cusparseSpSM` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Pointer to the scalar `alpha` (presumably of type `computeType`).
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B` (const descriptor).
/// - `matC`: Dense matrix `C` (non-const descriptor).
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `spsmDescr`: SpSM descriptor handle.
/// - `externalBuffer`: Pointer to a workspace buffer (presumably sized via
///   [`cusparseSpSM_bufferSize`]; confirm).
pub fn cusparseSpSM_analysis(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstDnMatDescr_t,
matC: cusparseDnMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpSMAlg_t,
spsmDescr: cusparseSpSMDescr_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Solve phase of SpSM.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably it solves the
/// sparse triangular system `op(A) * C = alpha * op(B)` for `C`, using the state stored in
/// `spsmDescr` by [`cusparseSpSM_analysis`]; confirm against the cuSPARSE `cusparseSpSM`
/// documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Pointer to the scalar `alpha` (presumably of type `computeType`).
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B` (const descriptor).
/// - `matC`: Dense matrix `C` (non-const descriptor).
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `spsmDescr`: SpSM descriptor handle.
pub fn cusparseSpSM_solve(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstDnMatDescr_t,
matC: cusparseDnMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpSMAlg_t,
spsmDescr: cusparseSpSMDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Updates matrix values associated with an SpSM descriptor.
///
/// NOTE(review): no upstream description accompanies this binding. Presumably it lets the
/// caller change the matrix values (all of them or only the diagonal, per `updatePart`)
/// without repeating [`cusparseSpSM_analysis`]; confirm against the cuSPARSE
/// `cusparseSpSM_updateMatrix` documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `spsmDescr`: SpSM descriptor handle.
/// - `newValues`: Pointer to the new values (layout and element type not visible here).
/// - `updatePart`: Which part of the matrix to update; see [`cusparseSpSMUpdate_t`].
pub fn cusparseSpSM_updateMatrix(
handle: cusparseHandle_t,
spsmDescr: cusparseSpSMDescr_t,
newValues: *mut ::core::ffi::c_void,
updatePart: cusparseSpSMUpdate_t,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as `alg` to [`cusparseSpMM_bufferSize`] and
/// [`cusparseSpMM_preprocess`] (and to the SpMM routine itself).
///
/// The numeric values are neither contiguous nor in declaration order
/// (e.g. `CUSPARSE_SPMM_COO_ALG4` is `5` while `CUSPARSE_SPMM_CSR_ALG1` is `4`); they must
/// be kept exactly as declared.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpMMAlg_t {
/// Default algorithm for any sparse matrix format.
CUSPARSE_SPMM_ALG_DEFAULT = 0,
/// Algorithm 1 for COO sparse matrix format. Best performance with column-major layout;
/// supports batched computation; may produce slightly different results between runs.
CUSPARSE_SPMM_COO_ALG1 = 1,
/// Algorithm 2 for COO sparse matrix format. Deterministic result; requires additional
/// memory; in general slower than Algorithm 1.
CUSPARSE_SPMM_COO_ALG2 = 2,
/// Algorithm 3 for COO sparse matrix format. May provide better performance for a large
/// number of nnz; may produce slightly different results between runs.
CUSPARSE_SPMM_COO_ALG3 = 3,
/// Algorithm 4 for COO sparse matrix format. Better performance with row-major layout;
/// supports batched computation; may produce slightly different results between runs.
CUSPARSE_SPMM_COO_ALG4 = 5,
/// Algorithm 1 for CSR/CSC sparse matrix format. Best performance with column-major
/// layout; requires additional memory; may produce slightly different results between runs.
CUSPARSE_SPMM_CSR_ALG1 = 4,
/// Algorithm 2 for CSR/CSC sparse matrix format. Best performance with row-major layout;
/// requires additional memory; may produce slightly different results between runs.
CUSPARSE_SPMM_CSR_ALG2 = 6,
/// Algorithm 3 for CSR sparse matrix format. Deterministic result; requires additional
/// memory; supports only CSR and `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`.
CUSPARSE_SPMM_CSR_ALG3 = 12,
/// Algorithm for the Blocked-ELL sparse matrix format (inferred from the name; its
/// documentation is not part of this excerpt, confirm against the cuSPARSE docs).
CUSPARSE_SPMM_BLOCKED_ELL_ALG1 = 13,
/// Algorithm 1 for BSR sparse matrix format. Deterministic result; requires no
/// additional memory; supports only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`.
CUSPARSE_SPMM_BSR_ALG1 = 14,
}
unsafe extern "C" {
/// Returns the size of the workspace needed by `cusparseSpMM()`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Pointer to the $\alpha$ scalar used for multiplication.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B`.
/// - `beta`: Pointer to the $\beta$ scalar used for multiplication.
/// - `matC`: Dense matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `bufferSize`: Output pointer receiving the required workspace size
///   (presumably in bytes, as for the other `*_bufferSize` routines; confirm).
pub fn cusparseSpMM_bufferSize(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstDnMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseDnMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpMMAlg_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Optional preprocessing step that may accelerate subsequent calls to `cusparseSpMM()`.
///
/// It is useful when `cusparseSpMM()` is called multiple times with the same sparsity
/// pattern (`matA`). It provides performance advantages with `CUSPARSE_SPMM_CSR_ALG1` or
/// `CUSPARSE_SPMM_CSR_ALG3`; for all other formats and algorithms it has no effect.
///
/// The call makes `externalBuffer` the "active" buffer for `matA`:
///
/// * Subsequent `cusparseSpMM()` calls with `matA` and the active buffer must use the same
///   values for all parameters, except that `alpha`, `beta`, the dense matrices, and the
///   values (but not indices) of `matA` may be different.
/// * The buffer contents must be unmodified since this call.
/// * Calling this function again with `matA` and a new buffer makes the new buffer active
///   and the previous one inactive; there can only be one active buffer per sparse matrix.
///
/// For the purposes of thread safety, this function is writing to `matA` internal state,
/// even though `matA` is passed as a const descriptor.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Pointer to the $\alpha$ scalar used for multiplication.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B`.
/// - `beta`: Pointer to the $\beta$ scalar used for multiplication.
/// - `matC`: Dense matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer whose size is reported by
///   [`cusparseSpMM_bufferSize`].
pub fn cusparseSpMM_preprocess(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstDnMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseDnMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpMMAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// The function performs the multiplication of a sparse matrix `matA` and a dense matrix `matB`.
///
/// where
///
/// * `op(A)` is a sparse matrix of size $m \times k$
/// * `op(B)` is a dense matrix of size $k \times n$
/// * `C` is a dense matrix of size $m \times n$
/// * $\alpha$ and $\beta$ are scalars
///
/// The routine can be also used to perform the multiplication of a dense matrix and a sparse matrix by switching the dense matrices layout:
///
/// where $\mathbf{B}_{C}$, $\mathbf{C}_{C}$ indicate column-major layout, while $\mathbf{B}_{R}$, $\mathbf{C}_{R}$ refer to row-major layout
///
/// Also, for matrix `A` and `B`:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } op(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } op(A) = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// A^H & \text{if } op(A) = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$:
///
/// $$
/// \operatorname{op}(B) =
/// \begin{cases}
/// B & \text{if } op(B) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// B^T & \text{if } op(B) = \text{CUSPARSE_OPERATION_TRANSPOSE} \\
/// B^H & \text{if } op(B) = \text{CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE}
/// \end{cases}
/// $$
///
/// When using the (conjugate) transpose of the sparse matrix `A`, this routine may produce slightly different results during different runs with the same input parameters.
///
/// The function `cusparseSpMM_bufferSize()` returns the size of the workspace needed by `cusparseSpMM()`
///
/// Calling `cusparseSpMM_preprocess()` is optional.
/// It may accelerate subsequent calls to `cusparseSpMM()`.
/// It is useful when `cusparseSpMM()` is called multiple times with the same sparsity pattern (`matA`).
/// It provides performance advantages with `CUSPARSE_SPMM_CSR_ALG1` or `CUSPARSE_SPMM_CSR_ALG3`.
/// For all other formats and algorithms have no effect.
///
/// Calling `cusparseSpMM_preprocess()` with `buffer` makes that buffer “active” for `matA` SpMM calls.
/// Subsequent calls to `cusparseSpMM()` with `matA` and the active buffer
/// must use the same values for all parameters as the call to `cusparseSpMM_preprocess()`.
/// The exceptions are: `alpha`, `beta`, `matX`, `matY`, and the values (but not indices) of `matA` may be different.
/// Importantly, the buffer contents must be unmodified since the call to `cusparseSpMM_preprocess()`.
/// When `cusparseSpMM()` is called with `matA` and its active buffer, it may read acceleration data from the buffer.
///
/// Calling `cusparseSpMM_preprocess()` again with `matA` and a new buffer will make the new buffer active,
/// forgetting about the previously-active buffer and making it inactive.
/// For `cusparseSpMM()`, there can only be one active buffer per sparse matrix at a time.
/// To get the effect of multiple active buffers for a single sparse matrix,
/// create multiple matrix handles that all point to the same index and value buffers,
/// and call `cusparseSpMM_preprocess()` once per handle with different workspace buffers.
///
/// Calling `cusparseSpMM()` with an inactive buffer is always permitted.
/// However, there may be no acceleration from the preprocessing in that case.
///
/// For the purposes of thread safety,
/// `cusparseSpMM_preprocess()` is writing to `matA` internal state.
///
/// `cusparseSpMM` supports the following sparse matrix formats:
///
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_COO`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSC`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_BLOCKED_ELL`]
///
/// | | |
/// | --- | --- |
/// | **(1)** | **COO/CSR/CSC/BSR FORMATS** |
///
/// `cusparseSpMM` supports the following index type for representing the sparse matrix `matA`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// `cusparseSpMM` supports the following data types:
///
/// Uniform-precision computation:
///
/// | `A`/`B`/ `C`/`computeType` |
/// | --- |
/// | [`cudaDataType_t::CUDA_R_32F`] |
/// | [`cudaDataType_t::CUDA_R_64F`] |
/// | [`cudaDataType_t::CUDA_C_32F`] |
/// | [`cudaDataType_t::CUDA_C_64F`] |
///
/// Mixed-precision computation:
///
/// | `A`/`B` | `C` | `computeType` | |
/// | --- | --- | --- | --- |
/// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] | [`cudaDataType_t::CUDA_R_32I`] | |
/// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | |
/// | [`cudaDataType_t::CUDA_R_16F`] | | | |
/// | [`cudaDataType_t::CUDA_R_16BF`] | | | |
/// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | | |
/// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] | | |
/// | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_32F`] | \[DEPRECATED\] |
/// | [`cudaDataType_t::CUDA_C_16BF`] | [`cudaDataType_t::CUDA_C_16BF`] | \[DEPRECATED\] | |
///
/// NOTE: [`cudaDataType_t::CUDA_R_16F`], [`cudaDataType_t::CUDA_R_16BF`], [`cudaDataType_t::CUDA_C_16F`], and [`cudaDataType_t::CUDA_C_16BF`] data types always imply mixed-precision computation.
///
/// `cusparseSpMM` supports the following algorithms:
///
/// | Algorithm | Notes |
/// | --- | --- |
/// | `CUSPARSE_SPMM_ALG_DEFAULT` | Default algorithm for any sparse matrix format |
/// | `CUSPARSE_SPMM_COO_ALG1` | Algorithm 1 for COO sparse matrix format
/// - May provide better performance for small number of nnz
/// - Provides the best performance with column-major layout
/// - It supports batched computation
/// - May produce slightly different results during different runs with the same input parameters |
/// | `CUSPARSE_SPMM_COO_ALG2` | Algorithm 2 for COO sparse matrix format
/// - It provides deterministic result
/// - Provides the best performance with column-major layout
/// - In general, slower than Algorithm 1
/// - It supports batched computation
/// - It requires additional memory
/// - If `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to `CUSPARSE_SPMM_COO_ALG1` |
/// | `CUSPARSE_SPMM_COO_ALG3` | Algorithm 3 for COO sparse matrix format
/// - May provide better performance for large number of nnz
/// - May produce slightly different results during different runs with the same input parameters |
/// | `CUSPARSE_SPMM_COO_ALG4` | Algorithm 4 for COO sparse matrix format
/// - Provides better performance with row-major layout
/// - It supports batched computation
/// - May produce slightly different results during different runs with the same input parameters |
/// | `CUSPARSE_SPMM_CSR_ALG1` | Algorithm 1 for CSR/CSC sparse matrix format
/// - Provides the best performance with column-major layout
/// - It supports batched computation
/// - It requires additional memory
/// - May produce slightly different results during different runs with the same input parameters |
/// | `CUSPARSE_SPMM_CSR_ALG2` | Algorithm 2 for CSR/CSC sparse matrix format
/// - Provides the best performance with row-major layout
/// - It supports batched computation
/// - It requires additional memory
/// - May produce slightly different results during different runs with the same input parameters |
/// | `CUSPARSE_SPMM_CSR_ALG3` | Algorithm 3 for CSR sparse matrix format
/// - It provides deterministic result
/// - It requires additional memory
/// - It supports only CSR matrix and `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`
/// - It does not support `opB == CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE`
/// - It does not support `CUDA_C_16F and CUDA_C_16BF` data types |
/// | `CUSPARSE_SPMM_BSR_ALG1` | Algorithm 1 for BSR sparse matrix format
/// - It provides deterministic result
/// - It requires no additional memory
/// - It supports only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`
/// - It does not support [`cudaDataType_t::CUDA_C_16F`] and [`cudaDataType_t::CUDA_C_16BF`] data types
/// - It does not support column-major blocks in `A` |
///
/// NOTE: When using `cusparseSpMM` for mixed-precision computation on COO or CSR matrices, it defaults to algorithms `CUSPARSE_SPMM_COO_ALG2` and `CUSPARSE_SPMM_CSR_ALG3`, respectively. If the required computation isn’t supported by those algorithms, the mixed-precision operation will fail.
///
/// **Performance notes:**
///
/// * Row-major layout provides higher performance than column-major
/// * `CUSPARSE_SPMM_COO_ALG4` and `CUSPARSE_SPMM_CSR_ALG2` should be used with row-major layout, while `CUSPARSE_SPMM_COO_ALG1`, `CUSPARSE_SPMM_COO_ALG2`, `CUSPARSE_SPMM_COO_ALG3`, and `CUSPARSE_SPMM_CSR_ALG1` with column-major layout
/// * For `beta != 1`, most algorithms scale the output matrix before the main computation
/// * For `n == 1`, the routine may use `cusparseSpMV()`
///
/// `cusparseSpMM()` with all algorithms support the following batch modes except for `CUSPARSE_SPMM_CSR_ALG3`:
///
/// * $C_{i} = A \cdot B_{i}$
/// * $C_{i} = A_{i} \cdot B$
/// * $C_{i} = A_{i} \cdot B_{i}$
///
/// The number of batches and their strides can be set by using `cusparseCooSetStridedBatch`, `cusparseCsrSetStridedBatch`, and `cusparseDnMatSetStridedBatch`. The maximum number of batches for `cusparseSpMM()` is 65,535.
///
/// `cusparseSpMM()` has the following properties:
///
/// * The routine requires no extra storage for `CUSPARSE_SPMM_COO_ALG1`, `CUSPARSE_SPMM_COO_ALG3`, `CUSPARSE_SPMM_COO_ALG4`, `CUSPARSE_SPMM_BSR_ALG1`
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run only for `CUSPARSE_SPMM_COO_ALG2`, `CUSPARSE_SPMM_CSR_ALG3`, and `CUSPARSE_SPMM_BSR_ALG1` algorithms
/// * `compute-sanitizer` could report false race conditions for this routine. This is for optimization purposes and does not affect the correctness of the computation
/// * The routine allows the indices of `matA` to be unsorted
///
/// `cusparseSpMM()` supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseSpMM CSR](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_csr) and [cusparseSpMM COO](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_coo) for a code example. For batched computation please visit [cusparseSpMM CSR Batched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_csr_batched) and [cusparseSpMM COO Batched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_coo_batched).
///
/// | | |
/// | --- | --- |
/// | **(2)** | **BLOCKED-ELLPACK FORMAT** |
///
/// `cusparseSpMM` supports the following data types for [`cusparseFormat_t::CUSPARSE_FORMAT_BLOCKED_ELL`] format and the following GPU architectures for exploiting NVIDIA Tensor Cores:
///
/// | `A`/`B` | `C` | `computeType` | `opB` | `Compute Capability` |
/// | --- | --- | --- | --- | --- |
/// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | `N`, `T` | `≥ 70` |
/// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 70` |
/// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 70` |
/// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] | [`cudaDataType_t::CUDA_R_32I`] | `N` column-major | `≥ 75` |
/// | `T` row-major | | | | |
/// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 80` |
/// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 80` |
/// | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 80` |
/// | [`cudaDataType_t::CUDA_R_64F`] | [`cudaDataType_t::CUDA_R_64F`] | [`cudaDataType_t::CUDA_R_64F`] | `N`, `T` | `≥ 80` |
///
/// `cusparseSpMM` supports the following algorithms with [`cusparseFormat_t::CUSPARSE_FORMAT_BLOCKED_ELL`] format:
///
/// | Algorithm | Notes |
/// | --- | --- |
/// | `CUSPARSE_SPMM_ALG_DEFAULT` | Default algorithm for any sparse matrix format |
/// | `CUSPARSE_SPMM_BLOCKED_ELL_ALG1` | Default algorithm for Blocked-ELL format |
///
/// **Performance notes:**
///
/// * Blocked-ELL SpMM provides the best performance with Power-of-2 Block-Sizes.
/// * Large Block-Sizes (e.g. ≥ 64) provide the best performance.
///
/// The function has the following limitations:
///
/// * The pointer mode must be equal to [`cusparsePointerMode_t::CUSPARSE_POINTER_MODE_HOST`]
/// * Only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` is supported.
/// * `opB == CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE` is not supported.
/// * Only [`cusparseIndexType_t::CUSPARSE_INDEX_32I`] is supported.
///
/// Please visit [cuSPARSE Library Samples - cusparseSpMM Blocked-ELL](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_blockedell) for a code example.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B`.
/// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
/// - `matC`: Dense matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to workspace buffer of at least `bufferSize` bytes.
pub fn cusparseSpMM(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstDnMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseDnMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpMMAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as the `alg` argument of the `cusparseSpGEMM_*` and
/// `cusparseSpGEMMreuse_*` bindings declared in this file.
///
/// The enum is `#[repr(u32)]` with explicit discriminants, so each variant is passed across the
/// FFI boundary as the integer value written next to it.
///
/// NOTE(review): the `DETERMINITIC` / `NONDETERMINITIC` spellings are kept exactly as generated by
/// bindgen (presumably mirroring the identifiers in the cuSPARSE C header). Do not "correct" them
/// here: callers refer to the variants by these names.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpGEMMAlg_t {
CUSPARSE_SPGEMM_DEFAULT = 0,
CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC = 1,
CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC = 2,
CUSPARSE_SPGEMM_ALG1 = 3,
CUSPARSE_SPGEMM_ALG2 = 4,
CUSPARSE_SPGEMM_ALG3 = 5,
}
/// Opaque placeholder type for the SpGEMM descriptor object.
///
/// Its only field is a zero-length byte array, so no layout of the underlying object is exposed
/// to Rust. The bindings in this file only ever use it behind the [`cusparseSpGEMMDescr_t`]
/// pointer alias.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpGEMMDescr {
// Zero-sized marker field; it carries no data.
_unused: [u8; 0],
}
/// Raw mutable pointer to a [`cusparseSpGEMMDescr`]; this is the descriptor handle type taken by
/// the `cusparseSpGEMM_*` and `cusparseSpGEMMreuse_*` bindings.
pub type cusparseSpGEMMDescr_t = *mut cusparseSpGEMMDescr;
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMM_createDescr`.
///
/// # Parameters
///
/// - `descr`: Pointer to a [`cusparseSpGEMMDescr_t`] slot. Presumably the library stores the
///   newly created descriptor handle there; confirm against the cuSPARSE documentation.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMM_createDescr(
descr: *mut cusparseSpGEMMDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMM_destroyDescr`.
///
/// # Parameters
///
/// - `descr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]), passed by value.
///   Presumably a handle obtained from `cusparseSpGEMM_createDescr` that must not be used
///   afterwards; confirm against the cuSPARSE documentation.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMM_destroyDescr(descr: cusparseSpGEMMDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMM_workEstimation`.
///
/// All three matrix operands are sparse-matrix descriptors; `matA` and `matB` use the
/// `cusparseConstSpMatDescr_t` descriptor type while `matC` uses `cusparseSpMatDescr_t`.
///
/// NOTE(review): presumably this is the first step of the cuSPARSE SpGEMM workflow and is
/// called once to query `bufferSize1` and again with an allocated `externalBuffer1`; confirm
/// against the cuSPARSE documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Untyped pointer to the $\alpha$ scalar; presumably of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `beta`: Untyped pointer to the $\beta$ scalar; presumably of type `computeType`.
/// - `matC`: Sparse matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
/// - `bufferSize1`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer1`.
/// - `externalBuffer1`: Untyped pointer to a caller-supplied workspace buffer.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMM_workEstimation(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
bufferSize1: *mut size_t,
externalBuffer1: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMM_getNumProducts`.
///
/// # Parameters
///
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
/// - `num_prods`: Pointer to an `i64`. Presumably receives the number of intermediate
///   products recorded in the descriptor; confirm against the cuSPARSE documentation.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMM_getNumProducts(
spgemmDescr: cusparseSpGEMMDescr_t,
num_prods: *mut i64,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMM_estimateMemory`.
///
/// Note the argument order at the end of the list: `bufferSize3` and `externalBuffer3` come
/// *before* `bufferSize2`. The order is part of the C ABI and must not be rearranged.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Untyped pointer to the $\alpha$ scalar; presumably of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `beta`: Untyped pointer to the $\beta$ scalar; presumably of type `computeType`.
/// - `matC`: Sparse matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
/// - `chunk_fraction`: Single-precision fraction. NOTE(review): presumably the portion of
///   the intermediate products processed per chunk, in `(0, 1]`; confirm against the
///   cuSPARSE documentation.
/// - `bufferSize3`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer3`.
/// - `externalBuffer3`: Untyped pointer to a caller-supplied workspace buffer.
/// - `bufferSize2`: Pointer to a `size_t`. Presumably receives the estimated size of the
///   buffer later passed to `cusparseSpGEMM_compute`; confirm.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMM_estimateMemory(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
chunk_fraction: f32,
bufferSize3: *mut size_t,
externalBuffer3: *mut ::core::ffi::c_void,
bufferSize2: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMM_compute`.
///
/// NOTE(review): presumably, like the work-estimation step, this is called once to query
/// `bufferSize2` and again with an allocated `externalBuffer2`; confirm against the cuSPARSE
/// documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Untyped pointer to the $\alpha$ scalar; presumably of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `beta`: Untyped pointer to the $\beta$ scalar; presumably of type `computeType`.
/// - `matC`: Sparse matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
/// - `bufferSize2`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer2`.
/// - `externalBuffer2`: Untyped pointer to a caller-supplied workspace buffer.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMM_compute(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
bufferSize2: *mut size_t,
externalBuffer2: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMM_copy`.
///
/// Unlike the other `cusparseSpGEMM_*` steps declared in this file, this one takes no
/// workspace buffer arguments.
///
/// NOTE(review): presumably this copies the computed result held by `spgemmDescr` into
/// `matC`; confirm against the cuSPARSE documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Untyped pointer to the $\alpha$ scalar; presumably of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `beta`: Untyped pointer to the $\beta$ scalar; presumably of type `computeType`.
/// - `matC`: Sparse matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMM_copy(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMMreuse_workEstimation`.
///
/// Compared with `cusparseSpGEMM_workEstimation`, this signature has no `alpha`, `beta`, or
/// `computeType` arguments.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `matC`: Sparse matrix `C`.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
/// - `bufferSize1`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer1`.
/// - `externalBuffer1`: Untyped pointer to a caller-supplied workspace buffer.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMMreuse_workEstimation(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
matC: cusparseSpMatDescr_t,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
bufferSize1: *mut size_t,
externalBuffer1: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMMreuse_nnz`.
///
/// NOTE(review): the description previously attached to this binding ("computes the number of
/// nonzero elements per row or column and the total number of nonzero elements in a dense
/// matrix", using temporary storage "allocated internally") does not match this signature:
/// every matrix operand here is a sparse-matrix descriptor and the workspaces are the
/// caller-supplied `externalBuffer2`, `externalBuffer3` and `externalBuffer4`. That text looks
/// like documentation of a different `nnz` routine that was attached by name. Take the
/// authoritative description (including asynchronous-execution and CUDA-graph-capture
/// properties) from the cuSPARSE documentation for `cusparseSpGEMMreuse`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `matC`: Sparse matrix `C`.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
/// - `bufferSize2`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer2`.
/// - `externalBuffer2`: Untyped pointer to a caller-supplied workspace buffer.
/// - `bufferSize3`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer3`.
/// - `externalBuffer3`: Untyped pointer to a caller-supplied workspace buffer.
/// - `bufferSize4`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer4`.
/// - `externalBuffer4`: Untyped pointer to a caller-supplied workspace buffer.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMMreuse_nnz(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
matC: cusparseSpMatDescr_t,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
bufferSize2: *mut size_t,
externalBuffer2: *mut ::core::ffi::c_void,
bufferSize3: *mut size_t,
externalBuffer3: *mut ::core::ffi::c_void,
bufferSize4: *mut size_t,
externalBuffer4: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMMreuse_copy`.
///
/// Unlike `cusparseSpGEMM_copy`, this signature takes a workspace (`bufferSize5` /
/// `externalBuffer5`) and has no `alpha`, `beta`, or `computeType` arguments.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `matC`: Sparse matrix `C`.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
/// - `bufferSize5`: Pointer to a `size_t` holding the size in bytes associated with
///   `externalBuffer5`.
/// - `externalBuffer5`: Untyped pointer to a caller-supplied workspace buffer.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMMreuse_copy(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
matC: cusparseSpMatDescr_t,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
bufferSize5: *mut size_t,
externalBuffer5: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpGEMMreuse_compute`.
///
/// This is the only `cusparseSpGEMMreuse_*` binding in this file that takes the `alpha` and
/// `beta` scalars and `computeType`; it takes no workspace buffer arguments.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: Untyped pointer to the $\alpha$ scalar; presumably of type `computeType`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Sparse matrix `B`.
/// - `beta`: Untyped pointer to the $\beta$ scalar; presumably of type `computeType`.
/// - `matC`: Sparse matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm selector ([`cusparseSpGEMMAlg_t`]).
/// - `spgemmDescr`: SpGEMM descriptor handle ([`cusparseSpGEMMDescr_t`]).
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpGEMMreuse_compute(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstSpMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpGEMMAlg_t,
spgemmDescr: cusparseSpGEMMDescr_t,
) -> cusparseStatus_t;
}
/// Algorithm selector passed as the `alg` argument of `cusparseSDDMM_bufferSize`,
/// `cusparseSDDMM_preprocess` and `cusparseSDDMM`.
///
/// The enum is `#[repr(u32)]` and currently has a single variant.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSDDMMAlg_t {
/// Default algorithm. The `cusparseSDDMM` documentation in this file lists it as supporting
/// batched computation.
CUSPARSE_SDDMM_ALG_DEFAULT = 0,
}
unsafe extern "C" {
/// Binding for the C function `cusparseSDDMM_bufferSize`.
///
/// Per the `cusparseSDDMM` documentation in this file, this function returns the size of the
/// workspace needed by `cusparseSDDMM` or `cusparseSDDMM_preprocess`. The signature is the
/// same as theirs except that the final argument is a `size_t` pointer instead of the
/// workspace buffer.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
/// - `matA`: Dense matrix `matA`.
/// - `matB`: Dense matrix `matB`.
/// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
/// - `matC`: Sparse matrix `matC`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `bufferSize`: Pointer to a `size_t`; presumably receives the required workspace size in
///   bytes.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSDDMM_bufferSize(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstDnMatDescr_t,
matB: cusparseConstDnMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSDDMMAlg_t,
bufferSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSDDMM_preprocess`.
///
/// Per the `cusparseSDDMM` documentation in this file:
///
/// * Calling this function is optional; it may accelerate subsequent calls to
///   `cusparseSDDMM()` that use the same sparsity pattern (`matC`).
/// * It makes `externalBuffer` the "active" buffer for `matC`; there can only be one active
///   buffer per sparse matrix at a time.
/// * For the purposes of thread safety, it writes to `matC` internal state.
///
/// The parameter list is identical to that of `cusparseSDDMM`.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
/// - `matA`: Dense matrix `matA`.
/// - `matB`: Dense matrix `matB`.
/// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
/// - `matC`: Sparse matrix `matC`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer; presumably of at least the size
///   reported by `cusparseSDDMM_bufferSize`.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSDDMM_preprocess(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstDnMatDescr_t,
matB: cusparseConstDnMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSDDMMAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// This function performs the multiplication of `matA` and `matB`, followed by an element-wise multiplication with the sparsity pattern of `matC`. Formally, it performs the following operation:
///
/// $$
/// \mathbf{C} = \alpha \left( \operatorname{op}(\mathbf{A}) \cdot \operatorname{op}(\mathbf{B}) \right) \circ \operatorname{spy}(\mathbf{C}) + \beta \mathbf{C}
/// $$
///
/// where
///
/// * `op(A)` is a dense matrix of size $m \times k$
/// * `op(B)` is a dense matrix of size $k \times n$
/// * `C` is a sparse matrix of size $m \times n$
/// * $\alpha$ and $\beta$ are scalars
/// * $\circ$ denotes the Hadamard (entry-wise) matrix product, and $\operatorname{spy}\left( \mathbf{C} \right)$ is the structural sparsity pattern matrix of `C` defined as:
/// $$
/// \operatorname{spy}(C)_{ij} =
/// \begin{cases}
/// 1 & \text{if } C_{ij} \text{ is an entry stored in matC} \\
/// 0 & \text{otherwise}
/// \end{cases}
/// $$
///
/// Also, for matrix `A` and `B`:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } op(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } op(A) = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// $$
/// \operatorname{op}(B) =
/// \begin{cases}
/// B & \text{if } op(B) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// B^T & \text{if } op(B) = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// The function `cusparseSDDMM_bufferSize()` returns the size of the workspace needed by `cusparseSDDMM` or `cusparseSDDMM_preprocess`.
///
/// Calling `cusparseSDDMM_preprocess()` is optional.
/// It may accelerate subsequent calls to `cusparseSDDMM()`.
/// It is useful when `cusparseSDDMM()` is called multiple times with the same sparsity pattern (`matC`).
///
/// Calling `cusparseSDDMM_preprocess()` with `buffer` (the `externalBuffer` argument) makes that buffer “active” for `matC` SDDMM calls.
/// Subsequent calls to `cusparseSDDMM()` with `matC` and the active buffer
/// must use the same values for all parameters as the call to `cusparseSDDMM_preprocess()`.
/// The exceptions are: `alpha`, `beta`, `matA`, `matB`, and the values (but not indices) of `matC` may be different.
/// Importantly, the buffer contents must be unmodified since the call to `cusparseSDDMM_preprocess()`.
/// When `cusparseSDDMM()` is called with `matC` and its active buffer, it may read acceleration data from the buffer.
///
/// Calling `cusparseSDDMM_preprocess()` again with `matC` and a new buffer will make the new buffer active,
/// forgetting about the previously-active buffer and making it inactive.
/// For `cusparseSDDMM()`, there can only be one active buffer per sparse matrix at a time.
/// To get the effect of multiple active buffers for a single sparse matrix,
/// create multiple matrix handles that all point to the same index and value buffers,
/// and call `cusparseSDDMM_preprocess()` once per handle with different workspace buffers.
///
/// Calling `cusparseSDDMM()` with an inactive buffer is always permitted.
/// However, there may be no acceleration from the preprocessing in that case.
///
/// For the purposes of thread safety,
/// `cusparseSDDMM_preprocess()` is writing to `matC` internal state.
///
/// Currently supported sparse matrix formats:
///
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`]
///
/// `cusparseSDDMM()` supports the following index type for representing the sparse matrix `matC`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// The data types combinations currently supported for `cusparseSDDMM` are listed below:
///
/// Uniform-precision computation:
///
/// | `A`/`B`/`C`/`computeType` |
/// | --- |
/// | [`cudaDataType_t::CUDA_R_32F`] |
/// | [`cudaDataType_t::CUDA_R_64F`] |
/// | [`cudaDataType_t::CUDA_C_32F`] |
/// | [`cudaDataType_t::CUDA_C_64F`] |
///
/// Mixed-precision computation:
///
/// | `A`/`B` | `C` | `computeType` |
/// | --- | --- | --- |
/// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
/// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | |
///
/// `cusparseSDDMM` for [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`] also supports the following mixed-precision computation:
///
/// | `A`/`B` | `C` | `computeType` |
/// | --- | --- | --- |
/// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
/// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] | |
///
/// NOTE: [`cudaDataType_t::CUDA_R_16F`], [`cudaDataType_t::CUDA_R_16BF`] data types always imply mixed-precision computation.
///
/// `cusparseSDDMM()` for `CUSPARSE_FORMAT_BSR` supports block sizes of 2, 4, 8, 16, 32, 64 and 128.
///
/// `cusparseSDDMM()` supports the following algorithms:
///
/// | Algorithm | Notes |
/// | --- | --- |
/// | `CUSPARSE_SDDMM_ALG_DEFAULT` | Default algorithm. It supports batched computation. |
///
/// Performance notes: `cusparseSDDMM()` for [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`] provides the best performance when `matA` and `matB` satisfy:
///
/// * `matA`:
///
/// + `matA` is in row-major order and `opA` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
/// + `matA` is in col-major order and `opA` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
/// * `matB`:
///
/// + `matB` is in col-major order and `opB` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
/// + `matB` is in row-major order and `opB` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
///
/// `cusparseSDDMM()` for [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`] provides the best performance when `matA` and `matB` satisfy:
///
/// * `matA`:
///
/// + `matA` is in row-major order and `opA` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
/// + `matA` is in col-major order and `opA` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
/// * `matB`:
///
/// + `matB` is in row-major order and `opB` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
/// + `matB` is in col-major order and `opB` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
///
/// `cusparseSDDMM()` supports the following batch modes:
///
/// * $C_{i} = (A \cdot B) \circ C_{i}$
/// * $C_{i} = \left( A_{i} \cdot B \right) \circ C_{i}$
/// * $C_{i} = \left( A \cdot B_{i} \right) \circ C_{i}$
/// * $C_{i} = \left( A_{i} \cdot B_{i} \right) \circ C_{i}$
///
/// The number of batches and their strides can be set by using `cusparseCsrSetStridedBatch` and `cusparseDnMatSetStridedBatch`. The maximum number of batches for `cusparseSDDMM()` is 65,535.
///
/// `cusparseSDDMM()` has the following properties:
///
/// * The routine requires no extra storage
/// * Provides deterministic (bit-wise) results for each run
/// * The routine supports asynchronous execution
/// * The routine allows the indices of `matC` to be unsorted
///
/// `cusparseSDDMM()` supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseSDDMM](https://github.com/NVIDIA/CUDALibrarySamples/blob/main/cuSPARSE/sddmm_csr) for a code example. For batched computation please visit [cusparseSDDMM CSR Batched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/sddmm_csr_batched).
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
/// - `matA`: Dense matrix `matA`.
/// - `matB`: Dense matrix `matB`.
/// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
/// - `matC`: Sparse matrix `matC`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm for the computation.
/// - `externalBuffer`: Pointer to a workspace buffer of at least `bufferSize` bytes.
pub fn cusparseSDDMM(
handle: cusparseHandle_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
alpha: *const ::core::ffi::c_void,
matA: cusparseConstDnMatDescr_t,
matB: cusparseConstDnMatDescr_t,
beta: *const ::core::ffi::c_void,
matC: cusparseSpMatDescr_t,
computeType: cudaDataType,
alg: cusparseSDDMMAlg_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
/// Opaque placeholder type for the SpMMOp plan object.
///
/// Its only field is a zero-length byte array, so no layout of the underlying object is exposed
/// to Rust. The bindings in this file only ever use it behind the [`cusparseSpMMOpPlan_t`]
/// pointer alias.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpMMOpPlan {
// Zero-sized marker field; it carries no data.
_unused: [u8; 0],
}
/// Raw mutable pointer to a [`cusparseSpMMOpPlan`]; this is the plan handle type taken by
/// `cusparseSpMMOp_createPlan`, `cusparseSpMMOp` and `cusparseSpMMOp_destroyPlan`.
pub type cusparseSpMMOpPlan_t = *mut cusparseSpMMOpPlan;
/// Algorithm selector passed as the `alg` argument of `cusparseSpMMOp_createPlan`.
///
/// The enum is `#[repr(u32)]` and currently has a single variant.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpMMOpAlg_t {
/// Default algorithm; the `cusparseSpMMOp` documentation in this file describes it as the
/// default algorithm for any sparse matrix format.
CUSPARSE_SPMM_OP_ALG_DEFAULT = 0,
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpMMOp_createPlan`.
///
/// Per the `cusparseSpMMOp` documentation in this file, this function returns the size of the
/// workspace and the compiled kernel needed by `cusparseSpMMOp()`.
///
/// Each custom operator is passed as a pair of arguments: an untyped pointer to a buffer
/// followed by a `size_t` size. NOTE(review): judging by the parameter names, the buffers
/// presumably hold LTO-IR code for the operators; confirm against the cuSPARSE documentation.
///
/// # Parameters
///
/// - `handle`: Handle to the cuSPARSE library context.
/// - `plan`: Pointer to a [`cusparseSpMMOpPlan_t`] slot; presumably receives the created plan
///   handle.
/// - `opA`: Operation `op(A)`.
/// - `opB`: Operation `op(B)`.
/// - `matA`: Sparse matrix `A`.
/// - `matB`: Dense matrix `B`.
/// - `matC`: Dense matrix `C`.
/// - `computeType`: Datatype in which the computation is executed.
/// - `alg`: Algorithm selector ([`cusparseSpMMOpAlg_t`]).
/// - `addOperationLtoirBuffer`: Pointer to the buffer for the custom **add** operator.
/// - `addOperationBufferSize`: Size associated with `addOperationLtoirBuffer`; presumably in
///   bytes.
/// - `mulOperationLtoirBuffer`: Pointer to the buffer for the custom **mul** operator.
/// - `mulOperationBufferSize`: Size associated with `mulOperationLtoirBuffer`; presumably in
///   bytes.
/// - `epilogueLtoirBuffer`: Pointer to the buffer for the custom **epilogue** operator.
/// - `epilogueBufferSize`: Size associated with `epilogueLtoirBuffer`; presumably in bytes.
/// - `SpMMWorkspaceSize`: Pointer to a `size_t`; presumably receives the size of the workspace
///   needed by `cusparseSpMMOp()`.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpMMOp_createPlan(
handle: cusparseHandle_t,
plan: *mut cusparseSpMMOpPlan_t,
opA: cusparseOperation_t,
opB: cusparseOperation_t,
matA: cusparseConstSpMatDescr_t,
matB: cusparseConstDnMatDescr_t,
matC: cusparseDnMatDescr_t,
computeType: cudaDataType,
alg: cusparseSpMMOpAlg_t,
addOperationLtoirBuffer: *const ::core::ffi::c_void,
addOperationBufferSize: size_t,
mulOperationLtoirBuffer: *const ::core::ffi::c_void,
mulOperationBufferSize: size_t,
epilogueLtoirBuffer: *const ::core::ffi::c_void,
epilogueBufferSize: size_t,
SpMMWorkspaceSize: *mut size_t,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// *NOTE 1:* NVRTC and nvJitLink are not currently available on Arm64 Android platforms.
///
/// *NOTE 2:* The routine does not support Android and Tegra platforms except Judy (sm87).
///
/// `Experimental`: The function performs the multiplication of a sparse matrix `matA` and a dense matrix `matB` with custom operators.
///
/// $$
/// C'_{ij} = \text{epilogue}\left( \sum_{k}^{\oplus} \operatorname{op}(A)_{ik} \otimes \operatorname{op}(B)_{kj},\; C_{ij} \right)
/// $$
///
/// NOTE(review): the formula above was missing from this comment and has been reconstructed from the operator definitions below; confirm it against the cuSPARSE documentation.
///
/// where
///
/// * `op(A)` is a sparse matrix of size $m \times k$
/// * `op(B)` is a dense matrix of size $k \times n$
/// * `C` is a dense matrix of size $m \times n$
/// * $\oplus$, $\otimes$, and $\text{epilogue}$ are custom **add**, **mul**, and **epilogue** operators respectively.
///
/// Also, for matrix `A` and `B`:
/// $$
/// \operatorname{op}(A) =
/// \begin{cases}
/// A & \text{if } op(A) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// A^T & \text{if } op(A) = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// $$
/// \operatorname{op}(B) =
/// \begin{cases}
/// B & \text{if } op(B) = \text{CUSPARSE_OPERATION_NON_TRANSPOSE} \\
/// B^T & \text{if } op(B) = \text{CUSPARSE_OPERATION_TRANSPOSE}
/// \end{cases}
/// $$
///
/// Only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` is currently supported
///
/// The function `cusparseSpMMOp_createPlan()` returns the size of the workspace and the compiled kernel needed by `cusparseSpMMOp()`
///
/// The operators must have the following signature and return type
///
/// NOTE(review): the listing of the operator signatures is missing from this comment; consult the cuSPARSE documentation for `cusparseSpMMOp()` for the exact device-function prototypes.
///
/// `<computetype>` is one of `float`, `double`, `cuComplex`, `cuDoubleComplex`, or `int`.
///
/// `cusparseSpMMOp` supports the following sparse matrix formats:
///
/// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
///
/// `cusparseSpMMOp` supports the following index type for representing the sparse matrix `matA`:
///
/// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
/// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
///
/// `cusparseSpMMOp` supports the following data types:
///
/// Uniform-precision computation:
///
/// | `A`/`B`/ `C`/`computeType` |
/// | --- |
/// | [`cudaDataType_t::CUDA_R_32F`] |
/// | [`cudaDataType_t::CUDA_R_64F`] |
/// | [`cudaDataType_t::CUDA_C_32F`] |
/// | [`cudaDataType_t::CUDA_C_64F`] |
///
/// Mixed-precision computation:
///
/// | `A`/`B` | `C` | `computeType` |
/// | --- | --- | --- |
/// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] | [`cudaDataType_t::CUDA_R_32I`] |
/// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
/// | [`cudaDataType_t::CUDA_R_16F`] | | |
/// | [`cudaDataType_t::CUDA_R_16BF`] | | |
/// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | |
/// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] | |
///
/// `cusparseSpMMOp` supports the following algorithms:
///
/// | Algorithm | Notes |
/// | --- | --- |
/// | `CUSPARSE_SPMM_OP_ALG_DEFAULT` | Default algorithm for any sparse matrix format |
///
/// **Performance notes:**
///
/// * Row-major layout provides higher performance than column-major.
///
/// `cusparseSpMMOp()` has the following properties:
///
/// * The routine requires extra storage
/// * The routine supports asynchronous execution
/// * Provides deterministic (bit-wise) results for each run
/// * The routine allows the indices of `matA` to be unsorted
///
/// `cusparseSpMMOp()` supports the following optimizations:
///
/// * CUDA graph capture
/// * Hardware Memory Compression
///
/// Please visit [cuSPARSE Library Samples - cusparseSpMMOp](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_csr_op).
///
/// # Parameters
///
/// - `plan`: Plan handle ([`cusparseSpMMOpPlan_t`]); presumably the plan produced by `cusparseSpMMOp_createPlan()`.
/// - `externalBuffer`: Pointer to a workspace buffer; presumably of at least the workspace size reported by `cusparseSpMMOp_createPlan()`.
pub fn cusparseSpMMOp(
plan: cusparseSpMMOpPlan_t,
externalBuffer: *mut ::core::ffi::c_void,
) -> cusparseStatus_t;
}
unsafe extern "C" {
/// Binding for the C function `cusparseSpMMOp_destroyPlan`.
///
/// # Parameters
///
/// - `plan`: Plan handle ([`cusparseSpMMOpPlan_t`]), passed by value. Presumably a handle
///   obtained from `cusparseSpMMOp_createPlan` that must not be used afterwards; confirm
///   against the cuSPARSE documentation.
///
/// Returns a [`cusparseStatus_t`] status code.
pub fn cusparseSpMMOp_destroyPlan(plan: cusparseSpMMOpPlan_t) -> cusparseStatus_t;
}