singe_cusparse_sys/
sys_12709.rs

1/* automatically generated by rust-bindgen 0.72.1 */
2
/// Backing storage for a run of adjacent C bitfields.
///
/// `Storage` is the raw buffer; the accessors below require it to expose its contents as a
/// byte slice and address bits by their absolute index inside that slice.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct __BindgenBitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> __BindgenBitfieldUnit<Storage> {
    /// Wraps an existing storage buffer without modifying it.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        Self { storage }
    }
}

impl<Storage> __BindgenBitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Tests the bit of `byte` selected by `index % 8`.
    ///
    /// On big-endian targets the position inside the byte is mirrored (`7 - index % 8`).
    #[inline]
    fn extract_bit(byte: u8, index: usize) -> bool {
        let within_byte = index % 8;
        let shift = if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        };
        (byte >> shift) & 1 != 0
    }

    /// Returns whether the bit at absolute position `index` is set.
    ///
    /// Indexing panics if `index / 8` is past the end of the storage slice.
    #[inline]
    pub fn get_bit(&self, index: usize) -> bool {
        let bytes = self.storage.as_ref();
        let byte_index = index / 8;
        debug_assert!(byte_index < bytes.len());
        Self::extract_bit(bytes[byte_index], index)
    }

    /// Raw-pointer variant of [`Self::get_bit`]; no reference to `*this` is created.
    ///
    /// # Safety
    ///
    /// `this` is dereferenced for a one-byte read at offset `index / 8` inside `storage`;
    /// the caller must make that read valid. Only a debug assertion checks the offset.
    #[inline]
    pub unsafe fn raw_get_bit(this: *const Self, index: usize) -> bool {
        let byte_index = index / 8;
        debug_assert!(byte_index < core::mem::size_of::<Storage>());
        // SAFETY: validity of `this` and of the byte offset is the caller's contract.
        let byte = unsafe {
            let base = core::ptr::addr_of!((*this).storage).cast::<u8>();
            base.add(byte_index).read()
        };
        Self::extract_bit(byte, index)
    }

    /// Returns `byte` with the bit selected by `index % 8` forced to `val`.
    ///
    /// Uses the same endian-dependent bit position as [`Self::extract_bit`].
    #[inline]
    fn change_bit(byte: u8, index: usize, val: bool) -> u8 {
        let within_byte = index % 8;
        let shift = if cfg!(target_endian = "big") {
            7 - within_byte
        } else {
            within_byte
        };
        let mask = 1u8 << shift;
        match val {
            true => byte | mask,
            false => byte & !mask,
        }
    }

    /// Sets or clears the bit at absolute position `index`.
    ///
    /// Indexing panics if `index / 8` is past the end of the storage slice.
    #[inline]
    pub fn set_bit(&mut self, index: usize, val: bool) {
        let byte_index = index / 8;
        debug_assert!(byte_index < self.storage.as_ref().len());
        let slot = &mut self.storage.as_mut()[byte_index];
        *slot = Self::change_bit(*slot, index, val);
    }

    /// Raw-pointer variant of [`Self::set_bit`]; no reference to `*this` is created.
    ///
    /// # Safety
    ///
    /// `this` is dereferenced for a one-byte read and write at offset `index / 8` inside
    /// `storage`; the caller must make both valid. Only a debug assertion checks the offset.
    #[inline]
    pub unsafe fn raw_set_bit(this: *mut Self, index: usize, val: bool) {
        let byte_index = index / 8;
        debug_assert!(byte_index < core::mem::size_of::<Storage>());
        // SAFETY: validity of `this` and of the byte offset is the caller's contract.
        unsafe {
            let slot = core::ptr::addr_of_mut!((*this).storage)
                .cast::<u8>()
                .add(byte_index);
            slot.write(Self::change_bit(slot.read(), index, val));
        }
    }

    /// Reads `bit_width` bits starting at `bit_offset` and packs them into a `u64`.
    ///
    /// On little-endian targets storage bit `bit_offset + i` becomes result bit `i`; on
    /// big-endian targets it becomes result bit `bit_width - 1 - i`.
    #[inline]
    pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        let width = usize::from(bit_width);
        let capacity = self.storage.as_ref().len();
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < capacity);
        debug_assert!((bit_offset + width) / 8 <= capacity);
        (0..width)
            .filter(|&i| self.get_bit(i + bit_offset))
            .fold(0u64, |acc, i| {
                let dest = if cfg!(target_endian = "big") {
                    width - 1 - i
                } else {
                    i
                };
                acc | (1u64 << dest)
            })
    }

    /// Raw-pointer variant of [`Self::get`].
    ///
    /// # Safety
    ///
    /// Every bit in `bit_offset..bit_offset + bit_width` is read through
    /// [`Self::raw_get_bit`], so that function's contract must hold for each of them.
    #[inline]
    pub unsafe fn raw_get(this: *const Self, bit_offset: usize, bit_width: u8) -> u64 {
        let width = usize::from(bit_width);
        let capacity = core::mem::size_of::<Storage>();
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < capacity);
        debug_assert!((bit_offset + width) / 8 <= capacity);
        let mut acc = 0u64;
        for i in 0..width {
            // SAFETY: forwarded from this function's own contract.
            let bit_is_set = unsafe { Self::raw_get_bit(this, i + bit_offset) };
            if !bit_is_set {
                continue;
            }
            let dest = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            acc |= 1u64 << dest;
        }
        acc
    }

    /// Writes the low `bit_width` bits of `val` into storage starting at `bit_offset`.
    ///
    /// Bit placement mirrors [`Self::get`], so a `set` followed by a `get` with the same
    /// offset and width returns the low `bit_width` bits of `val`.
    #[inline]
    pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        let width = usize::from(bit_width);
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + width) / 8 <= self.storage.as_ref().len());
        for i in 0..width {
            let source_bit = (val >> i) & 1 == 1;
            let dest = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            self.set_bit(dest + bit_offset, source_bit);
        }
    }

    /// Raw-pointer variant of [`Self::set`].
    ///
    /// # Safety
    ///
    /// Every bit in `bit_offset..bit_offset + bit_width` is written through
    /// [`Self::raw_set_bit`], so that function's contract must hold for each of them.
    #[inline]
    pub unsafe fn raw_set(this: *mut Self, bit_offset: usize, bit_width: u8, val: u64) {
        let width = usize::from(bit_width);
        let capacity = core::mem::size_of::<Storage>();
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < capacity);
        debug_assert!((bit_offset + width) / 8 <= capacity);
        for i in 0..width {
            let source_bit = (val >> i) & 1 == 1;
            let dest = if cfg!(target_endian = "big") {
                width - 1 - i
            } else {
                i
            };
            // SAFETY: forwarded from this function's own contract.
            unsafe { Self::raw_set_bit(this, dest + bit_offset, source_bit) };
        }
    }
}
/// Major component of the cuSPARSE version these bindings were generated from.
pub const CUSPARSE_VER_MAJOR: u32 = 12;
/// Minor component of the cuSPARSE version.
pub const CUSPARSE_VER_MINOR: u32 = 7;
/// Patch component of the cuSPARSE version.
pub const CUSPARSE_VER_PATCH: u32 = 9;
/// Build number of the cuSPARSE release; it is not part of [`CUSPARSE_VERSION`].
pub const CUSPARSE_VER_BUILD: u32 = 17;
/// Combined version number, `major * 1000 + minor * 100 + patch` (evaluates to 12709).
pub const CUSPARSE_VERSION: u32 =
    CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH;
/// Alias for the C `unsigned long` type; used by the `_total_written` field of [`_IO_FILE`].
///
/// NOTE(review): the name implies 64 bits, which only holds on targets where `c_ulong` is
/// 64-bit — confirm the set of supported targets.
pub type __uint64_t = ::core::ffi::c_ulong;
/// Alias for the C `long` type; used by the `_old_offset` field of [`_IO_FILE`].
pub type __off_t = ::core::ffi::c_long;
/// Alias for the C `long` type; used by the `_offset` field of [`_IO_FILE`].
///
/// NOTE(review): the name implies 64 bits, which only holds on targets where `c_long` is
/// 64-bit — confirm the set of supported targets.
pub type __off64_t = ::core::ffi::c_long;
/// Two `f32` components laid out as in C and aligned to 8 bytes.
///
/// Also reachable through the aliases `cuFloatComplex` and `cuComplex`.
#[repr(C, align(8))]
#[derive(Clone, Copy, Debug, Default, PartialEq, PartialOrd)]
pub struct float2 {
    /// First component.
    pub x: f32,
    /// Second component.
    pub y: f32,
}
/// Two `f64` components laid out as in C and aligned to 16 bytes.
///
/// Also reachable through the alias `cuDoubleComplex`.
#[repr(C, align(16))]
#[derive(Clone, Copy, Debug, Default, PartialEq, PartialOrd)]
pub struct double2 {
    /// First component.
    pub x: f64,
    /// Second component.
    pub y: f64,
}
/// Alias for [`float2`].
///
/// NOTE(review): by name a single-precision complex number, presumably with `x` as the real
/// part and `y` as the imaginary part — confirm against the CUDA headers.
pub type cuFloatComplex = float2;
/// Alias for [`double2`].
///
/// NOTE(review): by name a double-precision complex number, presumably with `x` as the real
/// part and `y` as the imaginary part — confirm against the CUDA headers.
pub type cuDoubleComplex = double2;
/// Alias for [`cuFloatComplex`], and therefore for [`float2`].
pub type cuComplex = cuFloatComplex;
/// Binding for C `size_t`, mapped here to the C `unsigned long` type.
///
/// NOTE(review): `size_t` and `unsigned long` differ in width on some targets — confirm the
/// set of supported targets.
pub type size_t = ::core::ffi::c_ulong;
/// Opaque placeholder type.
///
/// Its only field is a private zero-length array, so the type carries no data and cannot be
/// constructed outside this module.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CUstream_st {
    _unused: [u8; 0],
}
/// Alias for [`_IO_FILE`]; the pointee type of the `file` argument of `cusparseLoggerSetFile`.
pub type FILE = _IO_FILE;
/// Opaque placeholder type; only referenced through the `_markers` pointer of [`_IO_FILE`].
///
/// Its only field is a private zero-length array, so the type carries no data.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_marker {
    _unused: [u8; 0],
}
/// Opaque placeholder type; only referenced through the `_codecvt` pointer of [`_IO_FILE`].
///
/// Its only field is a private zero-length array, so the type carries no data.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_codecvt {
    _unused: [u8; 0],
}
/// Opaque placeholder type; only referenced through the `_wide_data` pointer of [`_IO_FILE`].
///
/// Its only field is a private zero-length array, so the type carries no data.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct _IO_wide_data {
    _unused: [u8; 0],
}
/// Alias for `c_void`; the pointee type of the `_lock` field of [`_IO_FILE`].
pub type _IO_lock_t = ::core::ffi::c_void;
/// `#[repr(C)]` mirror of the C `struct _IO_FILE`, exposed as [`FILE`].
///
/// NOTE(review): the field list presumably tracks one specific C library version's private
/// layout — confirm before reading or writing any field from Rust.
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub struct _IO_FILE {
    pub _flags: ::core::ffi::c_int,
    pub _IO_read_ptr: *mut ::core::ffi::c_char,
    pub _IO_read_end: *mut ::core::ffi::c_char,
    pub _IO_read_base: *mut ::core::ffi::c_char,
    pub _IO_write_base: *mut ::core::ffi::c_char,
    pub _IO_write_ptr: *mut ::core::ffi::c_char,
    pub _IO_write_end: *mut ::core::ffi::c_char,
    pub _IO_buf_base: *mut ::core::ffi::c_char,
    pub _IO_buf_end: *mut ::core::ffi::c_char,
    pub _IO_save_base: *mut ::core::ffi::c_char,
    pub _IO_backup_base: *mut ::core::ffi::c_char,
    pub _IO_save_end: *mut ::core::ffi::c_char,
    pub _markers: *mut _IO_marker,
    pub _chain: *mut _IO_FILE,
    pub _fileno: ::core::ffi::c_int,
    /// Zero-sized field; it occupies no bytes but gives the following bitfield storage the
    /// alignment of `u32`.
    pub _bitfield_align_1: [u32; 0],
    /// Three bytes of bitfield storage holding the 24-bit `_flags2` value (see the
    /// `_flags2` / `set__flags2` accessors).
    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 3usize]>,
    pub _short_backupbuf: [::core::ffi::c_char; 1usize],
    pub _old_offset: __off_t,
    pub _cur_column: ::core::ffi::c_ushort,
    pub _vtable_offset: ::core::ffi::c_schar,
    pub _shortbuf: [::core::ffi::c_char; 1usize],
    pub _lock: *mut _IO_lock_t,
    pub _offset: __off64_t,
    pub _codecvt: *mut _IO_codecvt,
    pub _wide_data: *mut _IO_wide_data,
    pub _freeres_list: *mut _IO_FILE,
    pub _freeres_buf: *mut ::core::ffi::c_void,
    pub _prevchain: *mut *mut _IO_FILE,
    pub _mode: ::core::ffi::c_int,
    pub _unused3: ::core::ffi::c_int,
    pub _total_written: __uint64_t,
    pub _unused2: [::core::ffi::c_char; 8usize],
}
234impl Default for _IO_FILE {
235    fn default() -> Self {
236        let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
237        unsafe {
238            ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
239            s.assume_init()
240        }
241    }
242}
243impl _IO_FILE {
244    #[inline]
245    pub fn _flags2(&self) -> ::core::ffi::c_int {
246        unsafe { ::core::mem::transmute(self._bitfield_1.get(0usize, 24u8) as u32) }
247    }
248    #[inline]
249    pub fn set__flags2(&mut self, val: ::core::ffi::c_int) {
250        unsafe {
251            let val: u32 = ::core::mem::transmute(val);
252            self._bitfield_1.set(0usize, 24u8, val as u64)
253        }
254    }
255    #[inline]
256    pub unsafe fn _flags2_raw(this: *const Self) -> ::core::ffi::c_int {
257        unsafe {
258            ::core::mem::transmute(
259                <__BindgenBitfieldUnit<
260                    [u8; 3usize],
261                >>::raw_get(::core::ptr::addr_of!((* this)._bitfield_1), 0usize, 24u8)
262                    as u32,
263            )
264        }
265    }
266    #[inline]
267    pub unsafe fn set__flags2_raw(this: *mut Self, val: ::core::ffi::c_int) {
268        unsafe {
269            let val: u32 = ::core::mem::transmute(val);
270            <__BindgenBitfieldUnit<
271                [u8; 3usize],
272            >>::raw_set(
273                ::core::ptr::addr_of_mut!((* this)._bitfield_1),
274                0usize,
275                24u8,
276                val as u64,
277            )
278        }
279    }
280    #[inline]
281    pub fn new_bitfield_1(
282        _flags2: ::core::ffi::c_int,
283    ) -> __BindgenBitfieldUnit<[u8; 3usize]> {
284        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 3usize]> = Default::default();
285        __bindgen_bitfield_unit
286            .set(
287                0usize,
288                24u8,
289                {
290                    let _flags2: u32 = unsafe { ::core::mem::transmute(_flags2) };
291                    _flags2 as u64
292                },
293            );
294        __bindgen_bitfield_unit
295    }
296}
/// Opaque placeholder for the cuSPARSE context behind [`cusparseHandle_t`].
///
/// Its only field is a private zero-length array, so the type carries no data.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseContext {
    _unused: [u8; 0],
}
/// This is a pointer type to an opaque cuSPARSE context, which the user must initialize by calling [`cusparseCreate`] prior to calling any other library function. The handle created and returned by [`cusparseCreate`] must be passed to every cuSPARSE function.
pub type cusparseHandle_t = *mut cusparseContext;
/// Opaque placeholder for the matrix descriptor behind [`cusparseMatDescr_t`].
///
/// Its only field is a private zero-length array, so the type carries no data.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseMatDescr {
    _unused: [u8; 0],
}
/// Pointer to an opaque structure that describes the shape and properties of a matrix.
pub type cusparseMatDescr_t = *mut cusparseMatDescr;
/// Opaque placeholder type behind [`bsrsv2Info_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrsv2Info {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`bsrsv2Info`].
pub type bsrsv2Info_t = *mut bsrsv2Info;
/// Opaque placeholder type behind [`bsrsm2Info_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrsm2Info {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`bsrsm2Info`].
pub type bsrsm2Info_t = *mut bsrsm2Info;
/// Opaque placeholder type behind [`csric02Info_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csric02Info {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`csric02Info`]; created by `cusparseCreateCsric02Info`
/// and released by `cusparseDestroyCsric02Info`.
pub type csric02Info_t = *mut csric02Info;
/// Opaque placeholder type behind [`bsric02Info_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsric02Info {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`bsric02Info`]; created by `cusparseCreateBsric02Info`
/// and released by `cusparseDestroyBsric02Info`.
pub type bsric02Info_t = *mut bsric02Info;
/// Opaque placeholder type behind [`csrilu02Info_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csrilu02Info {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`csrilu02Info`].
pub type csrilu02Info_t = *mut csrilu02Info;
/// Opaque placeholder type behind [`bsrilu02Info_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrilu02Info {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`bsrilu02Info`].
pub type bsrilu02Info_t = *mut bsrilu02Info;
/// Opaque placeholder type behind [`csru2csrInfo_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csru2csrInfo {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`csru2csrInfo`].
pub type csru2csrInfo_t = *mut csru2csrInfo;
/// Opaque placeholder type behind `cusparseColorInfo_t`; its only field is a private
/// zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseColorInfo {
    _unused: [u8; 0],
}
/// This is a pointer type to an opaque structure holding the information used in `csrcolor()`.
///
/// Marked `#[deprecated]` in this binding.
#[deprecated]
pub type cusparseColorInfo_t = *mut cusparseColorInfo;
/// Opaque placeholder type behind [`pruneInfo_t`]; its only field is a private zero-length array.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct pruneInfo {
    _unused: [u8; 0],
}
/// Raw mutable pointer to an opaque [`pruneInfo`].
pub type pruneInfo_t = *mut pruneInfo;
/// This data type represents the status returned by the library functions and it can have the following values.
///
/// NOTE(review): this is a Rust enum used directly as an FFI return type; a status value
/// outside the discriminants listed here would not be a valid value of this type — confirm
/// the linked library version cannot return one.
///
/// NOTE(review): `TryFromPrimitive` and `IntoPrimitive` are not imported in the visible part
/// of this file — confirm they are brought into scope where this file is included.
#[repr(u32)]
#[derive(
    Debug,
    Copy,
    Clone,
    Hash,
    PartialOrd,
    Ord,
    PartialEq,
    Eq,
    TryFromPrimitive,
    IntoPrimitive,
)]
pub enum cusparseStatus_t {
    /// The operation completed successfully.
    CUSPARSE_STATUS_SUCCESS = 0,
    /// The cuSPARSE library was not initialized. This is usually caused by the lack of a prior call, an error in the CUDA Runtime API called by the cuSPARSE routine, or an error in the hardware setup.
    ///
    /// **To correct:** call [`cusparseCreate`] prior to the function call; and check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed.
    ///
    /// The error also applies to generic APIs (cuSPARSE Generic APIs) for indicating a matrix/vector descriptor not initialized.
    CUSPARSE_STATUS_NOT_INITIALIZED = 1,
    /// Resource allocation failed inside the cuSPARSE library. This is usually caused by a device memory allocation (`cudaMalloc()`) or by a host memory allocation failure.
    ///
    /// **To correct:** prior to the function call, deallocate previously allocated memory as much as possible.
    CUSPARSE_STATUS_ALLOC_FAILED = 2,
    /// An unsupported value or parameter was passed to the function (a negative vector size, for example).
    ///
    /// **To correct:** ensure that all the parameters being passed have valid values.
    CUSPARSE_STATUS_INVALID_VALUE = 3,
    /// The function requires a feature absent from the device architecture.
    ///
    /// **To correct:** compile and run the application on a device with appropriate compute capability.
    CUSPARSE_STATUS_ARCH_MISMATCH = 4,
    /// NOTE(review): no description is available in this binding; by name, presumably a
    /// memory-mapping failure — confirm against the cuSPARSE documentation.
    CUSPARSE_STATUS_MAPPING_ERROR = 5,
    /// The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.
    ///
    /// **To correct:** check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed.
    CUSPARSE_STATUS_EXECUTION_FAILED = 6,
    /// An internal cuSPARSE operation failed.
    ///
    /// **To correct:** check that the hardware, an appropriate version of the driver, and the cuSPARSE library are correctly installed. Also, check that the memory passed as a parameter to the routine is not being deallocated prior to the routine completion.
    CUSPARSE_STATUS_INTERNAL_ERROR = 7,
    /// The matrix type is not supported by this function. This is usually caused by passing an invalid matrix descriptor to the function.
    ///
    /// **To correct:** check that the fields in `cusparseMatDescr_t descrA` were set correctly.
    CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
    /// NOTE(review): no description is available in this binding; by name, presumably
    /// reports a zero pivot met during a factorization or solve — confirm against the
    /// cuSPARSE documentation.
    CUSPARSE_STATUS_ZERO_PIVOT = 9,
    /// The operation or data type combination is currently not supported by the function.
    CUSPARSE_STATUS_NOT_SUPPORTED = 10,
    /// The resources for the computation, such as GPU global or shared memory, are not sufficient to complete the operation. The error can also indicate that the current computation mode (e.g. bit size of sparse matrix indices) cannot handle the given input.
    CUSPARSE_STATUS_INSUFFICIENT_RESOURCES = 11,
}
/// This type indicates whether the scalar values are passed by reference on the host or device.
///
/// If several scalar values are passed by reference in the function call, all of them conform to the same single pointer mode. The pointer mode can be set and retrieved using the [`cusparseSetPointerMode`] and [`cusparseGetPointerMode`] routines, respectively.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparsePointerMode_t {
    /// The scalars are passed by reference on the host.
    CUSPARSE_POINTER_MODE_HOST = 0,
    /// The scalars are passed by reference on the device.
    CUSPARSE_POINTER_MODE_DEVICE = 1,
}
/// This type indicates whether the operation is performed only on indices or on data and indices.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseAction_t {
    /// The operation is performed only on indices.
    CUSPARSE_ACTION_SYMBOLIC = 0,
    /// The operation is performed on data and indices.
    CUSPARSE_ACTION_NUMERIC = 1,
}
/// This type indicates the type of matrix stored in sparse storage. Notice that for symmetric, Hermitian and triangular matrices only their lower or upper part is assumed to be stored.
///
/// The whole idea of matrix type and fill mode is to keep minimum storage for symmetric/Hermitian matrices, and also to take advantage of the symmetric property in SpMV (Sparse Matrix Vector multiplication). To compute `y=A*x` when `A` is symmetric and only the lower triangular part is stored, two steps are needed. The first step is to compute `y=(L+D)*x` and the second step is to compute `y=L^T*x + y`. Given the fact that the transpose operation `y=L^T*x` is 10x slower than the non-transpose version `y=L*x`, the symmetric property does not show any performance gain. It is better for the user to extend the symmetric matrix to a general matrix and apply `y=A*x` with matrix type [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
///
/// In general, SpMV, preconditioners (incomplete Cholesky or incomplete LU) and triangular solvers are combined together in iterative solvers, for example PCG and GMRES. If the user always uses a general matrix (instead of a symmetric matrix), there is no need to support anything other than general matrices in preconditioners. Therefore the new routines, `[bsr|csr]sv2` (triangular solver), `[bsr|csr]ilu02` (incomplete LU) and `[bsr|csr]ic02` (incomplete Cholesky), only support matrix type [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseMatrixType_t {
    /// The matrix is general.
    CUSPARSE_MATRIX_TYPE_GENERAL = 0,
    /// The matrix is symmetric.
    CUSPARSE_MATRIX_TYPE_SYMMETRIC = 1,
    /// The matrix is Hermitian.
    CUSPARSE_MATRIX_TYPE_HERMITIAN = 2,
    /// The matrix is triangular.
    CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3,
}
/// This type indicates whether the lower or upper part of a matrix is stored in sparse storage.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseFillMode_t {
    /// The lower triangular part is stored.
    CUSPARSE_FILL_MODE_LOWER = 0,
    /// The upper triangular part is stored.
    CUSPARSE_FILL_MODE_UPPER = 1,
}
/// This type indicates whether the matrix diagonal entries are unity.
///
/// The diagonal elements are always assumed to be present, but if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is passed to an API routine, then the routine assumes that all diagonal entries are unity and will not read or modify those entries. Note that in this case the routine assumes the diagonal entries are equal to one, regardless of what those entries are actually set to in memory.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseDiagType_t {
    /// The matrix diagonal has non-unit elements.
    CUSPARSE_DIAG_TYPE_NON_UNIT = 0,
    /// The matrix diagonal has unit elements.
    CUSPARSE_DIAG_TYPE_UNIT = 1,
}
/// This type indicates whether the base of the matrix indices is zero or one.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseIndexBase_t {
    /// The base index is zero (C compatibility).
    CUSPARSE_INDEX_BASE_ZERO = 0,
    /// The base index is one (Fortran compatibility).
    CUSPARSE_INDEX_BASE_ONE = 1,
}
/// This type indicates which operation is applied to the related input (e.g. sparse matrix, or vector).
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseOperation_t {
    /// The non-transpose operation is selected.
    CUSPARSE_OPERATION_NON_TRANSPOSE = 0,
    /// The transpose operation is selected.
    CUSPARSE_OPERATION_TRANSPOSE = 1,
    /// The conjugate transpose operation is selected.
    CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2,
}
/// This type indicates whether the elements of a dense matrix should be parsed by rows or by columns (assuming column-major storage in memory of the dense matrix) in function `cusparse[S|D|C|Z]nnz`.
///
/// The storage format of blocks in BSR format is also controlled by this type.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseDirection_t {
    /// The matrix should be parsed by rows.
    CUSPARSE_DIRECTION_ROW = 0,
    /// The matrix should be parsed by columns.
    CUSPARSE_DIRECTION_COLUMN = 1,
}
/// This type indicates whether level information is generated and used in `csrsv2`, `csric02`, `csrilu02`, `bsrsv2`, `bsric02` and `bsrilu02`.
///
/// The type and both of its variants are marked `#[deprecated]` in this binding.
#[deprecated]
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSolvePolicy_t {
    /// No level information is generated and used.
    #[deprecated]
    CUSPARSE_SOLVE_POLICY_NO_LEVEL = 0,
    /// Generate and use level information.
    #[deprecated]
    CUSPARSE_SOLVE_POLICY_USE_LEVEL = 1,
}
/// Two-valued selector with discriminants 0 and 1.
///
/// NOTE(review): no description is available in this binding; by name, presumably chooses
/// the algorithm used by the `csrcolor()` routine — confirm against the cuSPARSE
/// documentation.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseColorAlg_t {
    CUSPARSE_COLOR_ALG0 = 0,
    CUSPARSE_COLOR_ALG1 = 1,
}
unsafe extern "C" {
    /// This function initializes the cuSPARSE library and creates a handle on the cuSPARSE context. It must be called before any other cuSPARSE API function is invoked. It allocates hardware resources necessary for accessing the GPU.
    ///
    /// # Parameters
    ///
    /// - `handle`: The pointer to the handle to the cuSPARSE context.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseCreate(handle: *mut cusparseHandle_t) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function releases CPU-side resources used by the cuSPARSE library. The release of GPU-side resources may be deferred until the application shuts down.
    ///
    /// # Parameters
    ///
    /// - `handle`: The handle to the cuSPARSE context.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseDestroy(handle: cusparseHandle_t) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the version number of the cuSPARSE library.
    ///
    /// # Parameters
    ///
    /// - `handle`: cuSPARSE handle.
    /// - `version`: The version number of the library, written through this pointer.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseGetVersion(
        handle: cusparseHandle_t,
        version: *mut ::core::ffi::c_int,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// The function returns the value of the requested property. Refer to `libraryPropertyType` for supported types.
    ///
    /// # Parameters
    ///
    /// - `type_`: The requested property, a `libraryPropertyType` (defined in `library_types.h`).
    /// - `value`: Value of the requested property, written through this pointer.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseGetProperty(
        type_: libraryPropertyType,
        value: *mut ::core::ffi::c_int,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// The function returns the string representation of an error code enum name. If the error code is not recognized, “unrecognized error code” is returned.
    ///
    /// # Parameters
    ///
    /// - `status`: Error code to convert to string.
    ///
    /// # Returns
    ///
    /// A pointer to a C string.
    pub fn cusparseGetErrorName(status: cusparseStatus_t) -> *const ::core::ffi::c_char;
}
unsafe extern "C" {
    /// Converts a status code into a C string.
    ///
    /// NOTE(review): no summary is available in this binding; by analogy with
    /// [`cusparseGetErrorName`], presumably the string is a human-readable description of
    /// `status` rather than the enum name — confirm against the cuSPARSE documentation.
    ///
    /// # Parameters
    ///
    /// - `status`: Error code to convert to string.
    ///
    /// # Returns
    ///
    /// A pointer to a C string.
    pub fn cusparseGetErrorString(
        status: cusparseStatus_t,
    ) -> *const ::core::ffi::c_char;
}
unsafe extern "C" {
    /// This function sets the stream to be used by the cuSPARSE library to execute its routines.
    ///
    /// # Parameters
    ///
    /// - `handle`: The handle to the cuSPARSE context.
    /// - `streamId`: The stream to be used by the library.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseSetStream(
        handle: cusparseHandle_t,
        streamId: cudaStream_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function gets the cuSPARSE library stream, which is being used to execute all calls to the cuSPARSE library functions. If the cuSPARSE library stream is not set, all kernels use the default NULL stream.
    ///
    /// # Parameters
    ///
    /// - `handle`: The handle to the cuSPARSE context.
    /// - `streamId`: The stream used by the library, written through this pointer.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseGetStream(
        handle: cusparseHandle_t,
        streamId: *mut cudaStream_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function obtains the pointer mode used by the cuSPARSE library. Please see the section on the [`cusparsePointerMode_t`] type for more details.
    ///
    /// # Parameters
    ///
    /// - `handle`: The handle to the cuSPARSE context.
    /// - `mode`: One of the enumerated pointer mode types, written through this pointer.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseGetPointerMode(
        handle: cusparseHandle_t,
        mode: *mut cusparsePointerMode_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function sets the pointer mode used by the cuSPARSE library. The *default* is for the values to be passed by reference on the host. Please see the section on the [`cusparsePointerMode_t`] type for more details.
    ///
    /// # Parameters
    ///
    /// - `handle`: The handle to the cuSPARSE context.
    /// - `mode`: One of the enumerated pointer mode types.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseSetPointerMode(
        handle: cusparseHandle_t,
        mode: cusparsePointerMode_t,
    ) -> cusparseStatus_t;
}
/// Optional `unsafe extern "C"` logging callback, as accepted by `cusparseLoggerSetCallback`.
///
/// The callback receives an integer `logLevel` and two C-string pointers, `functionName`
/// and `message`, and returns nothing. `None` corresponds to a null function pointer.
pub type cusparseLoggerCallback_t = ::core::option::Option<
    unsafe extern "C" fn(
        logLevel: ::core::ffi::c_int,
        functionName: *const ::core::ffi::c_char,
        message: *const ::core::ffi::c_char,
    ),
>;
unsafe extern "C" {
    /// *Experimental*: The function sets the logging callback function.
    ///
    /// # Parameters
    ///
    /// - `callback`: The callback to install; see [`cusparseLoggerCallback_t`] for its signature.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseLoggerSetCallback(
        callback: cusparseLoggerCallback_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// *Experimental*: The function sets the logging output file.
    ///
    /// Note: once registered using this function call, the provided file handle must not be closed unless the function is called again to switch to a different file handle.
    ///
    /// # Parameters
    ///
    /// - `file`: Pointer to an open file. File should have write permission.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseLoggerSetFile(file: *mut FILE) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// *Experimental*: The function opens a logging output file in the given path.
    ///
    /// # Parameters
    ///
    /// - `logFile`: Path of the logging output file, as a C string.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseLoggerOpenFile(
        logFile: *const ::core::ffi::c_char,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// *Experimental*: The function sets the value of the logging level.
    ///
    /// # Parameters
    ///
    /// - `level`: Value of the logging level.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseLoggerSetLevel(level: ::core::ffi::c_int) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// *Experimental*: The function sets the value of the logging mask.
    ///
    /// # Parameters
    ///
    /// - `mask`: Value of the logging mask.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseLoggerSetMask(mask: ::core::ffi::c_int) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// Takes no arguments and returns a [`cusparseStatus_t`] status code.
    ///
    /// NOTE(review): no description is available in this binding; by name, presumably
    /// disables cuSPARSE logging for the whole process — confirm against the cuSPARSE
    /// documentation.
    pub fn cusparseLoggerForceDisable() -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function initializes the matrix descriptor. It sets the fields `MatrixType` and `IndexBase` to the *default* values [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] and [`cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO`], respectively, while leaving other fields uninitialized.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the pointer to the matrix descriptor.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseCreateMatDescr(descrA: *mut cusparseMatDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function releases the memory allocated for the matrix descriptor.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseDestroyMatDescr(descrA: cusparseMatDescr_t) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function sets the `MatrixType` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    /// - `type_`: the [`cusparseMatrixType_t`] value to store in the `MatrixType` field.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseSetMatType(
        descrA: cusparseMatDescr_t,
        type_: cusparseMatrixType_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the `MatrixType` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    ///
    /// # Returns
    ///
    /// The field value as a [`cusparseMatrixType_t`].
    pub fn cusparseGetMatType(descrA: cusparseMatDescr_t) -> cusparseMatrixType_t;
}
unsafe extern "C" {
    /// This function sets the `FillMode` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    /// - `fillMode`: the [`cusparseFillMode_t`] value to store in the `FillMode` field.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseSetMatFillMode(
        descrA: cusparseMatDescr_t,
        fillMode: cusparseFillMode_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the `FillMode` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    ///
    /// # Returns
    ///
    /// The field value as a [`cusparseFillMode_t`].
    pub fn cusparseGetMatFillMode(descrA: cusparseMatDescr_t) -> cusparseFillMode_t;
}
unsafe extern "C" {
    /// This function sets the `DiagType` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    /// - `diagType`: the [`cusparseDiagType_t`] value to store in the `DiagType` field.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseSetMatDiagType(
        descrA: cusparseMatDescr_t,
        diagType: cusparseDiagType_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the `DiagType` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    ///
    /// # Returns
    ///
    /// The field value as a [`cusparseDiagType_t`].
    pub fn cusparseGetMatDiagType(descrA: cusparseMatDescr_t) -> cusparseDiagType_t;
}
unsafe extern "C" {
    /// This function sets the `IndexBase` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    /// - `base`: the [`cusparseIndexBase_t`] value to store in the `IndexBase` field.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    pub fn cusparseSetMatIndexBase(
        descrA: cusparseMatDescr_t,
        base: cusparseIndexBase_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the `IndexBase` field of the matrix descriptor `descrA`.
    ///
    /// # Parameters
    ///
    /// - `descrA`: the matrix descriptor.
    ///
    /// # Returns
    ///
    /// The field value as a [`cusparseIndexBase_t`].
    pub fn cusparseGetMatIndexBase(descrA: cusparseMatDescr_t) -> cusparseIndexBase_t;
}
754unsafe extern "C" {
755    /// Creates the solve and analysis structure of incomplete Cholesky and initializes it to *default* values.
756    ///
757    /// # Parameters
758    ///
759    /// - `info`: the pointer to the solve and analysis structure of incomplete Cholesky ([`csric02Info_t`]).
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyCsric02Info`].
760    #[deprecated]
761    pub fn cusparseCreateCsric02Info(info: *mut csric02Info_t) -> cusparseStatus_t;
762}
763unsafe extern "C" {
764    /// Destroys the structure and releases any memory required by it.
765    ///
766    /// # Parameters
767    ///
768    /// - `info`: the solve `(csric02_solve)` and analysis `(csric02_analysis)` structure.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
769    #[deprecated]
770    pub fn cusparseDestroyCsric02Info(info: csric02Info_t) -> cusparseStatus_t;
771}
772unsafe extern "C" {
773    /// Creates the solve and analysis structure of block incomplete Cholesky and initializes it to *default* values.
774    ///
775    /// # Parameters
776    ///
777    /// - `info`: the pointer to the solve and analysis structure of block incomplete Cholesky ([`bsric02Info_t`]).
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyBsric02Info`].
778    #[deprecated]
779    pub fn cusparseCreateBsric02Info(info: *mut bsric02Info_t) -> cusparseStatus_t;
780}
781unsafe extern "C" {
782    /// Destroys the structure and releases any memory required by it.
783    ///
784    /// # Parameters
785    ///
786    /// - `info`: the solve `(bsric02_solve)` and analysis `(bsric02_analysis)` structure.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
787    #[deprecated]
788    pub fn cusparseDestroyBsric02Info(info: bsric02Info_t) -> cusparseStatus_t;
789}
790unsafe extern "C" {
791    /// Creates the solve and analysis structure of incomplete LU and initializes it to *default* values.
792    ///
793    /// # Parameters
794    ///
795    /// - `info`: the pointer to the solve and analysis structure of incomplete LU ([`csrilu02Info_t`]).
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyCsrilu02Info`].
796    #[deprecated]
797    pub fn cusparseCreateCsrilu02Info(info: *mut csrilu02Info_t) -> cusparseStatus_t;
798}
799unsafe extern "C" {
800    /// Destroys the structure and releases any memory required by it.
801    ///
802    /// # Parameters
803    ///
804    /// - `info`: the solve `(csrilu02_solve)` and analysis `(csrilu02_analysis)` structure.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
805    #[deprecated]
806    pub fn cusparseDestroyCsrilu02Info(info: csrilu02Info_t) -> cusparseStatus_t;
807}
808unsafe extern "C" {
809    /// Creates the solve and analysis structure of block incomplete LU and initializes it to *default* values.
810    ///
811    /// # Parameters
812    ///
813    /// - `info`: the pointer to the solve and analysis structure of block incomplete LU ([`bsrilu02Info_t`]).
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyBsrilu02Info`].
814    #[deprecated]
815    pub fn cusparseCreateBsrilu02Info(info: *mut bsrilu02Info_t) -> cusparseStatus_t;
816}
817unsafe extern "C" {
818    /// Destroys the structure and releases any memory required by it.
819    ///
820    /// # Parameters
821    ///
822    /// - `info`: the solve `(bsrilu02_solve)` and analysis `(bsrilu02_analysis)` structure.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
823    #[deprecated]
824    pub fn cusparseDestroyBsrilu02Info(info: bsrilu02Info_t) -> cusparseStatus_t;
825}
826unsafe extern "C" {
827    /// Creates the solve and analysis structure of bsrsv2 and initializes it to *default* values.
828    ///
829    /// # Parameters
830    ///
831    /// - `info`: the pointer to the solve and analysis structure of bsrsv2 ([`bsrsv2Info_t`]).
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyBsrsv2Info`].
832    #[deprecated]
833    pub fn cusparseCreateBsrsv2Info(info: *mut bsrsv2Info_t) -> cusparseStatus_t;
834}
835unsafe extern "C" {
836    /// Destroys the structure and releases any memory required by it.
837    ///
838    /// # Parameters
839    ///
840    /// - `info`: the solve `(bsrsv2_solve)` and analysis `(bsrsv2_analysis)` structure.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
841    #[deprecated]
842    pub fn cusparseDestroyBsrsv2Info(info: bsrsv2Info_t) -> cusparseStatus_t;
843}
844unsafe extern "C" {
845    /// Creates the solve and analysis structure of bsrsm2 and initializes it to *default* values.
846    ///
847    /// # Parameters
848    ///
849    /// - `info`: the pointer to the solve and analysis structure of bsrsm2 ([`bsrsm2Info_t`]).
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyBsrsm2Info`].
850    #[deprecated]
851    pub fn cusparseCreateBsrsm2Info(info: *mut bsrsm2Info_t) -> cusparseStatus_t;
852}
853unsafe extern "C" {
854    /// Destroys the structure and releases any memory required by it.
855    ///
856    /// # Parameters
857    ///
858    /// - `info`: the solve `(bsrsm2_solve)` and analysis `(bsrsm2_analysis)` structure.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
859    #[deprecated]
860    pub fn cusparseDestroyBsrsm2Info(info: bsrsm2Info_t) -> cusparseStatus_t;
861}
862unsafe extern "C" {
    /// Foreign declaration taking a pointer to a [`csru2csrInfo_t`] and returning a
    /// [`cusparseStatus_t`] status code.
    ///
    /// NOTE(review): upstream provides no description for this function. By analogy with the
    /// other `cusparseCreate*Info` declarations in this file it presumably creates the structure
    /// behind `info` and initializes it to default values — confirm against the cuSPARSE
    /// reference.
863    pub fn cusparseCreateCsru2csrInfo(info: *mut csru2csrInfo_t) -> cusparseStatus_t;
864}
865unsafe extern "C" {
    /// Foreign declaration taking a [`csru2csrInfo_t`] by value and returning a
    /// [`cusparseStatus_t`] status code.
    ///
    /// NOTE(review): upstream provides no description for this function. By analogy with the
    /// other `cusparseDestroy*Info` declarations in this file it presumably destroys the
    /// structure and releases its memory — confirm against the cuSPARSE reference.
866    pub fn cusparseDestroyCsru2csrInfo(info: csru2csrInfo_t) -> cusparseStatus_t;
867}
868unsafe extern "C" {
869    /// Creates the [`cusparseColorInfo_t`] structure and initializes it to *default* values.
870    ///
871    /// # Parameters
872    ///
873    /// - `info`: the pointer to the [`cusparseColorInfo_t`] structure.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyColorInfo`].
874    #[deprecated]
875    pub fn cusparseCreateColorInfo(info: *mut cusparseColorInfo_t) -> cusparseStatus_t;
876}
877unsafe extern "C" {
878    /// Destroys the structure and releases any memory required by it.
879    ///
880    /// # Parameters
881    ///
882    /// - `info`: the structure of `csrcolor()`. Upstream calls this "the pointer to the structure", but the binding takes the [`cusparseColorInfo_t`] by value.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
883    #[deprecated]
884    pub fn cusparseDestroyColorInfo(info: cusparseColorInfo_t) -> cusparseStatus_t;
885}
886unsafe extern "C" {
887    /// Creates the structure of `prune` and initializes it to *default* values.
888    ///
889    /// # Parameters
890    ///
891    /// - `info`: the pointer to the structure of `prune` ([`pruneInfo_t`]).
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. The matching release function is
    /// [`cusparseDestroyPruneInfo`].
892    #[deprecated]
893    pub fn cusparseCreatePruneInfo(info: *mut pruneInfo_t) -> cusparseStatus_t;
894}
895unsafe extern "C" {
896    /// Destroys the structure and releases any memory required by it.
897    ///
898    /// # Parameters
899    ///
900    /// - `info`: the structure of `prune`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
    ///
    /// # Notes
    ///
    /// Marked `#[deprecated]` in these bindings. NOTE(review): `info` is presumably invalid after
    /// this call and must not be reused — confirm against the cuSPARSE reference.
901    #[deprecated]
902    pub fn cusparseDestroyPruneInfo(info: pruneInfo_t) -> cusparseStatus_t;
903}
904unsafe extern "C" {
905    /// Performs a matrix-vector operation on a dense matrix and a sparse vector (`f32` variant).
906    ///
907    /// `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and
908    ///
909    /// ![image2](_images/op-transpose-2.png)
910    ///
911    /// * The routine supports asynchronous execution
912    /// * The routine supports CUDA graph capture
913    ///
914    /// The function [`cusparseSgemvi_bufferSize`] returns the size of the buffer used in [`cusparseSgemvi`].
    ///
    /// NOTE(review): the formula and the definition of `op(A)` were images in the upstream
    /// documentation and are not reproduced here. The operation is presumably
    /// `y = alpha * op(A) * x + beta * y`, with `op` selected by `transA` — confirm against the
    /// cuSPARSE reference.
    ///
    /// # Parameters
    ///
    /// - `alpha`, `beta`: pointers to the scalars.
    /// - `A`: the dense matrix; `lda` is presumably its leading dimension (TODO confirm).
    /// - `xVal`, `xInd`: the value and index arrays of the sparse vector `x`, each of length `nnz`.
    /// - `y`: the dense vector, passed as a mutable pointer.
    /// - `idxBase`: presumably the index base of `xInd` (TODO confirm).
    /// - `pBuffer`: the buffer whose size is reported by [`cusparseSgemvi_bufferSize`].
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
915    pub fn cusparseSgemvi(
916        handle: cusparseHandle_t,
917        transA: cusparseOperation_t,
918        m: ::core::ffi::c_int,
919        n: ::core::ffi::c_int,
920        alpha: *const f32,
921        A: *const f32,
922        lda: ::core::ffi::c_int,
923        nnz: ::core::ffi::c_int,
924        xVal: *const f32,
925        xInd: *const ::core::ffi::c_int,
926        beta: *const f32,
927        y: *mut f32,
928        idxBase: cusparseIndexBase_t,
929        pBuffer: *mut ::core::ffi::c_void,
930    ) -> cusparseStatus_t;
931}
932unsafe extern "C" {
    /// Reports the size of the buffer used by [`cusparseSgemvi`].
    ///
    /// NOTE(review): upstream provides no separate description. The size is presumably written
    /// to `*pBufferSize` (units not stated here) for the problem described by `transA`, `m`, `n`
    /// and `nnz` — confirm against the cuSPARSE reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
933    pub fn cusparseSgemvi_bufferSize(
934        handle: cusparseHandle_t,
935        transA: cusparseOperation_t,
936        m: ::core::ffi::c_int,
937        n: ::core::ffi::c_int,
938        nnz: ::core::ffi::c_int,
939        pBufferSize: *mut ::core::ffi::c_int,
940    ) -> cusparseStatus_t;
941}
942unsafe extern "C" {
943    /// Performs a matrix-vector operation on a dense matrix and a sparse vector (`f64` variant).
944    ///
945    /// `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and
946    ///
947    /// ![image2](_images/op-transpose-2.png)
948    ///
949    /// * The routine supports asynchronous execution
950    /// * The routine supports CUDA graph capture
951    ///
952    /// The function [`cusparseDgemvi_bufferSize`] returns the size of the buffer used in [`cusparseDgemvi`].
    ///
    /// NOTE(review): the formula and the definition of `op(A)` were images in the upstream
    /// documentation and are not reproduced here. The operation is presumably
    /// `y = alpha * op(A) * x + beta * y`, with `op` selected by `transA` — confirm against the
    /// cuSPARSE reference.
    ///
    /// # Parameters
    ///
    /// - `alpha`, `beta`: pointers to the scalars.
    /// - `A`: the dense matrix; `lda` is presumably its leading dimension (TODO confirm).
    /// - `xVal`, `xInd`: the value and index arrays of the sparse vector `x`, each of length `nnz`.
    /// - `y`: the dense vector, passed as a mutable pointer.
    /// - `idxBase`: presumably the index base of `xInd` (TODO confirm).
    /// - `pBuffer`: the buffer whose size is reported by [`cusparseDgemvi_bufferSize`].
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
953    pub fn cusparseDgemvi(
954        handle: cusparseHandle_t,
955        transA: cusparseOperation_t,
956        m: ::core::ffi::c_int,
957        n: ::core::ffi::c_int,
958        alpha: *const f64,
959        A: *const f64,
960        lda: ::core::ffi::c_int,
961        nnz: ::core::ffi::c_int,
962        xVal: *const f64,
963        xInd: *const ::core::ffi::c_int,
964        beta: *const f64,
965        y: *mut f64,
966        idxBase: cusparseIndexBase_t,
967        pBuffer: *mut ::core::ffi::c_void,
968    ) -> cusparseStatus_t;
969}
970unsafe extern "C" {
    /// Reports the size of the buffer used by [`cusparseDgemvi`].
    ///
    /// NOTE(review): upstream provides no separate description. The size is presumably written
    /// to `*pBufferSize` (units not stated here) for the problem described by `transA`, `m`, `n`
    /// and `nnz` — confirm against the cuSPARSE reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
971    pub fn cusparseDgemvi_bufferSize(
972        handle: cusparseHandle_t,
973        transA: cusparseOperation_t,
974        m: ::core::ffi::c_int,
975        n: ::core::ffi::c_int,
976        nnz: ::core::ffi::c_int,
977        pBufferSize: *mut ::core::ffi::c_int,
978    ) -> cusparseStatus_t;
979}
980unsafe extern "C" {
981    /// Performs a matrix-vector operation on a dense matrix and a sparse vector (`cuComplex` variant).
982    ///
983    /// `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and
984    ///
985    /// ![image2](_images/op-transpose-2.png)
986    ///
987    /// * The routine supports asynchronous execution
988    /// * The routine supports CUDA graph capture
989    ///
990    /// The function [`cusparseCgemvi_bufferSize`] returns the size of the buffer used in [`cusparseCgemvi`].
    ///
    /// NOTE(review): the formula and the definition of `op(A)` were images in the upstream
    /// documentation and are not reproduced here. The operation is presumably
    /// `y = alpha * op(A) * x + beta * y`, with `op` selected by `transA` — confirm against the
    /// cuSPARSE reference.
    ///
    /// # Parameters
    ///
    /// - `alpha`, `beta`: pointers to the scalars.
    /// - `A`: the dense matrix; `lda` is presumably its leading dimension (TODO confirm).
    /// - `xVal`, `xInd`: the value and index arrays of the sparse vector `x`, each of length `nnz`.
    /// - `y`: the dense vector, passed as a mutable pointer.
    /// - `idxBase`: presumably the index base of `xInd` (TODO confirm).
    /// - `pBuffer`: the buffer whose size is reported by [`cusparseCgemvi_bufferSize`].
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
991    pub fn cusparseCgemvi(
992        handle: cusparseHandle_t,
993        transA: cusparseOperation_t,
994        m: ::core::ffi::c_int,
995        n: ::core::ffi::c_int,
996        alpha: *const cuComplex,
997        A: *const cuComplex,
998        lda: ::core::ffi::c_int,
999        nnz: ::core::ffi::c_int,
1000        xVal: *const cuComplex,
1001        xInd: *const ::core::ffi::c_int,
1002        beta: *const cuComplex,
1003        y: *mut cuComplex,
1004        idxBase: cusparseIndexBase_t,
1005        pBuffer: *mut ::core::ffi::c_void,
1006    ) -> cusparseStatus_t;
1007}
1008unsafe extern "C" {
    /// Reports the size of the buffer used by [`cusparseCgemvi`].
    ///
    /// NOTE(review): upstream provides no separate description. The size is presumably written
    /// to `*pBufferSize` (units not stated here) for the problem described by `transA`, `m`, `n`
    /// and `nnz` — confirm against the cuSPARSE reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1009    pub fn cusparseCgemvi_bufferSize(
1010        handle: cusparseHandle_t,
1011        transA: cusparseOperation_t,
1012        m: ::core::ffi::c_int,
1013        n: ::core::ffi::c_int,
1014        nnz: ::core::ffi::c_int,
1015        pBufferSize: *mut ::core::ffi::c_int,
1016    ) -> cusparseStatus_t;
1017}
1018unsafe extern "C" {
1019    /// Performs a matrix-vector operation on a dense matrix and a sparse vector (`cuDoubleComplex` variant).
1020    ///
1021    /// `A` is an $m \times n$ dense matrix; `x` is a sparse vector defined in a sparse storage format by the two arrays `xVal`, `xInd` of length `nnz`; `y` is a dense vector; $\alpha$ and $\beta$ are scalars; and
1022    ///
1023    /// ![image2](_images/op-transpose-2.png)
1024    ///
1025    /// * The routine supports asynchronous execution
1026    /// * The routine supports CUDA graph capture
1027    ///
1028    /// The function [`cusparseZgemvi_bufferSize`] returns the size of the buffer used in [`cusparseZgemvi`].
    ///
    /// NOTE(review): the formula and the definition of `op(A)` were images in the upstream
    /// documentation and are not reproduced here. The operation is presumably
    /// `y = alpha * op(A) * x + beta * y`, with `op` selected by `transA` — confirm against the
    /// cuSPARSE reference.
    ///
    /// # Parameters
    ///
    /// - `alpha`, `beta`: pointers to the scalars.
    /// - `A`: the dense matrix; `lda` is presumably its leading dimension (TODO confirm).
    /// - `xVal`, `xInd`: the value and index arrays of the sparse vector `x`, each of length `nnz`.
    /// - `y`: the dense vector, passed as a mutable pointer.
    /// - `idxBase`: presumably the index base of `xInd` (TODO confirm).
    /// - `pBuffer`: the buffer whose size is reported by [`cusparseZgemvi_bufferSize`].
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1029    pub fn cusparseZgemvi(
1030        handle: cusparseHandle_t,
1031        transA: cusparseOperation_t,
1032        m: ::core::ffi::c_int,
1033        n: ::core::ffi::c_int,
1034        alpha: *const cuDoubleComplex,
1035        A: *const cuDoubleComplex,
1036        lda: ::core::ffi::c_int,
1037        nnz: ::core::ffi::c_int,
1038        xVal: *const cuDoubleComplex,
1039        xInd: *const ::core::ffi::c_int,
1040        beta: *const cuDoubleComplex,
1041        y: *mut cuDoubleComplex,
1042        idxBase: cusparseIndexBase_t,
1043        pBuffer: *mut ::core::ffi::c_void,
1044    ) -> cusparseStatus_t;
1045}
1046unsafe extern "C" {
    /// Reports the size of the buffer used by [`cusparseZgemvi`].
    ///
    /// NOTE(review): upstream provides no separate description. The size is presumably written
    /// to `*pBufferSize` (units not stated here) for the problem described by `transA`, `m`, `n`
    /// and `nnz` — confirm against the cuSPARSE reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1047    pub fn cusparseZgemvi_bufferSize(
1048        handle: cusparseHandle_t,
1049        transA: cusparseOperation_t,
1050        m: ::core::ffi::c_int,
1051        n: ::core::ffi::c_int,
1052        nnz: ::core::ffi::c_int,
1053        pBufferSize: *mut ::core::ffi::c_int,
1054    ) -> cusparseStatus_t;
1055}
1056unsafe extern "C" {
1057    /// Performs a matrix-vector operation on a sparse matrix in BSR storage format (`f32` variant).
1058    ///
1059    /// $A$ is an $(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha$ and $\beta$ are scalars; and
1060    ///
1061    /// ![image1](_images/op-non-xpose.png)
1062    ///
1063    /// `bsrmv()` has the following properties:
1064    ///
1065    /// * The routine requires no extra storage.
1066    /// * The routine supports asynchronous execution.
1067    /// * The routine supports CUDA graph capture.
1068    ///
1069    /// Several comments on `bsrmv()`:
1070    ///
1071    /// * Only `blockDim > 1` is supported.
1072    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported.
1073    ///
1074    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
1075    /// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
1076    ///
1077    /// The upstream documentation follows this with a single-precision example that converts CSR data with `csr2bsr()` and then calls `bsrmv()`; that example code is not reproduced in these bindings.
    ///
    /// NOTE(review): the formula was an image upstream and is missing here; the operation is
    /// presumably `y = alpha * op(A) * x + beta * y` — confirm against the cuSPARSE reference.
    /// The arrays called `bsrVal`, `bsrRowPtr` and `bsrColInd` above presumably correspond to the
    /// parameters `bsrSortedValA`, `bsrSortedRowPtrA` and `bsrSortedColIndA`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1078    pub fn cusparseSbsrmv(
1079        handle: cusparseHandle_t,
1080        dirA: cusparseDirection_t,
1081        transA: cusparseOperation_t,
1082        mb: ::core::ffi::c_int,
1083        nb: ::core::ffi::c_int,
1084        nnzb: ::core::ffi::c_int,
1085        alpha: *const f32,
1086        descrA: cusparseMatDescr_t,
1087        bsrSortedValA: *const f32,
1088        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1089        bsrSortedColIndA: *const ::core::ffi::c_int,
1090        blockDim: ::core::ffi::c_int,
1091        x: *const f32,
1092        beta: *const f32,
1093        y: *mut f32,
1094    ) -> cusparseStatus_t;
1095}
1096unsafe extern "C" {
1097    /// Performs a matrix-vector operation on a sparse matrix in BSR storage format (`f64` variant).
1098    ///
1099    /// $A$ is an $(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha$ and $\beta$ are scalars; and
1100    ///
1101    /// ![image1](_images/op-non-xpose.png)
1102    ///
1103    /// `bsrmv()` has the following properties:
1104    ///
1105    /// * The routine requires no extra storage.
1106    /// * The routine supports asynchronous execution.
1107    /// * The routine supports CUDA graph capture.
1108    ///
1109    /// Several comments on `bsrmv()`:
1110    ///
1111    /// * Only `blockDim > 1` is supported.
1112    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported.
1113    ///
1114    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
1115    /// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
1116    ///
1117    /// The upstream documentation follows this with a single-precision example that converts CSR data with `csr2bsr()` and then calls `bsrmv()`; that example code is not reproduced in these bindings.
    ///
    /// NOTE(review): the formula was an image upstream and is missing here; the operation is
    /// presumably `y = alpha * op(A) * x + beta * y` — confirm against the cuSPARSE reference.
    /// The arrays called `bsrVal`, `bsrRowPtr` and `bsrColInd` above presumably correspond to the
    /// parameters `bsrSortedValA`, `bsrSortedRowPtrA` and `bsrSortedColIndA`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1118    pub fn cusparseDbsrmv(
1119        handle: cusparseHandle_t,
1120        dirA: cusparseDirection_t,
1121        transA: cusparseOperation_t,
1122        mb: ::core::ffi::c_int,
1123        nb: ::core::ffi::c_int,
1124        nnzb: ::core::ffi::c_int,
1125        alpha: *const f64,
1126        descrA: cusparseMatDescr_t,
1127        bsrSortedValA: *const f64,
1128        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1129        bsrSortedColIndA: *const ::core::ffi::c_int,
1130        blockDim: ::core::ffi::c_int,
1131        x: *const f64,
1132        beta: *const f64,
1133        y: *mut f64,
1134    ) -> cusparseStatus_t;
1135}
1136unsafe extern "C" {
1137    /// Performs a matrix-vector operation on a sparse matrix in BSR storage format (`cuComplex` variant).
1138    ///
1139    /// $A$ is an $(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha$ and $\beta$ are scalars; and
1140    ///
1141    /// ![image1](_images/op-non-xpose.png)
1142    ///
1143    /// `bsrmv()` has the following properties:
1144    ///
1145    /// * The routine requires no extra storage.
1146    /// * The routine supports asynchronous execution.
1147    /// * The routine supports CUDA graph capture.
1148    ///
1149    /// Several comments on `bsrmv()`:
1150    ///
1151    /// * Only `blockDim > 1` is supported.
1152    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported.
1153    ///
1154    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
1155    /// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
1156    ///
1157    /// The upstream documentation follows this with a single-precision example that converts CSR data with `csr2bsr()` and then calls `bsrmv()`; that example code is not reproduced in these bindings.
    ///
    /// NOTE(review): the formula was an image upstream and is missing here; the operation is
    /// presumably `y = alpha * op(A) * x + beta * y` — confirm against the cuSPARSE reference.
    /// The arrays called `bsrVal`, `bsrRowPtr` and `bsrColInd` above presumably correspond to the
    /// parameters `bsrSortedValA`, `bsrSortedRowPtrA` and `bsrSortedColIndA`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1158    pub fn cusparseCbsrmv(
1159        handle: cusparseHandle_t,
1160        dirA: cusparseDirection_t,
1161        transA: cusparseOperation_t,
1162        mb: ::core::ffi::c_int,
1163        nb: ::core::ffi::c_int,
1164        nnzb: ::core::ffi::c_int,
1165        alpha: *const cuComplex,
1166        descrA: cusparseMatDescr_t,
1167        bsrSortedValA: *const cuComplex,
1168        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1169        bsrSortedColIndA: *const ::core::ffi::c_int,
1170        blockDim: ::core::ffi::c_int,
1171        x: *const cuComplex,
1172        beta: *const cuComplex,
1173        y: *mut cuComplex,
1174    ) -> cusparseStatus_t;
1175}
1176unsafe extern "C" {
1177    /// Performs a matrix-vector operation on a sparse matrix in BSR storage format (`cuDoubleComplex` variant).
1178    ///
1179    /// $A$ is an $(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSR storage format by the three arrays `bsrVal`, `bsrRowPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha$ and $\beta$ are scalars; and
1180    ///
1181    /// ![image1](_images/op-non-xpose.png)
1182    ///
1183    /// `bsrmv()` has the following properties:
1184    ///
1185    /// * The routine requires no extra storage.
1186    /// * The routine supports asynchronous execution.
1187    /// * The routine supports CUDA graph capture.
1188    ///
1189    /// Several comments on `bsrmv()`:
1190    ///
1191    /// * Only `blockDim > 1` is supported.
1192    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] is supported.
1193    ///
1194    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported.
1195    /// * The size of vector `x` should be $(nb \ast blockDim)$ at least, and the size of vector `y` should be $(mb \ast blockDim)$ at least; otherwise, the kernel may return [`cusparseStatus_t::CUSPARSE_STATUS_EXECUTION_FAILED`] because of an out-of-bounds array.
1196    ///
1197    /// The upstream documentation follows this with a single-precision example that converts CSR data with `csr2bsr()` and then calls `bsrmv()`; that example code is not reproduced in these bindings.
    ///
    /// NOTE(review): the formula was an image upstream and is missing here; the operation is
    /// presumably `y = alpha * op(A) * x + beta * y` — confirm against the cuSPARSE reference.
    /// The arrays called `bsrVal`, `bsrRowPtr` and `bsrColInd` above presumably correspond to the
    /// parameters `bsrSortedValA`, `bsrSortedRowPtrA` and `bsrSortedColIndA`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1198    pub fn cusparseZbsrmv(
1199        handle: cusparseHandle_t,
1200        dirA: cusparseDirection_t,
1201        transA: cusparseOperation_t,
1202        mb: ::core::ffi::c_int,
1203        nb: ::core::ffi::c_int,
1204        nnzb: ::core::ffi::c_int,
1205        alpha: *const cuDoubleComplex,
1206        descrA: cusparseMatDescr_t,
1207        bsrSortedValA: *const cuDoubleComplex,
1208        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1209        bsrSortedColIndA: *const ::core::ffi::c_int,
1210        blockDim: ::core::ffi::c_int,
1211        x: *const cuDoubleComplex,
1212        beta: *const cuDoubleComplex,
1213        y: *mut cuDoubleComplex,
1214    ) -> cusparseStatus_t;
1215}
1216unsafe extern "C" {
1217    /// Performs a `bsrmv` and a mask operation (`f32` variant).
1218    ///
1219    /// $A$ is an $(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha$ and $\beta$ are scalars; and
1220    ///
1221    /// ![image1](_images/op-non-xpose.png)
1222    ///
1223    /// The mask operation is defined by array `bsrMaskPtr` which contains updated block row indices of $y$. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
1224    ///
1225    /// For example, consider the $2 \times 3$ block matrix $A$:
1226    ///
1227    /// and its one-based BSR format (three vector form) is:
1228    ///
1229    /// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
1230    ///
1231    /// We do not need to create another BSR format for the new matrix $\bar{A}$; all we need to do is keep `bsrVal` and `bsrColInd` unchanged, but modify `bsrRowPtr` and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
1232    ///
1233    /// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
1234    ///
1235    /// Further, we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only because $y_{1}$ is never changed. In this case, `bsrMaskPtr` $=$ [2] and `sizeOfMask` = 1.
1236    ///
1237    /// The mask operator is equivalent to the following operation:
1238    ///
1239    /// If a block row is not present in the `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark `?` is used to indicate row blocks not in `bsrMaskPtr`.
1240    ///
1241    /// In this case, the first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched either.
1242    ///
1243    /// `bsrxmv()` has the following properties:
1244    ///
1245    /// * The routine requires no extra storage.
1246    /// * The routine supports asynchronous execution.
1247    /// * The routine supports CUDA graph capture.
1248    ///
1249    /// A couple of comments on `bsrxmv()`:
1250    ///
1251    /// * Only `blockDim > 1` is supported.
1252    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
1253    /// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
    ///
    /// NOTE(review): the formula and the example matrices and arrays that the sentences above
    /// introduce were figures upstream and are not reproduced in these bindings; see the cuSPARSE
    /// reference. The arrays called `bsrVal`, `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and
    /// `bsrColInd` above presumably correspond to the parameters `bsrSortedValA`,
    /// `bsrSortedMaskPtrA`, `bsrSortedRowPtrA`, `bsrSortedEndPtrA` and `bsrSortedColIndA`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1254    pub fn cusparseSbsrxmv(
1255        handle: cusparseHandle_t,
1256        dirA: cusparseDirection_t,
1257        transA: cusparseOperation_t,
1258        sizeOfMask: ::core::ffi::c_int,
1259        mb: ::core::ffi::c_int,
1260        nb: ::core::ffi::c_int,
1261        nnzb: ::core::ffi::c_int,
1262        alpha: *const f32,
1263        descrA: cusparseMatDescr_t,
1264        bsrSortedValA: *const f32,
1265        bsrSortedMaskPtrA: *const ::core::ffi::c_int,
1266        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1267        bsrSortedEndPtrA: *const ::core::ffi::c_int,
1268        bsrSortedColIndA: *const ::core::ffi::c_int,
1269        blockDim: ::core::ffi::c_int,
1270        x: *const f32,
1271        beta: *const f32,
1272        y: *mut f32,
1273    ) -> cusparseStatus_t;
1274}
1275unsafe extern "C" {
1276    /// Performs a `bsrmv` and a mask operation (`f64` variant).
1277    ///
1278    /// $A$ is an $(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha$ and $\beta$ are scalars; and
1279    ///
1280    /// ![image1](_images/op-non-xpose.png)
1281    ///
1282    /// The mask operation is defined by array `bsrMaskPtr` which contains updated block row indices of $y$. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
1283    ///
1284    /// For example, consider the $2 \times 3$ block matrix $A$:
1285    ///
1286    /// and its one-based BSR format (three vector form) is:
1287    ///
1288    /// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
1289    ///
1290    /// We do not need to create another BSR format for the new matrix $\bar{A}$; all we need to do is keep `bsrVal` and `bsrColInd` unchanged, but modify `bsrRowPtr` and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
1291    ///
1292    /// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
1293    ///
1294    /// Further, we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only because $y_{1}$ is never changed. In this case, `bsrMaskPtr` $=$ [2] and `sizeOfMask` = 1.
1295    ///
1296    /// The mask operator is equivalent to the following operation:
1297    ///
1298    /// If a block row is not present in the `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark `?` is used to indicate row blocks not in `bsrMaskPtr`.
1299    ///
1300    /// In this case, the first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched either.
1301    ///
1302    /// `bsrxmv()` has the following properties:
1303    ///
1304    /// * The routine requires no extra storage.
1305    /// * The routine supports asynchronous execution.
1306    /// * The routine supports CUDA graph capture.
1307    ///
1308    /// A couple of comments on `bsrxmv()`:
1309    ///
1310    /// * Only `blockDim > 1` is supported.
1311    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
1312    /// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
    ///
    /// NOTE(review): the formula and the example matrices and arrays that the sentences above
    /// introduce were figures upstream and are not reproduced in these bindings; see the cuSPARSE
    /// reference. The arrays called `bsrVal`, `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and
    /// `bsrColInd` above presumably correspond to the parameters `bsrSortedValA`,
    /// `bsrSortedMaskPtrA`, `bsrSortedRowPtrA`, `bsrSortedEndPtrA` and `bsrSortedColIndA`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1313    pub fn cusparseDbsrxmv(
1314        handle: cusparseHandle_t,
1315        dirA: cusparseDirection_t,
1316        transA: cusparseOperation_t,
1317        sizeOfMask: ::core::ffi::c_int,
1318        mb: ::core::ffi::c_int,
1319        nb: ::core::ffi::c_int,
1320        nnzb: ::core::ffi::c_int,
1321        alpha: *const f64,
1322        descrA: cusparseMatDescr_t,
1323        bsrSortedValA: *const f64,
1324        bsrSortedMaskPtrA: *const ::core::ffi::c_int,
1325        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1326        bsrSortedEndPtrA: *const ::core::ffi::c_int,
1327        bsrSortedColIndA: *const ::core::ffi::c_int,
1328        blockDim: ::core::ffi::c_int,
1329        x: *const f64,
1330        beta: *const f64,
1331        y: *mut f64,
1332    ) -> cusparseStatus_t;
1333}
1334unsafe extern "C" {
1335    /// Performs a `bsrmv` and a mask operation (`cuComplex` variant).
1336    ///
1337    /// $A$ is an $(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha$ and $\beta$ are scalars; and
1338    ///
1339    /// ![image1](_images/op-non-xpose.png)
1340    ///
1341    /// The mask operation is defined by array `bsrMaskPtr` which contains updated block row indices of $y$. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
1342    ///
1343    /// For example, consider the $2 \times 3$ block matrix $A$:
1344    ///
1345    /// and its one-based BSR format (three vector form) is:
1346    ///
1347    /// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
1348    ///
1349    /// We do not need to create another BSR format for the new matrix $\bar{A}$; all we need to do is keep `bsrVal` and `bsrColInd` unchanged, but modify `bsrRowPtr` and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
1350    ///
1351    /// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
1352    ///
1353    /// Further, we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only because $y_{1}$ is never changed. In this case, `bsrMaskPtr` $=$ [2] and `sizeOfMask` = 1.
1354    ///
1355    /// The mask operator is equivalent to the following operation:
1356    ///
1357    /// If a block row is not present in the `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark `?` is used to indicate row blocks not in `bsrMaskPtr`.
1358    ///
1359    /// In this case, the first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched either.
1360    ///
1361    /// `bsrxmv()` has the following properties:
1362    ///
1363    /// * The routine requires no extra storage.
1364    /// * The routine supports asynchronous execution.
1365    /// * The routine supports CUDA graph capture.
1366    ///
1367    /// A couple of comments on `bsrxmv()`:
1368    ///
1369    /// * Only `blockDim > 1` is supported.
1370    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
1371    /// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
    ///
    /// NOTE(review): the formula and the example matrices and arrays that the sentences above
    /// introduce were figures upstream and are not reproduced in these bindings; see the cuSPARSE
    /// reference. The arrays called `bsrVal`, `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and
    /// `bsrColInd` above presumably correspond to the parameters `bsrSortedValA`,
    /// `bsrSortedMaskPtrA`, `bsrSortedRowPtrA`, `bsrSortedEndPtrA` and `bsrSortedColIndA`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status code.
1372    pub fn cusparseCbsrxmv(
1373        handle: cusparseHandle_t,
1374        dirA: cusparseDirection_t,
1375        transA: cusparseOperation_t,
1376        sizeOfMask: ::core::ffi::c_int,
1377        mb: ::core::ffi::c_int,
1378        nb: ::core::ffi::c_int,
1379        nnzb: ::core::ffi::c_int,
1380        alpha: *const cuComplex,
1381        descrA: cusparseMatDescr_t,
1382        bsrSortedValA: *const cuComplex,
1383        bsrSortedMaskPtrA: *const ::core::ffi::c_int,
1384        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1385        bsrSortedEndPtrA: *const ::core::ffi::c_int,
1386        bsrSortedColIndA: *const ::core::ffi::c_int,
1387        blockDim: ::core::ffi::c_int,
1388        x: *const cuComplex,
1389        beta: *const cuComplex,
1390        y: *mut cuComplex,
1391    ) -> cusparseStatus_t;
1392}
1393unsafe extern "C" {
1394    /// This function performs a `bsrmv` and a mask operation
1395    ///
1396    /// where $A\text{ is an }(mb \ast blockDim) \times (nb \ast blockDim)$ sparse matrix that is defined in BSRX storage format by the four arrays `bsrVal`, `bsrRowPtr`, `bsrEndPtr`, and `bsrColInd`; `x` and `y` are vectors; $\alpha\text{~and~}\beta$ are scalars; and
1397    ///
1398    /// ![image1](_images/op-non-xpose.png)
1399    ///
1400    /// The mask operation is defined by the array `bsrMaskPtr`, which contains the block row indices of $y$ that are updated. If row $i$ is not specified in `bsrMaskPtr`, then `bsrxmv()` does not touch row block $i$ of $A$ and $y$.
1401    ///
    /// NOTE(review): the formulas, matrices and arrays that illustrate the text below appear to be missing from these generated docs (each sentence ending in a colon has nothing after it); consult the cuSPARSE reference for `bsrxmv()`.
    ///
1402    /// For example, consider the $2 \times 3$ block matrix $A$:
1403    ///
1404    /// and its one-based BSR format (three vector form) is:
1405    ///
1406    /// Suppose we want to do the following `bsrmv` operation on a matrix $\bar{A}$ which is slightly different from $A$.
1407    ///
1408    /// We don’t need to create another BSR format for the new matrix $\bar{A}$; all we need to do is keep `bsrVal` and `bsrColInd` unchanged, modify `bsrRowPtr`, and add an additional array `bsrEndPtr` which points to the last nonzero elements per row of $\bar{A}$ plus 1.
1409    ///
1410    /// For example, the following `bsrRowPtr` and `bsrEndPtr` can represent matrix $\bar{A}$:
1411    ///
1412    /// Furthermore, we can use a mask operator (specified by array `bsrMaskPtr`) to update particular block row indices of $y$ only, because $y_{1}$ is never changed. In this case, `bsrMaskPtr`$=$ [2] and `sizeOfMask`=1.
1413    ///
1414    /// The mask operator is equivalent to the following operation:
1415    ///
1416    /// If a block row is not present in `bsrMaskPtr`, then no calculation is performed on that row, and the corresponding value in `y` is unmodified. The question mark “?” is used to indicate row blocks not in `bsrMaskPtr`.
1417    ///
1418    /// In this case, the first row block is not present in `bsrMaskPtr`, so `bsrRowPtr[0]` and `bsrEndPtr[0]` are not touched either.
1419    ///
1420    /// `bsrxmv()` has the following properties:
1421    ///
1422    /// * The routine requires no extra storage.
1423    /// * The routine supports asynchronous execution.
1424    /// * The routine supports CUDA graph capture.
1425    ///
1426    /// A couple of comments on `bsrxmv()`:
1427    ///
1428    /// * Only `blockDim > 1` is supported.
1429    /// * Only [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`] and [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] are supported.
1430    /// * Parameters `bsrMaskPtr`, `bsrRowPtr`, `bsrEndPtr` and `bsrColInd` are consistent with base index, either one-based or zero-based. The above example is one-based.
    ///
    /// This binding is the variant whose scalars (`alpha`, `beta`), block values (`bsrSortedValA`) and vectors (`x`, `y`) are `cuDoubleComplex`; `y` is the only `*mut` pointer in the signature.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1431    pub fn cusparseZbsrxmv(
1432        handle: cusparseHandle_t,
1433        dirA: cusparseDirection_t,
1434        transA: cusparseOperation_t,
1435        sizeOfMask: ::core::ffi::c_int,
1436        mb: ::core::ffi::c_int,
1437        nb: ::core::ffi::c_int,
1438        nnzb: ::core::ffi::c_int,
1439        alpha: *const cuDoubleComplex,
1440        descrA: cusparseMatDescr_t,
1441        bsrSortedValA: *const cuDoubleComplex,
1442        bsrSortedMaskPtrA: *const ::core::ffi::c_int,
1443        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1444        bsrSortedEndPtrA: *const ::core::ffi::c_int,
1445        bsrSortedColIndA: *const ::core::ffi::c_int,
1446        blockDim: ::core::ffi::c_int,
1447        x: *const cuDoubleComplex,
1448        beta: *const cuDoubleComplex,
1449        y: *mut cuDoubleComplex,
1450    ) -> cusparseStatus_t;
1451}
1452unsafe extern "C" {
1453    /// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` is either a structural zero or a numerical zero (singular block). Otherwise `position=-1`.
1454    ///
1455    /// The `position` can be 0-based or 1-based, the same as the matrix.
1456    ///
1457    /// Function [`cusparseXbsrsv2_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
1458    ///
1459    /// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
1460    ///
1461    /// * The routine requires no extra storage.
1462    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
1463    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
1464    ///
1465    /// # Parameters
1466    ///
1467    /// - `position`: if there is no structural or numerical zero, `position` is -1; otherwise, if `A(j,j)` is missing or `U(j,j)` is zero, `position=j`.
    ///
    /// This declaration carries `#[deprecated]`, so Rust callers get a deprecation warning when they use it.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably `position` must point to a writable `c_int` in the memory space selected by the pointer mode — confirm against the cuSPARSE reference.
1468    #[deprecated]
1469    pub fn cusparseXbsrsv2_zeroPivot(
1470        handle: cusparseHandle_t,
1471        info: bsrsv2Info_t,
1472        position: *mut ::core::ffi::c_int,
1473    ) -> cusparseStatus_t;
1474}
1475unsafe extern "C" {
1476    /// This function returns the size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1477    ///
1478    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1479    ///
1480    /// ![image1](_images/op-non-xpose.png)
1481    ///
1482    /// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
1483    ///
1484    /// * The routine requires no extra storage.
1485    /// * The routine supports asynchronous execution.
1486    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the `f32` variant (`bsrSortedValA: *mut f32`). The function's own return value is a `cusparseStatus_t`; the only output pointer besides `bsrSortedValA` is `pBufferSizeInBytes`, which is presumably where the size is written — confirm against the cuSPARSE reference.
    ///
    /// NOTE(review): `bsrSortedValA` is declared `*mut` here although the description above does not mention the values being modified.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1487    pub fn cusparseSbsrsv2_bufferSize(
1488        handle: cusparseHandle_t,
1489        dirA: cusparseDirection_t,
1490        transA: cusparseOperation_t,
1491        mb: ::core::ffi::c_int,
1492        nnzb: ::core::ffi::c_int,
1493        descrA: cusparseMatDescr_t,
1494        bsrSortedValA: *mut f32,
1495        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1496        bsrSortedColIndA: *const ::core::ffi::c_int,
1497        blockDim: ::core::ffi::c_int,
1498        info: bsrsv2Info_t,
1499        pBufferSizeInBytes: *mut ::core::ffi::c_int,
1500    ) -> cusparseStatus_t;
1501}
1502unsafe extern "C" {
1503    /// This function returns the size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1504    ///
1505    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1506    ///
1507    /// ![image1](_images/op-non-xpose.png)
1508    ///
1509    /// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
1510    ///
1511    /// * The routine requires no extra storage.
1512    /// * The routine supports asynchronous execution.
1513    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the `f64` variant (`bsrSortedValA: *mut f64`). The function's own return value is a `cusparseStatus_t`; the only output pointer besides `bsrSortedValA` is `pBufferSizeInBytes`, which is presumably where the size is written — confirm against the cuSPARSE reference.
    ///
    /// NOTE(review): `bsrSortedValA` is declared `*mut` here although the description above does not mention the values being modified.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1514    pub fn cusparseDbsrsv2_bufferSize(
1515        handle: cusparseHandle_t,
1516        dirA: cusparseDirection_t,
1517        transA: cusparseOperation_t,
1518        mb: ::core::ffi::c_int,
1519        nnzb: ::core::ffi::c_int,
1520        descrA: cusparseMatDescr_t,
1521        bsrSortedValA: *mut f64,
1522        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1523        bsrSortedColIndA: *const ::core::ffi::c_int,
1524        blockDim: ::core::ffi::c_int,
1525        info: bsrsv2Info_t,
1526        pBufferSizeInBytes: *mut ::core::ffi::c_int,
1527    ) -> cusparseStatus_t;
1528}
1529unsafe extern "C" {
1530    /// This function returns the size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1531    ///
1532    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1533    ///
1534    /// ![image1](_images/op-non-xpose.png)
1535    ///
1536    /// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
1537    ///
1538    /// * The routine requires no extra storage.
1539    /// * The routine supports asynchronous execution.
1540    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the `cuComplex` variant (`bsrSortedValA: *mut cuComplex`). The function's own return value is a `cusparseStatus_t`; the only output pointer besides `bsrSortedValA` is `pBufferSizeInBytes`, which is presumably where the size is written — confirm against the cuSPARSE reference.
    ///
    /// NOTE(review): `bsrSortedValA` is declared `*mut` here although the description above does not mention the values being modified.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1541    pub fn cusparseCbsrsv2_bufferSize(
1542        handle: cusparseHandle_t,
1543        dirA: cusparseDirection_t,
1544        transA: cusparseOperation_t,
1545        mb: ::core::ffi::c_int,
1546        nnzb: ::core::ffi::c_int,
1547        descrA: cusparseMatDescr_t,
1548        bsrSortedValA: *mut cuComplex,
1549        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1550        bsrSortedColIndA: *const ::core::ffi::c_int,
1551        blockDim: ::core::ffi::c_int,
1552        info: bsrsv2Info_t,
1553        pBufferSizeInBytes: *mut ::core::ffi::c_int,
1554    ) -> cusparseStatus_t;
1555}
1556unsafe extern "C" {
1557    /// This function returns the size of the buffer used in `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1558    ///
1559    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1560    ///
1561    /// ![image1](_images/op-non-xpose.png)
1562    ///
1563    /// Although there are six combinations in terms of parameter `trans` and the upper (lower) triangular part of `A`, `bsrsv2_bufferSize()` returns the maximum size buffer among these combinations. The buffer size depends on the dimensions `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrsv2_bufferSize()` again to have the correct buffer size; otherwise a segmentation fault may occur.
1564    ///
1565    /// * The routine requires no extra storage.
1566    /// * The routine supports asynchronous execution.
1567    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the `cuDoubleComplex` variant (`bsrSortedValA: *mut cuDoubleComplex`). The function's own return value is a `cusparseStatus_t`; the only output pointer besides `bsrSortedValA` is `pBufferSizeInBytes`, which is presumably where the size is written — confirm against the cuSPARSE reference.
    ///
    /// NOTE(review): `bsrSortedValA` is declared `*mut` here although the description above does not mention the values being modified.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1568    pub fn cusparseZbsrsv2_bufferSize(
1569        handle: cusparseHandle_t,
1570        dirA: cusparseDirection_t,
1571        transA: cusparseOperation_t,
1572        mb: ::core::ffi::c_int,
1573        nnzb: ::core::ffi::c_int,
1574        descrA: cusparseMatDescr_t,
1575        bsrSortedValA: *mut cuDoubleComplex,
1576        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1577        bsrSortedColIndA: *const ::core::ffi::c_int,
1578        blockDim: ::core::ffi::c_int,
1579        info: bsrsv2Info_t,
1580        pBufferSizeInBytes: *mut ::core::ffi::c_int,
1581    ) -> cusparseStatus_t;
1582}
1583unsafe extern "C" {
    /// Raw binding for `cusparseSbsrsv2_bufferSizeExt`; no upstream documentation is attached to this declaration.
    ///
    /// The argument list parallels [`cusparseSbsrsv2_bufferSize`] (`f32` values), except that the block dimension is named `blockSize` and the output `pBufferSize` is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): presumably this reports the same `bsrsv2` working-buffer size, widened to `size_t` — confirm against the cuSPARSE headers.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1584    pub fn cusparseSbsrsv2_bufferSizeExt(
1585        handle: cusparseHandle_t,
1586        dirA: cusparseDirection_t,
1587        transA: cusparseOperation_t,
1588        mb: ::core::ffi::c_int,
1589        nnzb: ::core::ffi::c_int,
1590        descrA: cusparseMatDescr_t,
1591        bsrSortedValA: *mut f32,
1592        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1593        bsrSortedColIndA: *const ::core::ffi::c_int,
1594        blockSize: ::core::ffi::c_int,
1595        info: bsrsv2Info_t,
1596        pBufferSize: *mut size_t,
1597    ) -> cusparseStatus_t;
1598}
1599unsafe extern "C" {
    /// Raw binding for `cusparseDbsrsv2_bufferSizeExt`; no upstream documentation is attached to this declaration.
    ///
    /// The argument list parallels [`cusparseDbsrsv2_bufferSize`] (`f64` values), except that the block dimension is named `blockSize` and the output `pBufferSize` is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): presumably this reports the same `bsrsv2` working-buffer size, widened to `size_t` — confirm against the cuSPARSE headers.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1600    pub fn cusparseDbsrsv2_bufferSizeExt(
1601        handle: cusparseHandle_t,
1602        dirA: cusparseDirection_t,
1603        transA: cusparseOperation_t,
1604        mb: ::core::ffi::c_int,
1605        nnzb: ::core::ffi::c_int,
1606        descrA: cusparseMatDescr_t,
1607        bsrSortedValA: *mut f64,
1608        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1609        bsrSortedColIndA: *const ::core::ffi::c_int,
1610        blockSize: ::core::ffi::c_int,
1611        info: bsrsv2Info_t,
1612        pBufferSize: *mut size_t,
1613    ) -> cusparseStatus_t;
1614}
1615unsafe extern "C" {
    /// Raw binding for `cusparseCbsrsv2_bufferSizeExt`; no upstream documentation is attached to this declaration.
    ///
    /// The argument list parallels [`cusparseCbsrsv2_bufferSize`] (`cuComplex` values), except that the block dimension is named `blockSize` and the output `pBufferSize` is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): presumably this reports the same `bsrsv2` working-buffer size, widened to `size_t` — confirm against the cuSPARSE headers.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1616    pub fn cusparseCbsrsv2_bufferSizeExt(
1617        handle: cusparseHandle_t,
1618        dirA: cusparseDirection_t,
1619        transA: cusparseOperation_t,
1620        mb: ::core::ffi::c_int,
1621        nnzb: ::core::ffi::c_int,
1622        descrA: cusparseMatDescr_t,
1623        bsrSortedValA: *mut cuComplex,
1624        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1625        bsrSortedColIndA: *const ::core::ffi::c_int,
1626        blockSize: ::core::ffi::c_int,
1627        info: bsrsv2Info_t,
1628        pBufferSize: *mut size_t,
1629    ) -> cusparseStatus_t;
1630}
1631unsafe extern "C" {
    /// Raw binding for `cusparseZbsrsv2_bufferSizeExt`; no upstream documentation is attached to this declaration.
    ///
    /// The argument list parallels [`cusparseZbsrsv2_bufferSize`] (`cuDoubleComplex` values), except that the block dimension is named `blockSize` and the output `pBufferSize` is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): presumably this reports the same `bsrsv2` working-buffer size, widened to `size_t` — confirm against the cuSPARSE headers.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1632    pub fn cusparseZbsrsv2_bufferSizeExt(
1633        handle: cusparseHandle_t,
1634        dirA: cusparseDirection_t,
1635        transA: cusparseOperation_t,
1636        mb: ::core::ffi::c_int,
1637        nnzb: ::core::ffi::c_int,
1638        descrA: cusparseMatDescr_t,
1639        bsrSortedValA: *mut cuDoubleComplex,
1640        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1641        bsrSortedColIndA: *const ::core::ffi::c_int,
1642        blockSize: ::core::ffi::c_int,
1643        info: bsrsv2Info_t,
1644        pBufferSize: *mut size_t,
1645    ) -> cusparseStatus_t;
1646}
1647unsafe extern "C" {
1648    /// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1649    ///
1650    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and
1651    ///
1652    /// ![image1](_images/op-non-xpose.png)
1653    ///
1654    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
1655    ///
1656    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
1657    ///
1658    /// This function requires a buffer of the size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1659    ///
1660    /// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
1661    ///
1662    /// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
1663    ///
1664    /// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
1665    ///
1666    /// * This function requires temporary extra storage that is allocated internally.
1667    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
1668    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This binding is the `f32` variant (`bsrSortedValA: *const f32`).
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1669    pub fn cusparseSbsrsv2_analysis(
1670        handle: cusparseHandle_t,
1671        dirA: cusparseDirection_t,
1672        transA: cusparseOperation_t,
1673        mb: ::core::ffi::c_int,
1674        nnzb: ::core::ffi::c_int,
1675        descrA: cusparseMatDescr_t,
1676        bsrSortedValA: *const f32,
1677        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1678        bsrSortedColIndA: *const ::core::ffi::c_int,
1679        blockDim: ::core::ffi::c_int,
1680        info: bsrsv2Info_t,
1681        policy: cusparseSolvePolicy_t,
1682        pBuffer: *mut ::core::ffi::c_void,
1683    ) -> cusparseStatus_t;
1684}
1685unsafe extern "C" {
1686    /// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1687    ///
1688    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and
1689    ///
1690    /// ![image1](_images/op-non-xpose.png)
1691    ///
1692    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
1693    ///
1694    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
1695    ///
1696    /// This function requires a buffer of the size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1697    ///
1698    /// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
1699    ///
1700    /// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
1701    ///
1702    /// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
1703    ///
1704    /// * This function requires temporary extra storage that is allocated internally.
1705    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
1706    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This binding is the `f64` variant (`bsrSortedValA: *const f64`).
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1707    pub fn cusparseDbsrsv2_analysis(
1708        handle: cusparseHandle_t,
1709        dirA: cusparseDirection_t,
1710        transA: cusparseOperation_t,
1711        mb: ::core::ffi::c_int,
1712        nnzb: ::core::ffi::c_int,
1713        descrA: cusparseMatDescr_t,
1714        bsrSortedValA: *const f64,
1715        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1716        bsrSortedColIndA: *const ::core::ffi::c_int,
1717        blockDim: ::core::ffi::c_int,
1718        info: bsrsv2Info_t,
1719        policy: cusparseSolvePolicy_t,
1720        pBuffer: *mut ::core::ffi::c_void,
1721    ) -> cusparseStatus_t;
1722}
1723unsafe extern "C" {
1724    /// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1725    ///
1726    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and
1727    ///
1728    /// ![image1](_images/op-non-xpose.png)
1729    ///
1730    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
1731    ///
1732    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
1733    ///
1734    /// This function requires a buffer of the size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1735    ///
1736    /// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
1737    ///
1738    /// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
1739    ///
1740    /// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
1741    ///
1742    /// * This function requires temporary extra storage that is allocated internally.
1743    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
1744    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This binding is the `cuComplex` variant (`bsrSortedValA: *const cuComplex`).
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1745    pub fn cusparseCbsrsv2_analysis(
1746        handle: cusparseHandle_t,
1747        dirA: cusparseDirection_t,
1748        transA: cusparseOperation_t,
1749        mb: ::core::ffi::c_int,
1750        nnzb: ::core::ffi::c_int,
1751        descrA: cusparseMatDescr_t,
1752        bsrSortedValA: *const cuComplex,
1753        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1754        bsrSortedColIndA: *const ::core::ffi::c_int,
1755        blockDim: ::core::ffi::c_int,
1756        info: bsrsv2Info_t,
1757        policy: cusparseSolvePolicy_t,
1758        pBuffer: *mut ::core::ffi::c_void,
1759    ) -> cusparseStatus_t;
1760}
1761unsafe extern "C" {
1762    /// This function performs the analysis phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1763    ///
1764    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand side and the solution vectors; $\alpha$ is a scalar; and
1765    ///
1766    /// ![image1](_images/op-non-xpose.png)
1767    ///
1768    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
1769    ///
1770    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
1771    ///
1772    /// This function requires a buffer of the size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1773    ///
1774    /// Function `bsrsv2_analysis()` reports a structural zero and computes level information, which is stored in the opaque structure `info`. The level information can extract more parallelism for a triangular solver. However, `bsrsv2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
1775    ///
1776    /// Function `bsrsv2_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. No structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the structural zero is.
1777    ///
1778    /// It is the user’s choice whether to call `bsrsv2_solve()` if `bsrsv2_analysis()` reports a structural zero. In this case, the user can still call `bsrsv2_solve()`, which will return a numerical zero at the same position as a structural zero. However, the result `x` is meaningless.
1779    ///
1780    /// * This function requires temporary extra storage that is allocated internally.
1781    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
1782    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This binding is the `cuDoubleComplex` variant (`bsrSortedValA: *const cuDoubleComplex`).
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1783    pub fn cusparseZbsrsv2_analysis(
1784        handle: cusparseHandle_t,
1785        dirA: cusparseDirection_t,
1786        transA: cusparseOperation_t,
1787        mb: ::core::ffi::c_int,
1788        nnzb: ::core::ffi::c_int,
1789        descrA: cusparseMatDescr_t,
1790        bsrSortedValA: *const cuDoubleComplex,
1791        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1792        bsrSortedColIndA: *const ::core::ffi::c_int,
1793        blockDim: ::core::ffi::c_int,
1794        info: bsrsv2Info_t,
1795        policy: cusparseSolvePolicy_t,
1796        pBuffer: *mut ::core::ffi::c_void,
1797    ) -> cusparseStatus_t;
1798}
1799unsafe extern "C" {
1800    /// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1801    ///
1802    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1803    ///
1804    /// ![image1](_images/op-non-xpose.png)
1805    ///
1806    /// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv02_solve()` can support an arbitrary `blockDim`.
1807    ///
1808    /// This function may be executed multiple times for a given matrix and a particular operation type.
1809    ///
1810    /// This function requires a buffer of the size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1811    ///
1812    /// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1813    ///
1814    /// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
1815    ///
1816    /// Function `bsrsv02_solve()` has the same behavior as `csrsv02_solve()`. That is, `bsr2csr(bsrsv02(A)) = csrsv02(bsr2csr(A))`. The numerical zero of `csrsv02_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv02_solve()` means there exists some block `A(j,j)` that is not invertible.
1817    ///
1818    /// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
1819    ///
1820    /// The function supports the following properties if `pBuffer != NULL`:
1821    ///
1822    /// * The routine requires no extra storage.
1823    /// * The routine supports asynchronous execution.
1824    /// * The routine supports CUDA graph capture.
1825    ///
1826    /// For example, suppose L is a lower triangular matrix with unit diagonal; then the following code solves `L*y=x` by level information.
    ///
    /// NOTE(review): the code example announced above is not included in these generated docs; see the cuSPARSE reference for `bsrsv2_solve()`.
    ///
    /// This binding is the `f32` variant. The signature names its vectors `f` (`*const f32`) and `x` (`*mut f32`), whereas the prose above speaks of `x` and `y`; presumably `f` is the right-hand side and `x` receives the solution — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Declared in an `unsafe extern "C"` block, so every call must be wrapped in `unsafe`; the compiler cannot check the pointer arguments. Presumably each pointer must reference memory valid for the sizes implied by the other arguments — confirm against the cuSPARSE reference.
1827    pub fn cusparseSbsrsv2_solve(
1828        handle: cusparseHandle_t,
1829        dirA: cusparseDirection_t,
1830        transA: cusparseOperation_t,
1831        mb: ::core::ffi::c_int,
1832        nnzb: ::core::ffi::c_int,
1833        alpha: *const f32,
1834        descrA: cusparseMatDescr_t,
1835        bsrSortedValA: *const f32,
1836        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1837        bsrSortedColIndA: *const ::core::ffi::c_int,
1838        blockDim: ::core::ffi::c_int,
1839        info: bsrsv2Info_t,
1840        f: *const f32,
1841        x: *mut f32,
1842        policy: cusparseSolvePolicy_t,
1843        pBuffer: *mut ::core::ffi::c_void,
1844    ) -> cusparseStatus_t;
1845}
1846unsafe extern "C" {
1847    /// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1848    ///
1849    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1850    ///
1851    /// ![image1](_images/op-non-xpose.png)
1852    ///
1853    /// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv2_solve()` can support an arbitrary `blockDim`.
1854    ///
1855    /// This function may be executed multiple times for a given matrix and a particular operation type.
1856    ///
1857    /// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1858    ///
1859    /// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1860    ///
1861    /// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
1862    ///
1863    /// Function `bsrsv2_solve()` has the same behavior as `csrsv2_solve()`. That is, `bsr2csr(bsrsv2(A)) = csrsv2(bsr2csr(A))`. The numerical zero of `csrsv2_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv2_solve()` means there exists some block `A(j,j)` that is not invertible.
1864    ///
1865    /// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
1866    ///
1867    /// The function supports the following properties if `pBuffer != NULL`:
1868    ///
1869    /// * The routine requires no extra storage.
1870    /// * The routine supports asynchronous execution.
1871    /// * The routine supports CUDA graph capture.
1872    ///
1873    /// For example, suppose L is a lower triangular matrix with unit diagonal, then the following code solves `L*y=x` by level information. (The example code is not reproduced in this binding.) NOTE(review): the signature below names the read-only `f64` vector `f` and the writable `f64` vector `x`; presumably these are the right-hand side and the solution that this text calls `x` and `y` — confirm against the cuSPARSE header.
1874    pub fn cusparseDbsrsv2_solve(
1875        handle: cusparseHandle_t,
1876        dirA: cusparseDirection_t,
1877        transA: cusparseOperation_t,
1878        mb: ::core::ffi::c_int,
1879        nnzb: ::core::ffi::c_int,
1880        alpha: *const f64,
1881        descrA: cusparseMatDescr_t,
1882        bsrSortedValA: *const f64,
1883        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1884        bsrSortedColIndA: *const ::core::ffi::c_int,
1885        blockDim: ::core::ffi::c_int,
1886        info: bsrsv2Info_t,
1887        f: *const f64,
1888        x: *mut f64,
1889        policy: cusparseSolvePolicy_t,
1890        pBuffer: *mut ::core::ffi::c_void,
1891    ) -> cusparseStatus_t;
1892}
1893unsafe extern "C" {
1894    /// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1895    ///
1896    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1897    ///
1898    /// ![image1](_images/op-non-xpose.png)
1899    ///
1900    /// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv2_solve()` can support an arbitrary `blockDim`.
1901    ///
1902    /// This function may be executed multiple times for a given matrix and a particular operation type.
1903    ///
1904    /// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1905    ///
1906    /// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1907    ///
1908    /// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
1909    ///
1910    /// Function `bsrsv2_solve()` has the same behavior as `csrsv2_solve()`. That is, `bsr2csr(bsrsv2(A)) = csrsv2(bsr2csr(A))`. The numerical zero of `csrsv2_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv2_solve()` means there exists some block `A(j,j)` that is not invertible.
1911    ///
1912    /// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
1913    ///
1914    /// The function supports the following properties if `pBuffer != NULL`:
1915    ///
1916    /// * The routine requires no extra storage.
1917    /// * The routine supports asynchronous execution.
1918    /// * The routine supports CUDA graph capture.
1919    ///
1920    /// For example, suppose L is a lower triangular matrix with unit diagonal, then the following code solves `L*y=x` by level information. (The example code is not reproduced in this binding.) NOTE(review): the signature below names the read-only `cuComplex` vector `f` and the writable `cuComplex` vector `x`; presumably these are the right-hand side and the solution that this text calls `x` and `y` — confirm against the cuSPARSE header.
1921    pub fn cusparseCbsrsv2_solve(
1922        handle: cusparseHandle_t,
1923        dirA: cusparseDirection_t,
1924        transA: cusparseOperation_t,
1925        mb: ::core::ffi::c_int,
1926        nnzb: ::core::ffi::c_int,
1927        alpha: *const cuComplex,
1928        descrA: cusparseMatDescr_t,
1929        bsrSortedValA: *const cuComplex,
1930        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1931        bsrSortedColIndA: *const ::core::ffi::c_int,
1932        blockDim: ::core::ffi::c_int,
1933        info: bsrsv2Info_t,
1934        f: *const cuComplex,
1935        x: *mut cuComplex,
1936        policy: cusparseSolvePolicy_t,
1937        pBuffer: *mut ::core::ffi::c_void,
1938    ) -> cusparseStatus_t;
1939}
1940unsafe extern "C" {
1941    /// This function performs the solve phase of `bsrsv2`, a new sparse triangular linear system `op(A)*y =`$\alpha$`x`.
1942    ///
1943    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `x` and `y` are the right-hand-side and the solution vectors; $\alpha$ is a scalar; and
1944    ///
1945    /// ![image1](_images/op-non-xpose.png)
1946    ///
1947    /// The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsv2_solve()` can support an arbitrary `blockDim`.
1948    ///
1949    /// This function may be executed multiple times for a given matrix and a particular operation type.
1950    ///
1951    /// This function requires a buffer size returned by `bsrsv2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1952    ///
1953    /// Although `bsrsv2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsv2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsv2_solve()` can be run with or without levels. On the other hand, if `bsrsv2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsv2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
1954    ///
1955    /// The level information may not improve the performance, but may spend extra time doing analysis. For example, a tridiagonal matrix has no parallelism. In this case, [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] performs better than [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`]. If the user has an iterative solver, the best approach is to do `bsrsv2_analysis()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] once. Then do `bsrsv2_solve()` with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`] in the first run, and with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`] in the second run, and pick the fastest one to perform the remaining iterations.
1956    ///
1957    /// Function `bsrsv2_solve()` has the same behavior as `csrsv2_solve()`. That is, `bsr2csr(bsrsv2(A)) = csrsv2(bsr2csr(A))`. The numerical zero of `csrsv2_solve()` means there exists some zero `A(j,j)`. The numerical zero of `bsrsv2_solve()` means there exists some block `A(j,j)` that is not invertible.
1958    ///
1959    /// Function `bsrsv2_solve()` reports the first numerical zero, including a structural zero. No numerical zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if `A(j,j)` is not invertible for some `j`. The user needs to call [`cusparseXbsrsv2_zeroPivot`] to know where the numerical zero is.
1960    ///
1961    /// The function supports the following properties if `pBuffer != NULL`:
1962    ///
1963    /// * The routine requires no extra storage.
1964    /// * The routine supports asynchronous execution.
1965    /// * The routine supports CUDA graph capture.
1966    ///
1967    /// For example, suppose L is a lower triangular matrix with unit diagonal, then the following code solves `L*y=x` by level information. (The example code is not reproduced in this binding.) NOTE(review): the signature below names the read-only `cuDoubleComplex` vector `f` and the writable `cuDoubleComplex` vector `x`; presumably these are the right-hand side and the solution that this text calls `x` and `y` — confirm against the cuSPARSE header.
1968    pub fn cusparseZbsrsv2_solve(
1969        handle: cusparseHandle_t,
1970        dirA: cusparseDirection_t,
1971        transA: cusparseOperation_t,
1972        mb: ::core::ffi::c_int,
1973        nnzb: ::core::ffi::c_int,
1974        alpha: *const cuDoubleComplex,
1975        descrA: cusparseMatDescr_t,
1976        bsrSortedValA: *const cuDoubleComplex,
1977        bsrSortedRowPtrA: *const ::core::ffi::c_int,
1978        bsrSortedColIndA: *const ::core::ffi::c_int,
1979        blockDim: ::core::ffi::c_int,
1980        info: bsrsv2Info_t,
1981        f: *const cuDoubleComplex,
1982        x: *mut cuDoubleComplex,
1983        policy: cusparseSolvePolicy_t,
1984        pBuffer: *mut ::core::ffi::c_void,
1985    ) -> cusparseStatus_t;
1986}
1987unsafe extern "C" {
1988    /// This function performs one of the following matrix-matrix operations (the formula is missing from this text; presumably `C = alpha*op(A)*op(B) + beta*C` — confirm against the cuSPARSE documentation):
1989    ///
1990    /// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha$ and $\beta$ are scalars; and
1991    ///
1992    /// ![image3](_images/op-a-notsupported.png)
1993    ///
1994    /// and
1995    ///
1996    /// ![image4](_images/op-b-notsupported.png)
1997    ///
1998    /// The function has the following limitations (`blockDim` presumably refers to the `blockSize` parameter of this binding):
1999    ///
2000    /// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
2001    /// * only `blockDim > 1` is supported
2002    /// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
2003    /// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
2004    /// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
2005    ///
2006    /// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
2007    ///
2008    /// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code is not reproduced in this binding.)
2009    ///
2010    /// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublasSgeam()`, and then to perform `A*transpose(Bt)`.
2011    ///
2012    /// `bsrmm()` has the following properties:
2013    ///
2014    /// * The routine requires no extra storage.
2015    /// * The routine supports asynchronous execution.
2016    /// * The routine supports CUDA graph capture.
2017    pub fn cusparseSbsrmm(
2018        handle: cusparseHandle_t,
2019        dirA: cusparseDirection_t,
2020        transA: cusparseOperation_t,
2021        transB: cusparseOperation_t,
2022        mb: ::core::ffi::c_int,
2023        n: ::core::ffi::c_int,
2024        kb: ::core::ffi::c_int,
2025        nnzb: ::core::ffi::c_int,
2026        alpha: *const f32,
2027        descrA: cusparseMatDescr_t,
2028        bsrSortedValA: *const f32,
2029        bsrSortedRowPtrA: *const ::core::ffi::c_int,
2030        bsrSortedColIndA: *const ::core::ffi::c_int,
2031        blockSize: ::core::ffi::c_int,
2032        B: *const f32,
2033        ldb: ::core::ffi::c_int,
2034        beta: *const f32,
2035        C: *mut f32,
2036        ldc: ::core::ffi::c_int,
2037    ) -> cusparseStatus_t;
2038}
2039unsafe extern "C" {
2040    /// This function performs one of the following matrix-matrix operations (the formula is missing from this text; presumably `C = alpha*op(A)*op(B) + beta*C` — confirm against the cuSPARSE documentation):
2041    ///
2042    /// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha$ and $\beta$ are scalars; and
2043    ///
2044    /// ![image3](_images/op-a-notsupported.png)
2045    ///
2046    /// and
2047    ///
2048    /// ![image4](_images/op-b-notsupported.png)
2049    ///
2050    /// The function has the following limitations (`blockDim` presumably refers to the `blockSize` parameter of this binding):
2051    ///
2052    /// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
2053    /// * only `blockDim > 1` is supported
2054    /// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
2055    /// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
2056    /// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
2057    ///
2058    /// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
2059    ///
2060    /// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code is not reproduced in this binding.)
2061    ///
2062    /// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublasDgeam()`, and then to perform `A*transpose(Bt)`.
2063    ///
2064    /// `bsrmm()` has the following properties:
2065    ///
2066    /// * The routine requires no extra storage.
2067    /// * The routine supports asynchronous execution.
2068    /// * The routine supports CUDA graph capture.
2069    pub fn cusparseDbsrmm(
2070        handle: cusparseHandle_t,
2071        dirA: cusparseDirection_t,
2072        transA: cusparseOperation_t,
2073        transB: cusparseOperation_t,
2074        mb: ::core::ffi::c_int,
2075        n: ::core::ffi::c_int,
2076        kb: ::core::ffi::c_int,
2077        nnzb: ::core::ffi::c_int,
2078        alpha: *const f64,
2079        descrA: cusparseMatDescr_t,
2080        bsrSortedValA: *const f64,
2081        bsrSortedRowPtrA: *const ::core::ffi::c_int,
2082        bsrSortedColIndA: *const ::core::ffi::c_int,
2083        blockSize: ::core::ffi::c_int,
2084        B: *const f64,
2085        ldb: ::core::ffi::c_int,
2086        beta: *const f64,
2087        C: *mut f64,
2088        ldc: ::core::ffi::c_int,
2089    ) -> cusparseStatus_t;
2090}
2091unsafe extern "C" {
2092    /// This function performs one of the following matrix-matrix operations (the formula is missing from this text; presumably `C = alpha*op(A)*op(B) + beta*C` — confirm against the cuSPARSE documentation):
2093    ///
2094    /// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha$ and $\beta$ are scalars; and
2095    ///
2096    /// ![image3](_images/op-a-notsupported.png)
2097    ///
2098    /// and
2099    ///
2100    /// ![image4](_images/op-b-notsupported.png)
2101    ///
2102    /// The function has the following limitations (`blockDim` presumably refers to the `blockSize` parameter of this binding):
2103    ///
2104    /// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
2105    /// * only `blockDim > 1` is supported
2106    /// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
2107    /// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
2108    /// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
2109    ///
2110    /// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
2111    ///
2112    /// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code is not reproduced in this binding.)
2113    ///
2114    /// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublasCgeam()`, and then to perform `A*transpose(Bt)`.
2115    ///
2116    /// `bsrmm()` has the following properties:
2117    ///
2118    /// * The routine requires no extra storage.
2119    /// * The routine supports asynchronous execution.
2120    /// * The routine supports CUDA graph capture.
2121    pub fn cusparseCbsrmm(
2122        handle: cusparseHandle_t,
2123        dirA: cusparseDirection_t,
2124        transA: cusparseOperation_t,
2125        transB: cusparseOperation_t,
2126        mb: ::core::ffi::c_int,
2127        n: ::core::ffi::c_int,
2128        kb: ::core::ffi::c_int,
2129        nnzb: ::core::ffi::c_int,
2130        alpha: *const cuComplex,
2131        descrA: cusparseMatDescr_t,
2132        bsrSortedValA: *const cuComplex,
2133        bsrSortedRowPtrA: *const ::core::ffi::c_int,
2134        bsrSortedColIndA: *const ::core::ffi::c_int,
2135        blockSize: ::core::ffi::c_int,
2136        B: *const cuComplex,
2137        ldb: ::core::ffi::c_int,
2138        beta: *const cuComplex,
2139        C: *mut cuComplex,
2140        ldc: ::core::ffi::c_int,
2141    ) -> cusparseStatus_t;
2142}
2143unsafe extern "C" {
2144    /// This function performs one of the following matrix-matrix operations (the formula is missing from this text; presumably `C = alpha*op(A)*op(B) + beta*C` — confirm against the cuSPARSE documentation):
2145    ///
2146    /// `A` is an $mb \times kb$ sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `C` are dense matrices; $\alpha$ and $\beta$ are scalars; and
2147    ///
2148    /// ![image3](_images/op-a-notsupported.png)
2149    ///
2150    /// and
2151    ///
2152    /// ![image4](_images/op-b-notsupported.png)
2153    ///
2154    /// The function has the following limitations (`blockDim` presumably refers to the `blockSize` parameter of this binding):
2155    ///
2156    /// * only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] matrix type is supported
2157    /// * only `blockDim > 1` is supported
2158    /// * if `blockDim` ≤ 4, then max(mb)/max(n) = 524,272
2159    /// * if 4 < `blockDim` ≤ 8, then max(mb) = 524,272, max(n) = 262,136
2160    /// * if `blockDim` > 8, then m < 65,535 and max(n) = 262,136
2161    ///
2162    /// The motivation of `transpose(B)` is to improve memory access of matrix `B`. The computational pattern of `A*transpose(B)` with matrix `B` in column-major order is equivalent to `A*B` with matrix `B` in row-major order.
2163    ///
2164    /// In practice, no operation in an iterative solver or eigenvalue solver uses `A*transpose(B)`. However, we can perform `A*transpose(transpose(B))` which is the same as `A*B`. For example, suppose `A` is `mb*kb`, `B` is `k*n` and `C` is `m*n`, the following code shows usage of [`cusparseDbsrmm`]. (The example code is not reproduced in this binding.)
2165    ///
2166    /// Instead of using `A*B`, our proposal is to transpose `B` to `Bt` by first calling `cublasZgeam()`, and then to perform `A*transpose(Bt)`.
2167    ///
2168    /// `bsrmm()` has the following properties:
2169    ///
2170    /// * The routine requires no extra storage.
2171    /// * The routine supports asynchronous execution.
2172    /// * The routine supports CUDA graph capture.
2173    pub fn cusparseZbsrmm(
2174        handle: cusparseHandle_t,
2175        dirA: cusparseDirection_t,
2176        transA: cusparseOperation_t,
2177        transB: cusparseOperation_t,
2178        mb: ::core::ffi::c_int,
2179        n: ::core::ffi::c_int,
2180        kb: ::core::ffi::c_int,
2181        nnzb: ::core::ffi::c_int,
2182        alpha: *const cuDoubleComplex,
2183        descrA: cusparseMatDescr_t,
2184        bsrSortedValA: *const cuDoubleComplex,
2185        bsrSortedRowPtrA: *const ::core::ffi::c_int,
2186        bsrSortedColIndA: *const ::core::ffi::c_int,
2187        blockSize: ::core::ffi::c_int,
2188        B: *const cuDoubleComplex,
2189        ldb: ::core::ffi::c_int,
2190        beta: *const cuDoubleComplex,
2191        C: *mut cuDoubleComplex,
2192        ldc: ::core::ffi::c_int,
2193    ) -> cusparseStatus_t;
2194}
2195unsafe extern "C" {
2196    /// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` is either a structural zero or a numerical zero (singular block). Otherwise `position=-1`.
2197    ///
2198    /// The `position` can be 0-based or 1-based, the same as the matrix.
2199    ///
2200    /// Function [`cusparseXbsrsm2_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
2201    ///
2202    /// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
2203    ///
2204    /// * The routine requires no extra storage.
2205    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
2206    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
2207    ///
2208    /// # Parameters
2209    ///
2210    /// - `position`: if there is no structural or numerical zero, `position` is -1; otherwise, if `A(j,j)` is missing or `U(j,j)` is zero, `position=j`.
2211    #[deprecated]
2212    pub fn cusparseXbsrsm2_zeroPivot(
2213        handle: cusparseHandle_t,
2214        info: bsrsm2Info_t,
2215        position: *mut ::core::ffi::c_int,
2216    ) -> cusparseStatus_t;
2217}
2218unsafe extern "C" {
2219    /// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
2220    ///
2221    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2222    ///
2223    /// ![image9](_images/op-a-non-xpose-1.png)
2224    ///
2225    /// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on dimension `mb,blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur. NOTE(review): the binding itself returns a `cusparseStatus_t`, so the size is presumably written through `pBufferSizeInBytes`; likewise `trans` and `blockDim` presumably refer to the `transA`/`transXY` and `blockSize` parameters — confirm against the cuSPARSE header.
2226    ///
2227    /// * The routine requires no extra storage.
2228    /// * The routine supports asynchronous execution.
2229    /// * The routine supports CUDA graph capture.
2230    pub fn cusparseSbsrsm2_bufferSize(
2231        handle: cusparseHandle_t,
2232        dirA: cusparseDirection_t,
2233        transA: cusparseOperation_t,
2234        transXY: cusparseOperation_t,
2235        mb: ::core::ffi::c_int,
2236        n: ::core::ffi::c_int,
2237        nnzb: ::core::ffi::c_int,
2238        descrA: cusparseMatDescr_t,
2239        bsrSortedVal: *mut f32,
2240        bsrSortedRowPtr: *const ::core::ffi::c_int,
2241        bsrSortedColInd: *const ::core::ffi::c_int,
2242        blockSize: ::core::ffi::c_int,
2243        info: bsrsm2Info_t,
2244        pBufferSizeInBytes: *mut ::core::ffi::c_int,
2245    ) -> cusparseStatus_t;
2246}
2247unsafe extern "C" {
2248    /// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
2249    ///
2250    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2251    ///
2252    /// ![image9](_images/op-a-non-xpose-1.png)
2253    ///
2254    /// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on dimension `mb,blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur. NOTE(review): the binding itself returns a `cusparseStatus_t`, so the size is presumably written through `pBufferSizeInBytes`; likewise `trans` and `blockDim` presumably refer to the `transA`/`transXY` and `blockSize` parameters — confirm against the cuSPARSE header.
2255    ///
2256    /// * The routine requires no extra storage.
2257    /// * The routine supports asynchronous execution.
2258    /// * The routine supports CUDA graph capture.
2259    pub fn cusparseDbsrsm2_bufferSize(
2260        handle: cusparseHandle_t,
2261        dirA: cusparseDirection_t,
2262        transA: cusparseOperation_t,
2263        transXY: cusparseOperation_t,
2264        mb: ::core::ffi::c_int,
2265        n: ::core::ffi::c_int,
2266        nnzb: ::core::ffi::c_int,
2267        descrA: cusparseMatDescr_t,
2268        bsrSortedVal: *mut f64,
2269        bsrSortedRowPtr: *const ::core::ffi::c_int,
2270        bsrSortedColInd: *const ::core::ffi::c_int,
2271        blockSize: ::core::ffi::c_int,
2272        info: bsrsm2Info_t,
2273        pBufferSizeInBytes: *mut ::core::ffi::c_int,
2274    ) -> cusparseStatus_t;
2275}
2276unsafe extern "C" {
2277    /// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
2278    ///
2279    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2280    ///
2281    /// ![image9](_images/op-a-non-xpose-1.png)
2282    ///
2283    /// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on dimension `mb,blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur. NOTE(review): the binding itself returns a `cusparseStatus_t`, so the size is presumably written through `pBufferSizeInBytes`; likewise `trans` and `blockDim` presumably refer to the `transA`/`transXY` and `blockSize` parameters — confirm against the cuSPARSE header.
2284    ///
2285    /// * The routine requires no extra storage.
2286    /// * The routine supports asynchronous execution.
2287    /// * The routine supports CUDA graph capture.
2288    pub fn cusparseCbsrsm2_bufferSize(
2289        handle: cusparseHandle_t,
2290        dirA: cusparseDirection_t,
2291        transA: cusparseOperation_t,
2292        transXY: cusparseOperation_t,
2293        mb: ::core::ffi::c_int,
2294        n: ::core::ffi::c_int,
2295        nnzb: ::core::ffi::c_int,
2296        descrA: cusparseMatDescr_t,
2297        bsrSortedVal: *mut cuComplex,
2298        bsrSortedRowPtr: *const ::core::ffi::c_int,
2299        bsrSortedColInd: *const ::core::ffi::c_int,
2300        blockSize: ::core::ffi::c_int,
2301        info: bsrsm2Info_t,
2302        pBufferSizeInBytes: *mut ::core::ffi::c_int,
2303    ) -> cusparseStatus_t;
2304}
2305unsafe extern "C" {
2306    /// This function returns the size of the buffer used in `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X)=`$\alpha$`op(B)`.
2307    ///
2308    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2309    ///
2310    /// ![image9](_images/op-a-non-xpose-1.png)
2311    ///
2312    /// Although there are six combinations in terms of parameter `trans` and the upper (and lower) triangular part of `A`, `bsrsm2_bufferSize()` returns the maximum size of the buffer among these combinations. The buffer size depends on dimension `mb,blockDim` and the number of nonzeros of the matrix, `nnzb`. If the user changes the matrix, it is necessary to call `bsrsm2_bufferSize()` again to get the correct buffer size, otherwise a segmentation fault may occur. NOTE(review): the binding itself returns a `cusparseStatus_t`, so the size is presumably written through `pBufferSizeInBytes`; likewise `trans` and `blockDim` presumably refer to the `transA`/`transXY` and `blockSize` parameters — confirm against the cuSPARSE header.
2313    ///
2314    /// * The routine requires no extra storage.
2315    /// * The routine supports asynchronous execution.
2316    /// * The routine supports CUDA graph capture.
2317    pub fn cusparseZbsrsm2_bufferSize(
2318        handle: cusparseHandle_t,
2319        dirA: cusparseDirection_t,
2320        transA: cusparseOperation_t,
2321        transXY: cusparseOperation_t,
2322        mb: ::core::ffi::c_int,
2323        n: ::core::ffi::c_int,
2324        nnzb: ::core::ffi::c_int,
2325        descrA: cusparseMatDescr_t,
2326        bsrSortedVal: *mut cuDoubleComplex,
2327        bsrSortedRowPtr: *const ::core::ffi::c_int,
2328        bsrSortedColInd: *const ::core::ffi::c_int,
2329        blockSize: ::core::ffi::c_int,
2330        info: bsrsm2Info_t,
2331        pBufferSize: *mut ::core::ffi::c_int,
2332    ) -> cusparseStatus_t;
2333}
2334unsafe extern "C" {
    // NOTE(review): no upstream documentation was carried over for this function. Presumably it
    // reports the `bsrsm2()` work-buffer size through `pBufferSize` — confirm against the cuSPARSE header.
    /// Declaration of the `f32` `bsrsm2_bufferSizeExt` entry point.
    ///
    /// The parameter list mirrors `cusparseZbsrsm2_bufferSize` with `f32` as the value type, except that the fourth parameter is named `transB` instead of `transXY` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
2335    pub fn cusparseSbsrsm2_bufferSizeExt(
2336        handle: cusparseHandle_t,
2337        dirA: cusparseDirection_t,
2338        transA: cusparseOperation_t,
2339        transB: cusparseOperation_t,
2340        mb: ::core::ffi::c_int,
2341        n: ::core::ffi::c_int,
2342        nnzb: ::core::ffi::c_int,
2343        descrA: cusparseMatDescr_t,
2344        bsrSortedVal: *mut f32,
2345        bsrSortedRowPtr: *const ::core::ffi::c_int,
2346        bsrSortedColInd: *const ::core::ffi::c_int,
2347        blockSize: ::core::ffi::c_int,
2348        info: bsrsm2Info_t,
2349        pBufferSize: *mut size_t,
2350    ) -> cusparseStatus_t;
2351}
2352unsafe extern "C" {
    // NOTE(review): no upstream documentation was carried over for this function. Presumably it
    // reports the `bsrsm2()` work-buffer size through `pBufferSize` — confirm against the cuSPARSE header.
    /// Declaration of the `f64` `bsrsm2_bufferSizeExt` entry point.
    ///
    /// The parameter list mirrors `cusparseZbsrsm2_bufferSize` with `f64` as the value type, except that the fourth parameter is named `transB` instead of `transXY` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
2353    pub fn cusparseDbsrsm2_bufferSizeExt(
2354        handle: cusparseHandle_t,
2355        dirA: cusparseDirection_t,
2356        transA: cusparseOperation_t,
2357        transB: cusparseOperation_t,
2358        mb: ::core::ffi::c_int,
2359        n: ::core::ffi::c_int,
2360        nnzb: ::core::ffi::c_int,
2361        descrA: cusparseMatDescr_t,
2362        bsrSortedVal: *mut f64,
2363        bsrSortedRowPtr: *const ::core::ffi::c_int,
2364        bsrSortedColInd: *const ::core::ffi::c_int,
2365        blockSize: ::core::ffi::c_int,
2366        info: bsrsm2Info_t,
2367        pBufferSize: *mut size_t,
2368    ) -> cusparseStatus_t;
2369}
2370unsafe extern "C" {
    // NOTE(review): no upstream documentation was carried over for this function. Presumably it
    // reports the `bsrsm2()` work-buffer size through `pBufferSize` — confirm against the cuSPARSE header.
    /// Declaration of the `cuComplex` `bsrsm2_bufferSizeExt` entry point.
    ///
    /// The parameter list mirrors `cusparseZbsrsm2_bufferSize` with `cuComplex` as the value type, except that the fourth parameter is named `transB` instead of `transXY` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
2371    pub fn cusparseCbsrsm2_bufferSizeExt(
2372        handle: cusparseHandle_t,
2373        dirA: cusparseDirection_t,
2374        transA: cusparseOperation_t,
2375        transB: cusparseOperation_t,
2376        mb: ::core::ffi::c_int,
2377        n: ::core::ffi::c_int,
2378        nnzb: ::core::ffi::c_int,
2379        descrA: cusparseMatDescr_t,
2380        bsrSortedVal: *mut cuComplex,
2381        bsrSortedRowPtr: *const ::core::ffi::c_int,
2382        bsrSortedColInd: *const ::core::ffi::c_int,
2383        blockSize: ::core::ffi::c_int,
2384        info: bsrsm2Info_t,
2385        pBufferSize: *mut size_t,
2386    ) -> cusparseStatus_t;
2387}
2388unsafe extern "C" {
    // NOTE(review): no upstream documentation was carried over for this function. Presumably it
    // reports the `bsrsm2()` work-buffer size through `pBufferSize` — confirm against the cuSPARSE header.
    /// Declaration of the `cuDoubleComplex` `bsrsm2_bufferSizeExt` entry point.
    ///
    /// The parameter list mirrors `cusparseZbsrsm2_bufferSize`, except that the fourth parameter is named `transB` instead of `transXY` and the output pointer `pBufferSize` is `*mut size_t` instead of `*mut c_int`.
2389    pub fn cusparseZbsrsm2_bufferSizeExt(
2390        handle: cusparseHandle_t,
2391        dirA: cusparseDirection_t,
2392        transA: cusparseOperation_t,
2393        transB: cusparseOperation_t,
2394        mb: ::core::ffi::c_int,
2395        n: ::core::ffi::c_int,
2396        nnzb: ::core::ffi::c_int,
2397        descrA: cusparseMatDescr_t,
2398        bsrSortedVal: *mut cuDoubleComplex,
2399        bsrSortedRowPtr: *const ::core::ffi::c_int,
2400        bsrSortedColInd: *const ::core::ffi::c_int,
2401        blockSize: ::core::ffi::c_int,
2402        info: bsrsm2Info_t,
2403        pBufferSize: *mut size_t,
2404    ) -> cusparseStatus_t;
2405}
2406unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the text states that the diagonal type is ignored, yet later describes behaviour
    // for `CUSPARSE_DIAG_TYPE_UNIT`; this reads as inconsistent — check the cuSPARSE documentation.
2407    /// This function performs the analysis phase of `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`.
2408    ///
2409    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2410    ///
2411    /// ![image9](_images/op-a-non-xpose-1.png)
2412    ///
2413    /// and
2414    ///
2415    /// ![image5](_images/op-x-notsupported.png)
2416    ///
2417    /// and `op(B)` and `op(X)` are equal.
2418    ///
2419    /// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
2420    ///
2421    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
2422    ///
2423    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2424    ///
2425    /// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in the opaque structure `info`. The level information can extract more parallelism during a triangular solver. However, `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
2426    ///
2427    /// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the structural zero is.
2428    ///
2429    /// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero, but this result `X` is meaningless.
2430    ///
2431    /// * This function requires temporary extra storage that is allocated internally.
2432    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
2433    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
2434    pub fn cusparseSbsrsm2_analysis(
2435        handle: cusparseHandle_t,
2436        dirA: cusparseDirection_t,
2437        transA: cusparseOperation_t,
2438        transXY: cusparseOperation_t,
2439        mb: ::core::ffi::c_int,
2440        n: ::core::ffi::c_int,
2441        nnzb: ::core::ffi::c_int,
2442        descrA: cusparseMatDescr_t,
2443        bsrSortedVal: *const f32,
2444        bsrSortedRowPtr: *const ::core::ffi::c_int,
2445        bsrSortedColInd: *const ::core::ffi::c_int,
2446        blockSize: ::core::ffi::c_int,
2447        info: bsrsm2Info_t,
2448        policy: cusparseSolvePolicy_t,
2449        pBuffer: *mut ::core::ffi::c_void,
2450    ) -> cusparseStatus_t;
2451}
2452unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the text states that the diagonal type is ignored, yet later describes behaviour
    // for `CUSPARSE_DIAG_TYPE_UNIT`; this reads as inconsistent — check the cuSPARSE documentation.
2453    /// This function performs the analysis phase of `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`.
2454    ///
2455    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2456    ///
2457    /// ![image9](_images/op-a-non-xpose-1.png)
2458    ///
2459    /// and
2460    ///
2461    /// ![image5](_images/op-x-notsupported.png)
2462    ///
2463    /// and `op(B)` and `op(X)` are equal.
2464    ///
2465    /// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
2466    ///
2467    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
2468    ///
2469    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2470    ///
2471    /// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in the opaque structure `info`. The level information can extract more parallelism during a triangular solver. However, `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
2472    ///
2473    /// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the structural zero is.
2474    ///
2475    /// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero, but this result `X` is meaningless.
2476    ///
2477    /// * This function requires temporary extra storage that is allocated internally.
2478    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
2479    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
2480    pub fn cusparseDbsrsm2_analysis(
2481        handle: cusparseHandle_t,
2482        dirA: cusparseDirection_t,
2483        transA: cusparseOperation_t,
2484        transXY: cusparseOperation_t,
2485        mb: ::core::ffi::c_int,
2486        n: ::core::ffi::c_int,
2487        nnzb: ::core::ffi::c_int,
2488        descrA: cusparseMatDescr_t,
2489        bsrSortedVal: *const f64,
2490        bsrSortedRowPtr: *const ::core::ffi::c_int,
2491        bsrSortedColInd: *const ::core::ffi::c_int,
2492        blockSize: ::core::ffi::c_int,
2493        info: bsrsm2Info_t,
2494        policy: cusparseSolvePolicy_t,
2495        pBuffer: *mut ::core::ffi::c_void,
2496    ) -> cusparseStatus_t;
2497}
2498unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the text states that the diagonal type is ignored, yet later describes behaviour
    // for `CUSPARSE_DIAG_TYPE_UNIT`; this reads as inconsistent — check the cuSPARSE documentation.
2499    /// This function performs the analysis phase of `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`.
2500    ///
2501    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2502    ///
2503    /// ![image9](_images/op-a-non-xpose-1.png)
2504    ///
2505    /// and
2506    ///
2507    /// ![image5](_images/op-x-notsupported.png)
2508    ///
2509    /// and `op(B)` and `op(X)` are equal.
2510    ///
2511    /// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
2512    ///
2513    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
2514    ///
2515    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2516    ///
2517    /// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in the opaque structure `info`. The level information can extract more parallelism during a triangular solver. However, `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
2518    ///
2519    /// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the structural zero is.
2520    ///
2521    /// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero, but this result `X` is meaningless.
2522    ///
2523    /// * This function requires temporary extra storage that is allocated internally.
2524    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
2525    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
2526    pub fn cusparseCbsrsm2_analysis(
2527        handle: cusparseHandle_t,
2528        dirA: cusparseDirection_t,
2529        transA: cusparseOperation_t,
2530        transXY: cusparseOperation_t,
2531        mb: ::core::ffi::c_int,
2532        n: ::core::ffi::c_int,
2533        nnzb: ::core::ffi::c_int,
2534        descrA: cusparseMatDescr_t,
2535        bsrSortedVal: *const cuComplex,
2536        bsrSortedRowPtr: *const ::core::ffi::c_int,
2537        bsrSortedColInd: *const ::core::ffi::c_int,
2538        blockSize: ::core::ffi::c_int,
2539        info: bsrsm2Info_t,
2540        policy: cusparseSolvePolicy_t,
2541        pBuffer: *mut ::core::ffi::c_void,
2542    ) -> cusparseStatus_t;
2543}
2544unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the text states that the diagonal type is ignored, yet later describes behaviour
    // for `CUSPARSE_DIAG_TYPE_UNIT`; this reads as inconsistent — check the cuSPARSE documentation.
2545    /// This function performs the analysis phase of `bsrsm2()`, a new sparse triangular linear system `op(A)*op(X) =`$\alpha$`op(B)`.
2546    ///
2547    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar; and
2548    ///
2549    /// ![image9](_images/op-a-non-xpose-1.png)
2550    ///
2551    /// and
2552    ///
2553    /// ![image5](_images/op-x-notsupported.png)
2554    ///
2555    /// and `op(B)` and `op(X)` are equal.
2556    ///
2557    /// The block of BSR format is of size `blockDim*blockDim`, stored in column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
2558    ///
2559    /// It is expected that this function will be executed only once for a given matrix and a particular operation type.
2560    ///
2561    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2562    ///
2563    /// Function `bsrsm2_analysis()` reports a structural zero and computes the level information stored in the opaque structure `info`. The level information can extract more parallelism during a triangular solver. However, `bsrsm2_solve()` can be done without level information. To disable level information, the user needs to specify the policy of the triangular solver as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
2564    ///
2565    /// Function `bsrsm2_analysis()` always reports the first structural zero, even if the parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. Besides, no structural zero is reported if [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] is specified, even if block `A(j,j)` is missing for some `j`. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the structural zero is.
2566    ///
2567    /// If `bsrsm2_analysis()` reports a structural zero, the solve will return a numerical zero in the same position as the structural zero, but this result `X` is meaningless.
2568    ///
2569    /// * This function requires temporary extra storage that is allocated internally.
2570    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
2571    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
2572    pub fn cusparseZbsrsm2_analysis(
2573        handle: cusparseHandle_t,
2574        dirA: cusparseDirection_t,
2575        transA: cusparseOperation_t,
2576        transXY: cusparseOperation_t,
2577        mb: ::core::ffi::c_int,
2578        n: ::core::ffi::c_int,
2579        nnzb: ::core::ffi::c_int,
2580        descrA: cusparseMatDescr_t,
2581        bsrSortedVal: *const cuDoubleComplex,
2582        bsrSortedRowPtr: *const ::core::ffi::c_int,
2583        bsrSortedColInd: *const ::core::ffi::c_int,
2584        blockSize: ::core::ffi::c_int,
2585        info: bsrsm2Info_t,
2586        policy: cusparseSolvePolicy_t,
2587        pBuffer: *mut ::core::ffi::c_void,
2588    ) -> cusparseStatus_t;
2589}
2590unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the equation after the first sentence was missing from the generated text; it is
    // restored from the wording used for the other `bsrsm2` routines in this file — confirm upstream.
2591    /// This function performs the solve phase of the solution of a sparse triangular linear system:
2592    ///
    /// `op(A)*op(X) =`$\alpha$`op(B)`
    ///
2593    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
2594    ///
2595    /// ![image9](_images/op-a-non-xpose-1.png)
2596    ///
2597    /// and
2598    ///
2599    /// ![image6](_images/op-x-notsupported-3.png)
2600    ///
2601    /// Only `op(A)=A` is supported.
2602    ///
2603    /// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
2604    ///
2605    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
2606    ///
2607    /// This function may be executed multiple times for a given matrix and a particular operation type.
2608    ///
2609    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2610    ///
2611    /// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2612    ///
2613    /// Function `bsrsm2_solve()` has the same behavior as `bsrsv02_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
2614    ///
2615    /// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
2616    ///
2617    /// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
2618    ///
2619    /// The function supports the following properties if `pBuffer != NULL`:
2620    ///
2621    /// * The routine requires no extra storage.
2622    /// * The routine supports asynchronous execution.
2623    /// * The routine supports CUDA graph capture.
2624    pub fn cusparseSbsrsm2_solve(
2625        handle: cusparseHandle_t,
2626        dirA: cusparseDirection_t,
2627        transA: cusparseOperation_t,
2628        transXY: cusparseOperation_t,
2629        mb: ::core::ffi::c_int,
2630        n: ::core::ffi::c_int,
2631        nnzb: ::core::ffi::c_int,
2632        alpha: *const f32,
2633        descrA: cusparseMatDescr_t,
2634        bsrSortedVal: *const f32,
2635        bsrSortedRowPtr: *const ::core::ffi::c_int,
2636        bsrSortedColInd: *const ::core::ffi::c_int,
2637        blockSize: ::core::ffi::c_int,
2638        info: bsrsm2Info_t,
2639        B: *const f32,
2640        ldb: ::core::ffi::c_int,
2641        X: *mut f32,
2642        ldx: ::core::ffi::c_int,
2643        policy: cusparseSolvePolicy_t,
2644        pBuffer: *mut ::core::ffi::c_void,
2645    ) -> cusparseStatus_t;
2646}
2647unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the equation after the first sentence was missing from the generated text; it is
    // restored from the wording used for the other `bsrsm2` routines in this file — confirm upstream.
2648    /// This function performs the solve phase of the solution of a sparse triangular linear system:
2649    ///
    /// `op(A)*op(X) =`$\alpha$`op(B)`
    ///
2650    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
2651    ///
2652    /// ![image9](_images/op-a-non-xpose-1.png)
2653    ///
2654    /// and
2655    ///
2656    /// ![image6](_images/op-x-notsupported-3.png)
2657    ///
2658    /// Only `op(A)=A` is supported.
2659    ///
2660    /// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
2661    ///
2662    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
2663    ///
2664    /// This function may be executed multiple times for a given matrix and a particular operation type.
2665    ///
2666    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2667    ///
2668    /// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2669    ///
2670    /// Function `bsrsm2_solve()` has the same behavior as `bsrsv02_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
2671    ///
2672    /// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
2673    ///
2674    /// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
2675    ///
2676    /// The function supports the following properties if `pBuffer != NULL`:
2677    ///
2678    /// * The routine requires no extra storage.
2679    /// * The routine supports asynchronous execution.
2680    /// * The routine supports CUDA graph capture.
2681    pub fn cusparseDbsrsm2_solve(
2682        handle: cusparseHandle_t,
2683        dirA: cusparseDirection_t,
2684        transA: cusparseOperation_t,
2685        transXY: cusparseOperation_t,
2686        mb: ::core::ffi::c_int,
2687        n: ::core::ffi::c_int,
2688        nnzb: ::core::ffi::c_int,
2689        alpha: *const f64,
2690        descrA: cusparseMatDescr_t,
2691        bsrSortedVal: *const f64,
2692        bsrSortedRowPtr: *const ::core::ffi::c_int,
2693        bsrSortedColInd: *const ::core::ffi::c_int,
2694        blockSize: ::core::ffi::c_int,
2695        info: bsrsm2Info_t,
2696        B: *const f64,
2697        ldb: ::core::ffi::c_int,
2698        X: *mut f64,
2699        ldx: ::core::ffi::c_int,
2700        policy: cusparseSolvePolicy_t,
2701        pBuffer: *mut ::core::ffi::c_void,
2702    ) -> cusparseStatus_t;
2703}
2704unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the equation after the first sentence was missing from the generated text; it is
    // restored from the wording used for the other `bsrsm2` routines in this file — confirm upstream.
2705    /// This function performs the solve phase of the solution of a sparse triangular linear system:
2706    ///
    /// `op(A)*op(X) =`$\alpha$`op(B)`
    ///
2707    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
2708    ///
2709    /// ![image9](_images/op-a-non-xpose-1.png)
2710    ///
2711    /// and
2712    ///
2713    /// ![image6](_images/op-x-notsupported-3.png)
2714    ///
2715    /// Only `op(A)=A` is supported.
2716    ///
2717    /// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
2718    ///
2719    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
2720    ///
2721    /// This function may be executed multiple times for a given matrix and a particular operation type.
2722    ///
2723    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2724    ///
2725    /// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2726    ///
2727    /// Function `bsrsm2_solve()` has the same behavior as `bsrsv02_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
2728    ///
2729    /// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
2730    ///
2731    /// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
2732    ///
2733    /// The function supports the following properties if `pBuffer != NULL`:
2734    ///
2735    /// * The routine requires no extra storage.
2736    /// * The routine supports asynchronous execution.
2737    /// * The routine supports CUDA graph capture.
2738    pub fn cusparseCbsrsm2_solve(
2739        handle: cusparseHandle_t,
2740        dirA: cusparseDirection_t,
2741        transA: cusparseOperation_t,
2742        transXY: cusparseOperation_t,
2743        mb: ::core::ffi::c_int,
2744        n: ::core::ffi::c_int,
2745        nnzb: ::core::ffi::c_int,
2746        alpha: *const cuComplex,
2747        descrA: cusparseMatDescr_t,
2748        bsrSortedVal: *const cuComplex,
2749        bsrSortedRowPtr: *const ::core::ffi::c_int,
2750        bsrSortedColInd: *const ::core::ffi::c_int,
2751        blockSize: ::core::ffi::c_int,
2752        info: bsrsm2Info_t,
2753        B: *const cuComplex,
2754        ldb: ::core::ffi::c_int,
2755        X: *mut cuComplex,
2756        ldx: ::core::ffi::c_int,
2757        policy: cusparseSolvePolicy_t,
2758        pBuffer: *mut ::core::ffi::c_void,
2759    ) -> cusparseStatus_t;
2760}
2761unsafe extern "C" {
    // NOTE(review): the prose below uses the names `bsrValA`, `bsrRowPtrA`, `bsrColIndA` and `blockDim`;
    // these presumably correspond to the `bsrSortedVal`, `bsrSortedRowPtr`, `bsrSortedColInd` and
    // `blockSize` parameters of this declaration — confirm against the cuSPARSE header.
    // NOTE(review): the equation after the first sentence was missing from the generated text; it is
    // restored from the wording used for the other `bsrsm2` routines in this file — confirm upstream.
2762    /// This function performs the solve phase of the solution of a sparse triangular linear system:
2763    ///
    /// `op(A)*op(X) =`$\alpha$`op(B)`
    ///
2764    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`; `B` and `X` are the right-hand-side and the solution matrices; $\alpha$ is a scalar, and
2765    ///
2766    /// ![image9](_images/op-a-non-xpose-1.png)
2767    ///
2768    /// and
2769    ///
2770    /// ![image6](_images/op-x-notsupported-3.png)
2771    ///
2772    /// Only `op(A)=A` is supported.
2773    ///
2774    /// `op(B)` and `op(X)` must be performed in the same way. In other words, if `op(B)=B`, `op(X)=X`.
2775    ///
2776    /// The block of BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrsm2_solve()` can support an arbitrary `blockDim`.
2777    ///
2778    /// This function may be executed multiple times for a given matrix and a particular operation type.
2779    ///
2780    /// This function requires the buffer size returned by `bsrsm2_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2781    ///
2782    /// Although `bsrsm2_solve()` can be done without level information, the user still needs to be aware of consistency. If `bsrsm2_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrsm2_solve()` can be run with or without levels. On the other hand, if `bsrsm2_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrsm2_solve()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
2783    ///
2784    /// Function `bsrsm2_solve()` has the same behavior as `bsrsv02_solve()`, reporting the first numerical zero, including a structural zero. The user must call `cusparseXbsrsm2_query_zero_pivot()` to know where the numerical zero is.
2785    ///
2786    /// The motivation of `transpose(X)` is to improve the memory access of matrix `X`. The computational pattern of `transpose(X)` with matrix `X` in column-major order is equivalent to `X` with matrix `X` in row-major order.
2787    ///
2788    /// In-place is supported and requires that `B` and `X` point to the same memory block, and `ldb=ldx`.
2789    ///
2790    /// The function supports the following properties if `pBuffer != NULL`:
2791    ///
2792    /// * The routine requires no extra storage.
2793    /// * The routine supports asynchronous execution.
2794    /// * The routine supports CUDA graph capture.
2795    pub fn cusparseZbsrsm2_solve(
2796        handle: cusparseHandle_t,
2797        dirA: cusparseDirection_t,
2798        transA: cusparseOperation_t,
2799        transXY: cusparseOperation_t,
2800        mb: ::core::ffi::c_int,
2801        n: ::core::ffi::c_int,
2802        nnzb: ::core::ffi::c_int,
2803        alpha: *const cuDoubleComplex,
2804        descrA: cusparseMatDescr_t,
2805        bsrSortedVal: *const cuDoubleComplex,
2806        bsrSortedRowPtr: *const ::core::ffi::c_int,
2807        bsrSortedColInd: *const ::core::ffi::c_int,
2808        blockSize: ::core::ffi::c_int,
2809        info: bsrsm2Info_t,
2810        B: *const cuDoubleComplex,
2811        ldb: ::core::ffi::c_int,
2812        X: *mut cuDoubleComplex,
2813        ldx: ::core::ffi::c_int,
2814        policy: cusparseSolvePolicy_t,
2815        pBuffer: *mut ::core::ffi::c_void,
2816    ) -> cusparseStatus_t;
2817}
2818unsafe extern "C" {
2819    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is:
2820    ///
2821    /// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
2822    ///
2823    /// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
2824    ///
2825    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
2826    ///
2827    /// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. Note that `tol` is a `*mut f64` in this binding even though `boost_val` is single precision (`*mut f32`).
2828    ///
2829    /// * The routine requires no extra storage.
2830    /// * The routine supports asynchronous execution.
2831    /// * The routine supports CUDA graph capture.
2832    pub fn cusparseScsrilu02_numericBoost(
2833        handle: cusparseHandle_t,
2834        info: csrilu02Info_t,
2835        enable_boost: ::core::ffi::c_int,
2836        tol: *mut f64,
2837        boost_val: *mut f32,
2838    ) -> cusparseStatus_t;
2839}
2840unsafe extern "C" {
2841    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is:
2842    ///
2843    /// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
2844    ///
2845    /// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
2846    ///
2847    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
2848    ///
2849    /// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. In this binding both `tol` and `boost_val` are `*mut f64`.
2850    ///
2851    /// * The routine requires no extra storage.
2852    /// * The routine supports asynchronous execution.
2853    /// * The routine supports CUDA graph capture.
2854    pub fn cusparseDcsrilu02_numericBoost(
2855        handle: cusparseHandle_t,
2856        info: csrilu02Info_t,
2857        enable_boost: ::core::ffi::c_int,
2858        tol: *mut f64,
2859        boost_val: *mut f64,
2860    ) -> cusparseStatus_t;
2861}
2862unsafe extern "C" {
2863    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is:
2864    ///
2865    /// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
2866    ///
2867    /// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
2868    ///
2869    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
2870    ///
2871    /// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. Note that `tol` is a real-valued `*mut f64` in this binding, while `boost_val` is a `*mut cuComplex`.
2872    ///
2873    /// * The routine requires no extra storage.
2874    /// * The routine supports asynchronous execution.
2875    /// * The routine supports CUDA graph capture.
2876    pub fn cusparseCcsrilu02_numericBoost(
2877        handle: cusparseHandle_t,
2878        info: csrilu02Info_t,
2879        enable_boost: ::core::ffi::c_int,
2880        tol: *mut f64,
2881        boost_val: *mut cuComplex,
2882    ) -> cusparseStatus_t;
2883}
2884unsafe extern "C" {
2885    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. The `tol` is used to determine a numerical zero, and the `boost_val` is used to replace a numerical zero. The behavior is:
2886    ///
2887    /// if `tol >= fabs(A(j,j))`, then `A(j,j)=boost_val`.
2888    ///
2889    /// To enable a boost value, the user has to set parameter `enable_boost` to 1 before calling `csrilu02()`. To disable a boost value, the user can call `csrilu02_numericBoost()` again with parameter `enable_boost=0`.
2890    ///
2891    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
2892    ///
2893    /// Both `tol` and `boost_val` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. Note that `tol` is a real-valued `*mut f64` in this binding, while `boost_val` is a `*mut cuDoubleComplex`.
2894    ///
2895    /// * The routine requires no extra storage.
2896    /// * The routine supports asynchronous execution.
2897    /// * The routine supports CUDA graph capture.
2898    pub fn cusparseZcsrilu02_numericBoost(
2899        handle: cusparseHandle_t,
2900        info: csrilu02Info_t,
2901        enable_boost: ::core::ffi::c_int,
2902        tol: *mut f64,
2903        boost_val: *mut cuDoubleComplex,
2904    ) -> cusparseStatus_t;
2905}
2906unsafe extern "C" {
2907    /// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero; otherwise, `position=-1`.
2908    ///
2909    /// The `position` can be 0-based or 1-based, the same as the matrix.
2910    ///
2911    /// Function [`cusparseXcsrilu02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
2912    ///
2913    /// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
2914    ///
2915    /// * The routine requires no extra storage.
2916    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
2917    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
2918    ///
2919    /// # Parameters
2920    ///
2921    /// - `position`: If no structural or numerical zero, `position` is -1; otherwise if `A(j,j)` is missing or `U(j,j)` is zero, `position=j`.
2922    #[deprecated]
2923    pub fn cusparseXcsrilu02_zeroPivot(
2924        handle: cusparseHandle_t,
2925        info: csrilu02Info_t,
2926        position: *mut ::core::ffi::c_int,
2927    ) -> cusparseStatus_t;
2928}
2929unsafe extern "C" {
2930    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
2931    ///
2932    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
2933    ///
2934    /// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
2935    ///
2936    /// * The routine requires no extra storage.
2937    /// * The routine supports asynchronous execution.
2938    /// * The routine supports CUDA graph capture.
2939    pub fn cusparseScsrilu02_bufferSize(
2940        handle: cusparseHandle_t,
2941        m: ::core::ffi::c_int,
2942        nnz: ::core::ffi::c_int,
2943        descrA: cusparseMatDescr_t,
2944        csrSortedValA: *mut f32,
2945        csrSortedRowPtrA: *const ::core::ffi::c_int,
2946        csrSortedColIndA: *const ::core::ffi::c_int,
2947        info: csrilu02Info_t,
2948        pBufferSizeInBytes: *mut ::core::ffi::c_int,
2949    ) -> cusparseStatus_t;
2950}
2951unsafe extern "C" {
2952    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
2953    ///
2954    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
2955    ///
2956    /// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
2957    ///
2958    /// * The routine requires no extra storage.
2959    /// * The routine supports asynchronous execution.
2960    /// * The routine supports CUDA graph capture.
2961    pub fn cusparseDcsrilu02_bufferSize(
2962        handle: cusparseHandle_t,
2963        m: ::core::ffi::c_int,
2964        nnz: ::core::ffi::c_int,
2965        descrA: cusparseMatDescr_t,
2966        csrSortedValA: *mut f64,
2967        csrSortedRowPtrA: *const ::core::ffi::c_int,
2968        csrSortedColIndA: *const ::core::ffi::c_int,
2969        info: csrilu02Info_t,
2970        pBufferSizeInBytes: *mut ::core::ffi::c_int,
2971    ) -> cusparseStatus_t;
2972}
2973unsafe extern "C" {
2974    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
2975    ///
2976    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
2977    ///
2978    /// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
2979    ///
2980    /// * The routine requires no extra storage.
2981    /// * The routine supports asynchronous execution.
2982    /// * The routine supports CUDA graph capture.
2983    pub fn cusparseCcsrilu02_bufferSize(
2984        handle: cusparseHandle_t,
2985        m: ::core::ffi::c_int,
2986        nnz: ::core::ffi::c_int,
2987        descrA: cusparseMatDescr_t,
2988        csrSortedValA: *mut cuComplex,
2989        csrSortedRowPtrA: *const ::core::ffi::c_int,
2990        csrSortedColIndA: *const ::core::ffi::c_int,
2991        info: csrilu02Info_t,
2992        pBufferSizeInBytes: *mut ::core::ffi::c_int,
2993    ) -> cusparseStatus_t;
2994}
2995unsafe extern "C" {
2996    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
2997    ///
2998    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
2999    ///
3000    /// The buffer size depends on the dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
3001    ///
3002    /// * The routine requires no extra storage.
3003    /// * The routine supports asynchronous execution.
3004    /// * The routine supports CUDA graph capture.
3005    pub fn cusparseZcsrilu02_bufferSize(
3006        handle: cusparseHandle_t,
3007        m: ::core::ffi::c_int,
3008        nnz: ::core::ffi::c_int,
3009        descrA: cusparseMatDescr_t,
3010        csrSortedValA: *mut cuDoubleComplex,
3011        csrSortedRowPtrA: *const ::core::ffi::c_int,
3012        csrSortedColIndA: *const ::core::ffi::c_int,
3013        info: csrilu02Info_t,
3014        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3015    ) -> cusparseStatus_t;
3016}
3017unsafe extern "C" {
    /// Counterpart of [`cusparseScsrilu02_bufferSize`] whose size output, `pBufferSize`, is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): upstream ships no documentation for this entry point; presumably it reports the same buffer size in bytes — confirm against the cuSPARSE headers.
3018    pub fn cusparseScsrilu02_bufferSizeExt(
3019        handle: cusparseHandle_t,
3020        m: ::core::ffi::c_int,
3021        nnz: ::core::ffi::c_int,
3022        descrA: cusparseMatDescr_t,
3023        csrSortedVal: *mut f32,
3024        csrSortedRowPtr: *const ::core::ffi::c_int,
3025        csrSortedColInd: *const ::core::ffi::c_int,
3026        info: csrilu02Info_t,
3027        pBufferSize: *mut size_t,
3028    ) -> cusparseStatus_t;
3029}
3030unsafe extern "C" {
    /// Counterpart of [`cusparseDcsrilu02_bufferSize`] whose size output, `pBufferSize`, is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): upstream ships no documentation for this entry point; presumably it reports the same buffer size in bytes — confirm against the cuSPARSE headers.
3031    pub fn cusparseDcsrilu02_bufferSizeExt(
3032        handle: cusparseHandle_t,
3033        m: ::core::ffi::c_int,
3034        nnz: ::core::ffi::c_int,
3035        descrA: cusparseMatDescr_t,
3036        csrSortedVal: *mut f64,
3037        csrSortedRowPtr: *const ::core::ffi::c_int,
3038        csrSortedColInd: *const ::core::ffi::c_int,
3039        info: csrilu02Info_t,
3040        pBufferSize: *mut size_t,
3041    ) -> cusparseStatus_t;
3042}
3043unsafe extern "C" {
    /// Counterpart of [`cusparseCcsrilu02_bufferSize`] whose size output, `pBufferSize`, is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): upstream ships no documentation for this entry point; presumably it reports the same buffer size in bytes — confirm against the cuSPARSE headers.
3044    pub fn cusparseCcsrilu02_bufferSizeExt(
3045        handle: cusparseHandle_t,
3046        m: ::core::ffi::c_int,
3047        nnz: ::core::ffi::c_int,
3048        descrA: cusparseMatDescr_t,
3049        csrSortedVal: *mut cuComplex,
3050        csrSortedRowPtr: *const ::core::ffi::c_int,
3051        csrSortedColInd: *const ::core::ffi::c_int,
3052        info: csrilu02Info_t,
3053        pBufferSize: *mut size_t,
3054    ) -> cusparseStatus_t;
3055}
3056unsafe extern "C" {
    /// Counterpart of [`cusparseZcsrilu02_bufferSize`] whose size output, `pBufferSize`, is a `*mut size_t` rather than a `*mut c_int`.
    ///
    /// NOTE(review): upstream ships no documentation for this entry point; presumably it reports the same buffer size in bytes — confirm against the cuSPARSE headers.
3057    pub fn cusparseZcsrilu02_bufferSizeExt(
3058        handle: cusparseHandle_t,
3059        m: ::core::ffi::c_int,
3060        nnz: ::core::ffi::c_int,
3061        descrA: cusparseMatDescr_t,
3062        csrSortedVal: *mut cuDoubleComplex,
3063        csrSortedRowPtr: *const ::core::ffi::c_int,
3064        csrSortedColInd: *const ::core::ffi::c_int,
3065        info: csrilu02Info_t,
3066        pBufferSize: *mut size_t,
3067    ) -> cusparseStatus_t;
3068}
3069unsafe extern "C" {
3070    /// This function performs the analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3071    ///
3072    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
3073    ///
3074    /// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3075    ///
3076    /// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however, `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3077    ///
3078    /// It is the user’s choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3079    ///
3080    /// * This function requires temporary extra storage that is allocated internally.
3081    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3082    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3083    pub fn cusparseScsrilu02_analysis(
3084        handle: cusparseHandle_t,
3085        m: ::core::ffi::c_int,
3086        nnz: ::core::ffi::c_int,
3087        descrA: cusparseMatDescr_t,
3088        csrSortedValA: *const f32,
3089        csrSortedRowPtrA: *const ::core::ffi::c_int,
3090        csrSortedColIndA: *const ::core::ffi::c_int,
3091        info: csrilu02Info_t,
3092        policy: cusparseSolvePolicy_t,
3093        pBuffer: *mut ::core::ffi::c_void,
3094    ) -> cusparseStatus_t;
3095}
3096unsafe extern "C" {
3097    /// This function performs the analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3098    ///
3099    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
3100    ///
3101    /// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3102    ///
3103    /// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however, `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3104    ///
3105    /// It is the user’s choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3106    ///
3107    /// * This function requires temporary extra storage that is allocated internally.
3108    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3109    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3110    pub fn cusparseDcsrilu02_analysis(
3111        handle: cusparseHandle_t,
3112        m: ::core::ffi::c_int,
3113        nnz: ::core::ffi::c_int,
3114        descrA: cusparseMatDescr_t,
3115        csrSortedValA: *const f64,
3116        csrSortedRowPtrA: *const ::core::ffi::c_int,
3117        csrSortedColIndA: *const ::core::ffi::c_int,
3118        info: csrilu02Info_t,
3119        policy: cusparseSolvePolicy_t,
3120        pBuffer: *mut ::core::ffi::c_void,
3121    ) -> cusparseStatus_t;
3122}
3123unsafe extern "C" {
3124    /// This function performs the analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3125    ///
3126    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
3127    ///
3128    /// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3129    ///
3130    /// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however, `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3131    ///
3132    /// It is the user’s choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3133    ///
3134    /// * This function requires temporary extra storage that is allocated internally.
3135    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3136    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3137    pub fn cusparseCcsrilu02_analysis(
3138        handle: cusparseHandle_t,
3139        m: ::core::ffi::c_int,
3140        nnz: ::core::ffi::c_int,
3141        descrA: cusparseMatDescr_t,
3142        csrSortedValA: *const cuComplex,
3143        csrSortedRowPtrA: *const ::core::ffi::c_int,
3144        csrSortedColIndA: *const ::core::ffi::c_int,
3145        info: csrilu02Info_t,
3146        policy: cusparseSolvePolicy_t,
3147        pBuffer: *mut ::core::ffi::c_void,
3148    ) -> cusparseStatus_t;
3149}
3150unsafe extern "C" {
3151    /// This function performs the analysis phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3152    ///
3153    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding).
3154    ///
3155    /// This function requires the buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3156    ///
3157    /// Function `csrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization; however, `csrilu02()` can be done without level information. To disable level information, the user must specify the policy of `csrilu02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3158    ///
3159    /// It is the user’s choice whether to call `csrilu02()` if `csrilu02_analysis()` reports a structural zero. In this case, the user can still call `csrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3160    ///
3161    /// * This function requires temporary extra storage that is allocated internally.
3162    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3163    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3164    pub fn cusparseZcsrilu02_analysis(
3165        handle: cusparseHandle_t,
3166        m: ::core::ffi::c_int,
3167        nnz: ::core::ffi::c_int,
3168        descrA: cusparseMatDescr_t,
3169        csrSortedValA: *const cuDoubleComplex,
3170        csrSortedRowPtrA: *const ::core::ffi::c_int,
3171        csrSortedColIndA: *const ::core::ffi::c_int,
3172        info: csrilu02Info_t,
3173        policy: cusparseSolvePolicy_t,
3174        pBuffer: *mut ::core::ffi::c_void,
3175    ) -> cusparseStatus_t;
3176}
3177unsafe extern "C" {
3178    /// This function performs the solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3179    ///
3180    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding). NOTE(review): `csrSortedValA_valM` is the only `*mut` matrix array, which suggests it is overwritten with the factors — confirm against the cuSPARSE documentation.
3181    ///
3182    /// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3183    ///
3184    /// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
3185    ///
3186    /// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3187    ///
3188    /// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
3189    ///
3190    /// For example, suppose `A` is a real $m \times m$ matrix, the following code solves precondition system `M*y = x` where `M` is the product of LU factors `L` and `U`. (The example code from the upstream documentation is not reproduced in this binding.)
3191    ///
3192    /// The function supports the following properties if `pBuffer != NULL`:
3193    ///
3194    /// * The routine requires no extra storage.
3195    /// * The routine supports asynchronous execution.
3196    /// * The routine supports CUDA graph capture.
3197    pub fn cusparseScsrilu02(
3198        handle: cusparseHandle_t,
3199        m: ::core::ffi::c_int,
3200        nnz: ::core::ffi::c_int,
3201        descrA: cusparseMatDescr_t,
3202        csrSortedValA_valM: *mut f32,
3203        csrSortedRowPtrA: *const ::core::ffi::c_int,
3204        csrSortedColIndA: *const ::core::ffi::c_int,
3205        info: csrilu02Info_t,
3206        policy: cusparseSolvePolicy_t,
3207        pBuffer: *mut ::core::ffi::c_void,
3208    ) -> cusparseStatus_t;
3209}
3210unsafe extern "C" {
3211    /// This function performs the solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3212    ///
3213    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding). NOTE(review): `csrSortedValA_valM` is the only `*mut` matrix array, which suggests it is overwritten with the factors — confirm against the cuSPARSE documentation.
3214    ///
3215    /// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3216    ///
3217    /// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
3218    ///
3219    /// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3220    ///
3221    /// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
3222    ///
3223    /// For example, suppose `A` is a real $m \times m$ matrix, the following code solves precondition system `M*y = x` where `M` is the product of LU factors `L` and `U`. (The example code from the upstream documentation is not reproduced in this binding.)
3224    ///
3225    /// The function supports the following properties if `pBuffer != NULL`:
3226    ///
3227    /// * The routine requires no extra storage.
3228    /// * The routine supports asynchronous execution.
3229    /// * The routine supports CUDA graph capture.
3230    pub fn cusparseDcsrilu02(
3231        handle: cusparseHandle_t,
3232        m: ::core::ffi::c_int,
3233        nnz: ::core::ffi::c_int,
3234        descrA: cusparseMatDescr_t,
3235        csrSortedValA_valM: *mut f64,
3236        csrSortedRowPtrA: *const ::core::ffi::c_int,
3237        csrSortedColIndA: *const ::core::ffi::c_int,
3238        info: csrilu02Info_t,
3239        policy: cusparseSolvePolicy_t,
3240        pBuffer: *mut ::core::ffi::c_void,
3241    ) -> cusparseStatus_t;
3242}
3243unsafe extern "C" {
3244    /// This function performs the solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3245    ///
3246    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding). NOTE(review): `csrSortedValA_valM` is the only `*mut` matrix array, which suggests it is overwritten with the factors — confirm against the cuSPARSE documentation.
3247    ///
3248    /// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3249    ///
3250    /// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
3251    ///
3252    /// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3253    ///
3254    /// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
3255    ///
3256    /// For example, suppose `A` is a real $m \times m$ matrix, the following code solves precondition system `M*y = x` where `M` is the product of LU factors `L` and `U`. (The example code from the upstream documentation is not reproduced in this binding.)
3257    ///
3258    /// The function supports the following properties if `pBuffer != NULL`:
3259    ///
3260    /// * The routine requires no extra storage.
3261    /// * The routine supports asynchronous execution.
3262    /// * The routine supports CUDA graph capture.
3263    pub fn cusparseCcsrilu02(
3264        handle: cusparseHandle_t,
3265        m: ::core::ffi::c_int,
3266        nnz: ::core::ffi::c_int,
3267        descrA: cusparseMatDescr_t,
3268        csrSortedValA_valM: *mut cuComplex,
3269        csrSortedRowPtrA: *const ::core::ffi::c_int,
3270        csrSortedColIndA: *const ::core::ffi::c_int,
3271        info: csrilu02Info_t,
3272        policy: cusparseSolvePolicy_t,
3273        pBuffer: *mut ::core::ffi::c_void,
3274    ) -> cusparseStatus_t;
3275}
3276unsafe extern "C" {
3277    /// This function performs the solve phase of the incomplete-LU factorization with $0$ fill-in and no pivoting: $A \approx LU$.
3278    ///
3279    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA_valM`, `csrRowPtrA`, and `csrColIndA` (presumably the parameters `csrSortedValA_valM`, `csrSortedRowPtrA`, and `csrSortedColIndA` of this binding). NOTE(review): `csrSortedValA_valM` is the only `*mut` matrix array, which suggests it is overwritten with the factors — confirm against the cuSPARSE documentation.
3280    ///
3281    /// This function requires a buffer size returned by `csrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3282    ///
3283    /// The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`]. The fill mode and diagonal type are ignored.
3284    ///
3285    /// Although `csrilu02()` can be done without level information, the user still needs to be aware of consistency. If `csrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csrilu02()` can be run with or without levels. On the other hand, if `csrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3286    ///
3287    /// Function `csrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsrilu02_zeroPivot`] to know where the numerical zero is.
3288    ///
3289    /// For example, suppose `A` is a real $m \times m$ matrix, the following code solves precondition system `M*y = x` where `M` is the product of LU factors `L` and `U`. (The example code from the upstream documentation is not reproduced in this binding.)
3290    ///
3291    /// The function supports the following properties if `pBuffer != NULL`:
3292    ///
3293    /// * The routine requires no extra storage.
3294    /// * The routine supports asynchronous execution.
3295    /// * The routine supports CUDA graph capture.
3296    pub fn cusparseZcsrilu02(
3297        handle: cusparseHandle_t,
3298        m: ::core::ffi::c_int,
3299        nnz: ::core::ffi::c_int,
3300        descrA: cusparseMatDescr_t,
3301        csrSortedValA_valM: *mut cuDoubleComplex,
3302        csrSortedRowPtrA: *const ::core::ffi::c_int,
3303        csrSortedColIndA: *const ::core::ffi::c_int,
3304        info: csrilu02Info_t,
3305        policy: cusparseSolvePolicy_t,
3306        pBuffer: *mut ::core::ffi::c_void,
3307    ) -> cusparseStatus_t;
3308}
3309unsafe extern "C" {
3310    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
3311    ///
3312    /// If `tol >= fabs(A(j,j))`, then each diagonal element of block `A(j,j)` is reset to `boost_val`.
3313    ///
3314    /// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
3315    ///
3316    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
3317    ///
3318    /// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. In this binding `tol` is a `*mut f64` and `boost_val` is a `*mut f32`.
3319    ///
3320    /// * The routine requires no extra storage.
3321    /// * The routine supports asynchronous execution.
3322    /// * The routine supports CUDA graph capture.
3323    pub fn cusparseSbsrilu02_numericBoost(
3324        handle: cusparseHandle_t,
3325        info: bsrilu02Info_t,
3326        enable_boost: ::core::ffi::c_int,
3327        tol: *mut f64,
3328        boost_val: *mut f32,
3329    ) -> cusparseStatus_t;
3330}
3331unsafe extern "C" {
3332    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
3333    ///
3334    /// If `tol >= fabs(A(j,j))`, then each diagonal element of block `A(j,j)` is reset to `boost_val`.
3335    ///
3336    /// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
3337    ///
3338    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
3339    ///
3340    /// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. In this binding both `tol` and `boost_val` are `*mut f64`.
3341    ///
3342    /// * The routine requires no extra storage.
3343    /// * The routine supports asynchronous execution.
3344    /// * The routine supports CUDA graph capture.
3345    pub fn cusparseDbsrilu02_numericBoost(
3346        handle: cusparseHandle_t,
3347        info: bsrilu02Info_t,
3348        enable_boost: ::core::ffi::c_int,
3349        tol: *mut f64,
3350        boost_val: *mut f64,
3351    ) -> cusparseStatus_t;
3352}
3353unsafe extern "C" {
3354    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
3355    ///
3356    /// If `tol >= fabs(A(j,j))`, then each diagonal element of block `A(j,j)` is reset to `boost_val`.
3357    ///
3358    /// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
3359    ///
3360    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
3361    ///
3362    /// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. In this binding `tol` is a `*mut f64` and `boost_val` is a `*mut cuComplex`.
3363    ///
3364    /// * The routine requires no extra storage.
3365    /// * The routine supports asynchronous execution.
3366    /// * The routine supports CUDA graph capture.
3367    pub fn cusparseCbsrilu02_numericBoost(
3368        handle: cusparseHandle_t,
3369        info: bsrilu02Info_t,
3370        enable_boost: ::core::ffi::c_int,
3371        tol: *mut f64,
3372        boost_val: *mut cuComplex,
3373    ) -> cusparseStatus_t;
3374}
3375unsafe extern "C" {
3376    /// The user can use a boost value to replace a numerical value in incomplete LU factorization. Parameter `tol` is used to determine a numerical zero, and `boost_val` is used to replace a numerical zero. The behavior is as follows:
3377    ///
3378    /// If `tol >= fabs(A(j,j))`, then each diagonal element of block `A(j,j)` is reset to `boost_val`.
3379    ///
3380    /// To enable a boost value, the user sets parameter `enable_boost` to 1 before calling `bsrilu02()`. To disable the boost value, the user can call `bsrilu02_numericBoost()` with parameter `enable_boost=0`.
3381    ///
3382    /// If `enable_boost=0`, `tol` and `boost_val` are ignored.
3383    ///
3384    /// Both `tol` and `boost_val` can be in host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`]. In this binding `tol` is a `*mut f64` and `boost_val` is a `*mut cuDoubleComplex`.
3385    ///
3386    /// * The routine requires no extra storage.
3387    /// * The routine supports asynchronous execution.
3388    /// * The routine supports CUDA graph capture.
3389    pub fn cusparseZbsrilu02_numericBoost(
3390        handle: cusparseHandle_t,
3391        info: bsrilu02Info_t,
3392        enable_boost: ::core::ffi::c_int,
3393        tol: *mut f64,
3394        boost_val: *mut cuDoubleComplex,
3395    ) -> cusparseStatus_t;
3396}
3397unsafe extern "C" {
3398    /// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero (the block is not invertible). Otherwise `position=-1`.
3399    ///
3400    /// The `position` can be 0-based or 1-based, the same as the matrix.
3401    ///
3402    /// Function [`cusparseXbsrilu02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done. NOTE(review): this statement and the asynchronous-execution bullet further down appear to conflict — confirm against the current cuSPARSE documentation.
3403    ///
3404    /// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
3405    ///
3406    /// * The routine requires no extra storage.
3407    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3408    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3409    ///
3410    /// # Parameters
3411    ///
3412    /// - `position`: if no structural or numerical zero, `position` is -1; otherwise if `A(j,j)` is missing or `U(j,j)` is not invertible, `position=j`.
3413    #[deprecated]
3414    pub fn cusparseXbsrilu02_zeroPivot(
3415        handle: cusparseHandle_t,
3416        info: bsrilu02Info_t,
3417        position: *mut ::core::ffi::c_int,
3418    ) -> cusparseStatus_t;
3419}
3420unsafe extern "C" {
3421    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting. NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int` in this binding) — confirm.
3422    ///
3423    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. NOTE(review): presumably passed here as `bsrSortedVal` (typed `*mut f32` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3424    ///
3425    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
3426    ///
3427    /// **Status Returned**
3428    ///
3429    /// | Status | Meaning |
3430    /// | --- | --- |
3431    /// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
3432    /// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
3433    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
3434    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
3435    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
3436    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
3437    /// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
3438    pub fn cusparseSbsrilu02_bufferSize(
3439        handle: cusparseHandle_t,
3440        dirA: cusparseDirection_t,
3441        mb: ::core::ffi::c_int,
3442        nnzb: ::core::ffi::c_int,
3443        descrA: cusparseMatDescr_t,
3444        bsrSortedVal: *mut f32,
3445        bsrSortedRowPtr: *const ::core::ffi::c_int,
3446        bsrSortedColInd: *const ::core::ffi::c_int,
3447        blockDim: ::core::ffi::c_int,
3448        info: bsrilu02Info_t,
3449        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3450    ) -> cusparseStatus_t;
3451}
3452unsafe extern "C" {
3453    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting. NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int` in this binding) — confirm.
3454    ///
3455    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. NOTE(review): presumably passed here as `bsrSortedVal` (typed `*mut f64` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3456    ///
3457    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
3458    ///
3459    /// **Status Returned**
3460    ///
3461    /// | Status | Meaning |
3462    /// | --- | --- |
3463    /// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
3464    /// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
3465    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
3466    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
3467    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
3468    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
3469    /// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
3470    pub fn cusparseDbsrilu02_bufferSize(
3471        handle: cusparseHandle_t,
3472        dirA: cusparseDirection_t,
3473        mb: ::core::ffi::c_int,
3474        nnzb: ::core::ffi::c_int,
3475        descrA: cusparseMatDescr_t,
3476        bsrSortedVal: *mut f64,
3477        bsrSortedRowPtr: *const ::core::ffi::c_int,
3478        bsrSortedColInd: *const ::core::ffi::c_int,
3479        blockDim: ::core::ffi::c_int,
3480        info: bsrilu02Info_t,
3481        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3482    ) -> cusparseStatus_t;
3483}
3484unsafe extern "C" {
3485    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting. NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int` in this binding) — confirm.
3486    ///
3487    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. NOTE(review): presumably passed here as `bsrSortedVal` (typed `*mut cuComplex` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3488    ///
3489    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
3490    ///
3491    /// **Status Returned**
3492    ///
3493    /// | Status | Meaning |
3494    /// | --- | --- |
3495    /// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
3496    /// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
3497    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
3498    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
3499    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
3500    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
3501    /// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
3502    pub fn cusparseCbsrilu02_bufferSize(
3503        handle: cusparseHandle_t,
3504        dirA: cusparseDirection_t,
3505        mb: ::core::ffi::c_int,
3506        nnzb: ::core::ffi::c_int,
3507        descrA: cusparseMatDescr_t,
3508        bsrSortedVal: *mut cuComplex,
3509        bsrSortedRowPtr: *const ::core::ffi::c_int,
3510        bsrSortedColInd: *const ::core::ffi::c_int,
3511        blockDim: ::core::ffi::c_int,
3512        info: bsrilu02Info_t,
3513        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3514    ) -> cusparseStatus_t;
3515}
3516unsafe extern "C" {
3517    /// This function returns the size of the buffer used in computing the incomplete-LU factorization with 0 fill-in and no pivoting. NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int` in this binding) — confirm.
3518    ///
3519    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. NOTE(review): presumably passed here as `bsrSortedVal` (typed `*mut cuDoubleComplex` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3520    ///
3521    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsrilu02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
3522    ///
3523    /// **Status Returned**
3524    ///
3525    /// | Status | Meaning |
3526    /// | --- | --- |
3527    /// | [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`] | the operation completed successfully. |
3528    /// | [`cusparseStatus_t::CUSPARSE_STATUS_NOT_INITIALIZED`] | the library was not initialized. |
3529    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ALLOC_FAILED`] | the resources could not be allocated. |
3530    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] | invalid parameters were passed (`mb,nnzb<=0`), base index is not 0 or 1. |
3531    /// | [`cusparseStatus_t::CUSPARSE_STATUS_ARCH_MISMATCH`] | the device only supports compute capability 2.0 and above. |
3532    /// | [`cusparseStatus_t::CUSPARSE_STATUS_INTERNAL_ERROR`] | an internal operation failed. |
3533    /// | [`cusparseStatus_t::CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED`] | the matrix type is not supported. |
3534    pub fn cusparseZbsrilu02_bufferSize(
3535        handle: cusparseHandle_t,
3536        dirA: cusparseDirection_t,
3537        mb: ::core::ffi::c_int,
3538        nnzb: ::core::ffi::c_int,
3539        descrA: cusparseMatDescr_t,
3540        bsrSortedVal: *mut cuDoubleComplex,
3541        bsrSortedRowPtr: *const ::core::ffi::c_int,
3542        bsrSortedColInd: *const ::core::ffi::c_int,
3543        blockDim: ::core::ffi::c_int,
3544        info: bsrilu02Info_t,
3545        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3546    ) -> cusparseStatus_t;
3547}
3548unsafe extern "C" {
    /// FFI declaration of the C function `cusparseSbsrilu02_bufferSizeExt`; the generated binding carries no upstream description.
    ///
    /// NOTE(review): the parameter list matches [`cusparseSbsrilu02_bufferSize`], except that the block dimension is named `blockSize` and the final pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`. Presumably this is the `size_t` flavour of the same buffer-size query — confirm against the cuSPARSE documentation.
3549    pub fn cusparseSbsrilu02_bufferSizeExt(
3550        handle: cusparseHandle_t,
3551        dirA: cusparseDirection_t,
3552        mb: ::core::ffi::c_int,
3553        nnzb: ::core::ffi::c_int,
3554        descrA: cusparseMatDescr_t,
3555        bsrSortedVal: *mut f32,
3556        bsrSortedRowPtr: *const ::core::ffi::c_int,
3557        bsrSortedColInd: *const ::core::ffi::c_int,
3558        blockSize: ::core::ffi::c_int,
3559        info: bsrilu02Info_t,
3560        pBufferSize: *mut size_t,
3561    ) -> cusparseStatus_t;
3562}
3563unsafe extern "C" {
    /// FFI declaration of the C function `cusparseDbsrilu02_bufferSizeExt`; the generated binding carries no upstream description.
    ///
    /// NOTE(review): the parameter list matches [`cusparseDbsrilu02_bufferSize`], except that the block dimension is named `blockSize` and the final pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`. Presumably this is the `size_t` flavour of the same buffer-size query — confirm against the cuSPARSE documentation.
3564    pub fn cusparseDbsrilu02_bufferSizeExt(
3565        handle: cusparseHandle_t,
3566        dirA: cusparseDirection_t,
3567        mb: ::core::ffi::c_int,
3568        nnzb: ::core::ffi::c_int,
3569        descrA: cusparseMatDescr_t,
3570        bsrSortedVal: *mut f64,
3571        bsrSortedRowPtr: *const ::core::ffi::c_int,
3572        bsrSortedColInd: *const ::core::ffi::c_int,
3573        blockSize: ::core::ffi::c_int,
3574        info: bsrilu02Info_t,
3575        pBufferSize: *mut size_t,
3576    ) -> cusparseStatus_t;
3577}
3578unsafe extern "C" {
    /// FFI declaration of the C function `cusparseCbsrilu02_bufferSizeExt`; the generated binding carries no upstream description.
    ///
    /// NOTE(review): the parameter list matches [`cusparseCbsrilu02_bufferSize`], except that the block dimension is named `blockSize` and the final pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`. Presumably this is the `size_t` flavour of the same buffer-size query — confirm against the cuSPARSE documentation.
3579    pub fn cusparseCbsrilu02_bufferSizeExt(
3580        handle: cusparseHandle_t,
3581        dirA: cusparseDirection_t,
3582        mb: ::core::ffi::c_int,
3583        nnzb: ::core::ffi::c_int,
3584        descrA: cusparseMatDescr_t,
3585        bsrSortedVal: *mut cuComplex,
3586        bsrSortedRowPtr: *const ::core::ffi::c_int,
3587        bsrSortedColInd: *const ::core::ffi::c_int,
3588        blockSize: ::core::ffi::c_int,
3589        info: bsrilu02Info_t,
3590        pBufferSize: *mut size_t,
3591    ) -> cusparseStatus_t;
3592}
3593unsafe extern "C" {
    /// FFI declaration of the C function `cusparseZbsrilu02_bufferSizeExt`; the generated binding carries no upstream description.
    ///
    /// NOTE(review): the parameter list matches [`cusparseZbsrilu02_bufferSize`], except that the block dimension is named `blockSize` and the final pointer is `pBufferSize: *mut size_t` rather than `pBufferSizeInBytes: *mut c_int`. Presumably this is the `size_t` flavour of the same buffer-size query — confirm against the cuSPARSE documentation.
3594    pub fn cusparseZbsrilu02_bufferSizeExt(
3595        handle: cusparseHandle_t,
3596        dirA: cusparseDirection_t,
3597        mb: ::core::ffi::c_int,
3598        nnzb: ::core::ffi::c_int,
3599        descrA: cusparseMatDescr_t,
3600        bsrSortedVal: *mut cuDoubleComplex,
3601        bsrSortedRowPtr: *const ::core::ffi::c_int,
3602        bsrSortedColInd: *const ::core::ffi::c_int,
3603        blockSize: ::core::ffi::c_int,
3604        info: bsrilu02Info_t,
3605        pBufferSize: *mut size_t,
3606    ) -> cusparseStatus_t;
3607}
3608unsafe extern "C" {
3609    /// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3610    ///
3611    /// |  |
3612    /// | --- |
3613    /// | $A \approx LU$ |
3614    ///
3615    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. NOTE(review): the three arrays are presumably passed here as `bsrSortedVal` (typed `*mut f32` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3616    ///
3617    /// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3618    ///
3619    /// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However, `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsrilu02[_analysis| ]` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3620    ///
3621    /// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
3622    ///
3623    /// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3624    ///
3625    /// * This function requires temporary extra storage that is allocated internally.
3626    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3627    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3628    pub fn cusparseSbsrilu02_analysis(
3629        handle: cusparseHandle_t,
3630        dirA: cusparseDirection_t,
3631        mb: ::core::ffi::c_int,
3632        nnzb: ::core::ffi::c_int,
3633        descrA: cusparseMatDescr_t,
3634        bsrSortedVal: *mut f32,
3635        bsrSortedRowPtr: *const ::core::ffi::c_int,
3636        bsrSortedColInd: *const ::core::ffi::c_int,
3637        blockDim: ::core::ffi::c_int,
3638        info: bsrilu02Info_t,
3639        policy: cusparseSolvePolicy_t,
3640        pBuffer: *mut ::core::ffi::c_void,
3641    ) -> cusparseStatus_t;
3642}
3643unsafe extern "C" {
3644    /// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3645    ///
3646    /// |  |
3647    /// | --- |
3648    /// | $A \approx LU$ |
3649    ///
3650    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. NOTE(review): the three arrays are presumably passed here as `bsrSortedVal` (typed `*mut f64` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3651    ///
3652    /// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3653    ///
3654    /// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However, `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsrilu02[_analysis| ]` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3655    ///
3656    /// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
3657    ///
3658    /// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3659    ///
3660    /// * This function requires temporary extra storage that is allocated internally.
3661    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3662    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3663    pub fn cusparseDbsrilu02_analysis(
3664        handle: cusparseHandle_t,
3665        dirA: cusparseDirection_t,
3666        mb: ::core::ffi::c_int,
3667        nnzb: ::core::ffi::c_int,
3668        descrA: cusparseMatDescr_t,
3669        bsrSortedVal: *mut f64,
3670        bsrSortedRowPtr: *const ::core::ffi::c_int,
3671        bsrSortedColInd: *const ::core::ffi::c_int,
3672        blockDim: ::core::ffi::c_int,
3673        info: bsrilu02Info_t,
3674        policy: cusparseSolvePolicy_t,
3675        pBuffer: *mut ::core::ffi::c_void,
3676    ) -> cusparseStatus_t;
3677}
3678unsafe extern "C" {
3679    /// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3680    ///
3681    /// |  |
3682    /// | --- |
3683    /// | $A \approx LU$ |
3684    ///
3685    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. NOTE(review): the three arrays are presumably passed here as `bsrSortedVal` (typed `*mut cuComplex` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3686    ///
3687    /// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3688    ///
3689    /// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However, `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsrilu02[_analysis| ]` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3690    ///
3691    /// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
3692    ///
3693    /// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3694    ///
3695    /// * This function requires temporary extra storage that is allocated internally.
3696    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3697    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3698    pub fn cusparseCbsrilu02_analysis(
3699        handle: cusparseHandle_t,
3700        dirA: cusparseDirection_t,
3701        mb: ::core::ffi::c_int,
3702        nnzb: ::core::ffi::c_int,
3703        descrA: cusparseMatDescr_t,
3704        bsrSortedVal: *mut cuComplex,
3705        bsrSortedRowPtr: *const ::core::ffi::c_int,
3706        bsrSortedColInd: *const ::core::ffi::c_int,
3707        blockDim: ::core::ffi::c_int,
3708        info: bsrilu02Info_t,
3709        policy: cusparseSolvePolicy_t,
3710        pBuffer: *mut ::core::ffi::c_void,
3711    ) -> cusparseStatus_t;
3712}
3713unsafe extern "C" {
3714    /// This function performs the analysis phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3715    ///
3716    /// |  |
3717    /// | --- |
3718    /// | $A \approx LU$ |
3719    ///
3720    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. NOTE(review): the three arrays are presumably passed here as `bsrSortedVal` (typed `*mut cuDoubleComplex` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3721    ///
3722    /// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3723    ///
3724    /// Function `bsrilu02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete LU factorization. However, `bsrilu02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsrilu02[_analysis| ]` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
3725    ///
3726    /// Function `bsrilu02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the structural zero is.
3727    ///
3728    /// It is the user’s choice whether to call `bsrilu02()` if `bsrilu02_analysis()` reports a structural zero. In this case, the user can still call `bsrilu02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
3729    ///
3730    /// * This function requires temporary extra storage that is allocated internally.
3731    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3732    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3733    pub fn cusparseZbsrilu02_analysis(
3734        handle: cusparseHandle_t,
3735        dirA: cusparseDirection_t,
3736        mb: ::core::ffi::c_int,
3737        nnzb: ::core::ffi::c_int,
3738        descrA: cusparseMatDescr_t,
3739        bsrSortedVal: *mut cuDoubleComplex,
3740        bsrSortedRowPtr: *const ::core::ffi::c_int,
3741        bsrSortedColInd: *const ::core::ffi::c_int,
3742        blockDim: ::core::ffi::c_int,
3743        info: bsrilu02Info_t,
3744        policy: cusparseSolvePolicy_t,
3745        pBuffer: *mut ::core::ffi::c_void,
3746    ) -> cusparseStatus_t;
3747}
3748unsafe extern "C" {
3749    /// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3750    ///
3751    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`. NOTE(review): the three arrays are presumably passed here as `bsrSortedVal` (typed `*mut f32` in this binding), `bsrSortedRowPtr`, and `bsrSortedColInd` — confirm.
3752    ///
3753    /// This function requires a buffer size returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3754    ///
3755    /// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3756    ///
3757    /// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
3758    ///
3759    /// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
3760    ///
3761    /// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The following code solves the precondition system `M*y = x`, where `M` is the product of LU factors `L` and `U`. (The example code itself is not reproduced in this doc comment; see the cuSPARSE documentation.)
3762    ///
3763    /// The function supports the following properties if `pBuffer != NULL`:
3764    ///
3765    /// * The routine requires no extra storage.
3766    /// * The routine supports asynchronous execution.
3767    /// * The routine supports CUDA graph capture.
3768    pub fn cusparseSbsrilu02(
3769        handle: cusparseHandle_t,
3770        dirA: cusparseDirection_t,
3771        mb: ::core::ffi::c_int,
3772        nnzb: ::core::ffi::c_int,
3773        descrA: cusparseMatDescr_t,
3774        bsrSortedVal: *mut f32,
3775        bsrSortedRowPtr: *const ::core::ffi::c_int,
3776        bsrSortedColInd: *const ::core::ffi::c_int,
3777        blockDim: ::core::ffi::c_int,
3778        info: bsrilu02Info_t,
3779        policy: cusparseSolvePolicy_t,
3780        pBuffer: *mut ::core::ffi::c_void,
3781    ) -> cusparseStatus_t;
3782}
3783unsafe extern "C" {
3784    /// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3785    ///
3786    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding, with `bsrSortedVal` typed `*mut f64` here — TODO confirm the name mapping). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`.
3787    ///
3788    /// This function requires a buffer whose size is returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3789    ///
3790    /// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3791    ///
3792    /// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
3793    ///
3794    /// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
3795    ///
3796    /// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The source documentation apparently followed this with example code, which is not reproduced in these bindings, that solves the preconditioned system `M*y = x`, where `M` is the product of LU factors `L` and `U`.
3797    ///
3798    /// The function supports the following properties if `pBuffer != NULL`:
3799    ///
3800    /// * The routine requires no extra storage.
3801    /// * The routine supports asynchronous execution.
3802    /// * The routine supports CUDA graph capture.
3803    pub fn cusparseDbsrilu02(
3804        handle: cusparseHandle_t,
3805        dirA: cusparseDirection_t,
3806        mb: ::core::ffi::c_int,
3807        nnzb: ::core::ffi::c_int,
3808        descrA: cusparseMatDescr_t,
3809        bsrSortedVal: *mut f64,
3810        bsrSortedRowPtr: *const ::core::ffi::c_int,
3811        bsrSortedColInd: *const ::core::ffi::c_int,
3812        blockDim: ::core::ffi::c_int,
3813        info: bsrilu02Info_t,
3814        policy: cusparseSolvePolicy_t,
3815        pBuffer: *mut ::core::ffi::c_void,
3816    ) -> cusparseStatus_t;
3817}
3818unsafe extern "C" {
3819    /// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3820    ///
3821    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding, with `bsrSortedVal` typed `*mut cuComplex` here — TODO confirm the name mapping). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`.
3822    ///
3823    /// This function requires a buffer whose size is returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3824    ///
3825    /// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3826    ///
3827    /// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
3828    ///
3829    /// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
3830    ///
3831    /// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The source documentation apparently followed this with example code, which is not reproduced in these bindings, that solves the preconditioned system `M*y = x`, where `M` is the product of LU factors `L` and `U`.
3832    ///
3833    /// The function supports the following properties if `pBuffer != NULL`:
3834    ///
3835    /// * The routine requires no extra storage.
3836    /// * The routine supports asynchronous execution.
3837    /// * The routine supports CUDA graph capture.
3838    pub fn cusparseCbsrilu02(
3839        handle: cusparseHandle_t,
3840        dirA: cusparseDirection_t,
3841        mb: ::core::ffi::c_int,
3842        nnzb: ::core::ffi::c_int,
3843        descrA: cusparseMatDescr_t,
3844        bsrSortedVal: *mut cuComplex,
3845        bsrSortedRowPtr: *const ::core::ffi::c_int,
3846        bsrSortedColInd: *const ::core::ffi::c_int,
3847        blockDim: ::core::ffi::c_int,
3848        info: bsrilu02Info_t,
3849        policy: cusparseSolvePolicy_t,
3850        pBuffer: *mut ::core::ffi::c_void,
3851    ) -> cusparseStatus_t;
3852}
3853unsafe extern "C" {
3854    /// This function performs the solve phase of the incomplete-LU factorization with 0 fill-in and no pivoting.
3855    ///
3856    /// `A` is an `(mb*blockDim)×(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this binding, with `bsrSortedVal` typed `*mut cuDoubleComplex` here — TODO confirm the name mapping). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored. Function `bsrilu02()` supports an arbitrary `blockDim`.
3857    ///
3858    /// This function requires a buffer whose size is returned by `bsrilu02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3859    ///
3860    /// Although `bsrilu02()` can be used without level information, the user must be aware of consistency. If `bsrilu02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsrilu02()` can be run with or without levels. On the other hand, if `bsrilu02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsrilu02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
3861    ///
3862    /// Function `bsrilu02()` has the same behavior as `csrilu02()`. That is, `bsr2csr(bsrilu02(A)) = csrilu02(bsr2csr(A))`. The numerical zero of `csrilu02()` means there exists some zero `U(j,j)`. The numerical zero of `bsrilu02()` means there exists some block `U(j,j)` that is not invertible.
3863    ///
3864    /// Function `bsrilu02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsrilu02_zeroPivot`] to know where the numerical zero is.
3865    ///
3866    /// For example, suppose `A` is a real m-by-m matrix where `m=mb*blockDim`. The source documentation apparently followed this with example code, which is not reproduced in these bindings, that solves the preconditioned system `M*y = x`, where `M` is the product of LU factors `L` and `U`.
3867    ///
3868    /// The function supports the following properties if `pBuffer != NULL`:
3869    ///
3870    /// * The routine requires no extra storage.
3871    /// * The routine supports asynchronous execution.
3872    /// * The routine supports CUDA graph capture.
3873    pub fn cusparseZbsrilu02(
3874        handle: cusparseHandle_t,
3875        dirA: cusparseDirection_t,
3876        mb: ::core::ffi::c_int,
3877        nnzb: ::core::ffi::c_int,
3878        descrA: cusparseMatDescr_t,
3879        bsrSortedVal: *mut cuDoubleComplex,
3880        bsrSortedRowPtr: *const ::core::ffi::c_int,
3881        bsrSortedColInd: *const ::core::ffi::c_int,
3882        blockDim: ::core::ffi::c_int,
3883        info: bsrilu02Info_t,
3884        policy: cusparseSolvePolicy_t,
3885        pBuffer: *mut ::core::ffi::c_void,
3886    ) -> cusparseStatus_t;
3887}
3888unsafe extern "C" {
3889    /// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero; otherwise, `position=-1`.
3890    ///
3891    /// The `position` can be 0-based or 1-based, the same as the matrix.
3892    ///
3893    /// Function [`cusparseXcsric02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
3894    ///
3895    /// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
3896    ///
3897    /// * The routine requires no extra storage.
3898    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
3899    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
3900    ///
3901    /// # Parameters
3902    ///
3903    /// - `position`: if no structural or numerical zero, `position` is -1; otherwise, if `A(j,j)` is missing or `L(j,j)` is zero, `position=j`.
    ///
    /// # Deprecation
    ///
    /// This declaration carries a bare `#[deprecated]` attribute (no `since` or `note`), so the compiler warns at each use site unless the caller allows the `deprecated` lint.
3904    #[deprecated]
3905    pub fn cusparseXcsric02_zeroPivot(
3906        handle: cusparseHandle_t,
3907        info: csric02Info_t,
3908        position: *mut ::core::ffi::c_int,
3909    ) -> cusparseStatus_t;
3910}
3911unsafe extern "C" {
3912    /// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
3913    ///
3914    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*mut f32` here — TODO confirm the name mapping).
3915    ///
3916    /// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur. The declared return value is a [`cusparseStatus_t`]; the size itself is presumably written through `pBufferSizeInBytes` (`*mut c_int`) — TODO confirm.
3917    ///
3918    /// * The routine requires no extra storage.
3919    /// * The routine supports asynchronous execution.
3920    /// * The routine supports CUDA graph capture.
3921    pub fn cusparseScsric02_bufferSize(
3922        handle: cusparseHandle_t,
3923        m: ::core::ffi::c_int,
3924        nnz: ::core::ffi::c_int,
3925        descrA: cusparseMatDescr_t,
3926        csrSortedValA: *mut f32,
3927        csrSortedRowPtrA: *const ::core::ffi::c_int,
3928        csrSortedColIndA: *const ::core::ffi::c_int,
3929        info: csric02Info_t,
3930        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3931    ) -> cusparseStatus_t;
3932}
3933unsafe extern "C" {
3934    /// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
3935    ///
3936    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*mut f64` here — TODO confirm the name mapping).
3937    ///
3938    /// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur. The declared return value is a [`cusparseStatus_t`]; the size itself is presumably written through `pBufferSizeInBytes` (`*mut c_int`) — TODO confirm.
3939    ///
3940    /// * The routine requires no extra storage.
3941    /// * The routine supports asynchronous execution.
3942    /// * The routine supports CUDA graph capture.
3943    pub fn cusparseDcsric02_bufferSize(
3944        handle: cusparseHandle_t,
3945        m: ::core::ffi::c_int,
3946        nnz: ::core::ffi::c_int,
3947        descrA: cusparseMatDescr_t,
3948        csrSortedValA: *mut f64,
3949        csrSortedRowPtrA: *const ::core::ffi::c_int,
3950        csrSortedColIndA: *const ::core::ffi::c_int,
3951        info: csric02Info_t,
3952        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3953    ) -> cusparseStatus_t;
3954}
3955unsafe extern "C" {
3956    /// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
3957    ///
3958    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*mut cuComplex` here — TODO confirm the name mapping).
3959    ///
3960    /// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur. The declared return value is a [`cusparseStatus_t`]; the size itself is presumably written through `pBufferSizeInBytes` (`*mut c_int`) — TODO confirm.
3961    ///
3962    /// * The routine requires no extra storage.
3963    /// * The routine supports asynchronous execution.
3964    /// * The routine supports CUDA graph capture.
3965    pub fn cusparseCcsric02_bufferSize(
3966        handle: cusparseHandle_t,
3967        m: ::core::ffi::c_int,
3968        nnz: ::core::ffi::c_int,
3969        descrA: cusparseMatDescr_t,
3970        csrSortedValA: *mut cuComplex,
3971        csrSortedRowPtrA: *const ::core::ffi::c_int,
3972        csrSortedColIndA: *const ::core::ffi::c_int,
3973        info: csric02Info_t,
3974        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3975    ) -> cusparseStatus_t;
3976}
3977unsafe extern "C" {
3978    /// This function returns the size of the buffer used in computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
3979    ///
3980    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*mut cuDoubleComplex` here — TODO confirm the name mapping).
3981    ///
3982    /// The buffer size depends on dimension `m` and `nnz`, the number of nonzeros of the matrix. If the user changes the matrix, it is necessary to call `csric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur. The declared return value is a [`cusparseStatus_t`]; the size itself is presumably written through `pBufferSizeInBytes` (`*mut c_int`) — TODO confirm.
3983    ///
3984    /// * The routine requires no extra storage.
3985    /// * The routine supports asynchronous execution.
3986    /// * The routine supports CUDA graph capture.
3987    pub fn cusparseZcsric02_bufferSize(
3988        handle: cusparseHandle_t,
3989        m: ::core::ffi::c_int,
3990        nnz: ::core::ffi::c_int,
3991        descrA: cusparseMatDescr_t,
3992        csrSortedValA: *mut cuDoubleComplex,
3993        csrSortedRowPtrA: *const ::core::ffi::c_int,
3994        csrSortedColIndA: *const ::core::ffi::c_int,
3995        info: csric02Info_t,
3996        pBufferSizeInBytes: *mut ::core::ffi::c_int,
3997    ) -> cusparseStatus_t;
3998}
3999unsafe extern "C" {
    /// Raw binding for the C symbol `cusparseScsric02_bufferSizeExt`; no upstream description accompanies it in these bindings.
    ///
    /// The parameter types match [`cusparseScsric02_bufferSize`] position by position (value array `*mut f32`), except that the final out-pointer `pBufferSize` is `*mut size_t` where that function takes `*mut c_int`.
    ///
    /// NOTE(review): presumably this is the `size_t`-reporting form of the `csric02` buffer-size query — confirm against the cuSPARSE reference before relying on it.
4000    pub fn cusparseScsric02_bufferSizeExt(
4001        handle: cusparseHandle_t,
4002        m: ::core::ffi::c_int,
4003        nnz: ::core::ffi::c_int,
4004        descrA: cusparseMatDescr_t,
4005        csrSortedVal: *mut f32,
4006        csrSortedRowPtr: *const ::core::ffi::c_int,
4007        csrSortedColInd: *const ::core::ffi::c_int,
4008        info: csric02Info_t,
4009        pBufferSize: *mut size_t,
4010    ) -> cusparseStatus_t;
4011}
4012unsafe extern "C" {
    /// Raw binding for the C symbol `cusparseDcsric02_bufferSizeExt`; no upstream description accompanies it in these bindings.
    ///
    /// The parameter types match [`cusparseDcsric02_bufferSize`] position by position (value array `*mut f64`), except that the final out-pointer `pBufferSize` is `*mut size_t` where that function takes `*mut c_int`.
    ///
    /// NOTE(review): presumably this is the `size_t`-reporting form of the `csric02` buffer-size query — confirm against the cuSPARSE reference before relying on it.
4013    pub fn cusparseDcsric02_bufferSizeExt(
4014        handle: cusparseHandle_t,
4015        m: ::core::ffi::c_int,
4016        nnz: ::core::ffi::c_int,
4017        descrA: cusparseMatDescr_t,
4018        csrSortedVal: *mut f64,
4019        csrSortedRowPtr: *const ::core::ffi::c_int,
4020        csrSortedColInd: *const ::core::ffi::c_int,
4021        info: csric02Info_t,
4022        pBufferSize: *mut size_t,
4023    ) -> cusparseStatus_t;
4024}
4025unsafe extern "C" {
    /// Raw binding for the C symbol `cusparseCcsric02_bufferSizeExt`; no upstream description accompanies it in these bindings.
    ///
    /// The parameter types match [`cusparseCcsric02_bufferSize`] position by position (value array `*mut cuComplex`), except that the final out-pointer `pBufferSize` is `*mut size_t` where that function takes `*mut c_int`.
    ///
    /// NOTE(review): presumably this is the `size_t`-reporting form of the `csric02` buffer-size query — confirm against the cuSPARSE reference before relying on it.
4026    pub fn cusparseCcsric02_bufferSizeExt(
4027        handle: cusparseHandle_t,
4028        m: ::core::ffi::c_int,
4029        nnz: ::core::ffi::c_int,
4030        descrA: cusparseMatDescr_t,
4031        csrSortedVal: *mut cuComplex,
4032        csrSortedRowPtr: *const ::core::ffi::c_int,
4033        csrSortedColInd: *const ::core::ffi::c_int,
4034        info: csric02Info_t,
4035        pBufferSize: *mut size_t,
4036    ) -> cusparseStatus_t;
4037}
4038unsafe extern "C" {
    /// Raw binding for the C symbol `cusparseZcsric02_bufferSizeExt`; no upstream description accompanies it in these bindings.
    ///
    /// The parameter types match [`cusparseZcsric02_bufferSize`] position by position (value array `*mut cuDoubleComplex`), except that the final out-pointer `pBufferSize` is `*mut size_t` where that function takes `*mut c_int`.
    ///
    /// NOTE(review): presumably this is the `size_t`-reporting form of the `csric02` buffer-size query — confirm against the cuSPARSE reference before relying on it.
4039    pub fn cusparseZcsric02_bufferSizeExt(
4040        handle: cusparseHandle_t,
4041        m: ::core::ffi::c_int,
4042        nnz: ::core::ffi::c_int,
4043        descrA: cusparseMatDescr_t,
4044        csrSortedVal: *mut cuDoubleComplex,
4045        csrSortedRowPtr: *const ::core::ffi::c_int,
4046        csrSortedColInd: *const ::core::ffi::c_int,
4047        info: csric02Info_t,
4048        pBufferSize: *mut size_t,
4049    ) -> cusparseStatus_t;
4050}
4051unsafe extern "C" {
4052    /// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
4053    ///
4054    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*const f32` here — TODO confirm the name mapping).
4055    ///
4056    /// This function requires a buffer whose size is returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4057    ///
4058    /// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4059    ///
4060    /// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
4061    ///
4062    /// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
4063    ///
4064    /// * This function requires temporary extra storage that is allocated internally.
4065    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4066    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
4067    pub fn cusparseScsric02_analysis(
4068        handle: cusparseHandle_t,
4069        m: ::core::ffi::c_int,
4070        nnz: ::core::ffi::c_int,
4071        descrA: cusparseMatDescr_t,
4072        csrSortedValA: *const f32,
4073        csrSortedRowPtrA: *const ::core::ffi::c_int,
4074        csrSortedColIndA: *const ::core::ffi::c_int,
4075        info: csric02Info_t,
4076        policy: cusparseSolvePolicy_t,
4077        pBuffer: *mut ::core::ffi::c_void,
4078    ) -> cusparseStatus_t;
4079}
4080unsafe extern "C" {
4081    /// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
4082    ///
4083    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*const f64` here — TODO confirm the name mapping).
4084    ///
4085    /// This function requires a buffer whose size is returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4086    ///
4087    /// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4088    ///
4089    /// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
4090    ///
4091    /// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
4092    ///
4093    /// * This function requires temporary extra storage that is allocated internally.
4094    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4095    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
4096    pub fn cusparseDcsric02_analysis(
4097        handle: cusparseHandle_t,
4098        m: ::core::ffi::c_int,
4099        nnz: ::core::ffi::c_int,
4100        descrA: cusparseMatDescr_t,
4101        csrSortedValA: *const f64,
4102        csrSortedRowPtrA: *const ::core::ffi::c_int,
4103        csrSortedColIndA: *const ::core::ffi::c_int,
4104        info: csric02Info_t,
4105        policy: cusparseSolvePolicy_t,
4106        pBuffer: *mut ::core::ffi::c_void,
4107    ) -> cusparseStatus_t;
4108}
4109unsafe extern "C" {
4110    /// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
4111    ///
4112    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*const cuComplex` here — TODO confirm the name mapping).
4113    ///
4114    /// This function requires a buffer whose size is returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4115    ///
4116    /// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4117    ///
4118    /// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
4119    ///
4120    /// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
4121    ///
4122    /// * This function requires temporary extra storage that is allocated internally.
4123    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4124    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
4125    pub fn cusparseCcsric02_analysis(
4126        handle: cusparseHandle_t,
4127        m: ::core::ffi::c_int,
4128        nnz: ::core::ffi::c_int,
4129        descrA: cusparseMatDescr_t,
4130        csrSortedValA: *const cuComplex,
4131        csrSortedRowPtrA: *const ::core::ffi::c_int,
4132        csrSortedColIndA: *const ::core::ffi::c_int,
4133        info: csric02Info_t,
4134        policy: cusparseSolvePolicy_t,
4135        pBuffer: *mut ::core::ffi::c_void,
4136    ) -> cusparseStatus_t;
4137}
4138unsafe extern "C" {
4139    /// This function performs the analysis phase of the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
4140    ///
4141    /// `A` is an $m \times m$ sparse matrix that is defined in CSR storage format by the three arrays `csrValA`, `csrRowPtrA`, and `csrColIndA` (presumably the `csrSortedValA`, `csrSortedRowPtrA`, and `csrSortedColIndA` parameters of this binding, with `csrSortedValA` typed `*const cuDoubleComplex` here — TODO confirm the name mapping).
4142    ///
4143    /// This function requires a buffer whose size is returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4144    ///
4145    /// Function `csric02_analysis()` reports a structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `csric02()` can be done without level information. To disable level information, the user must specify the policy of `csric02_analysis()` and `csric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4146    ///
4147    /// Function `csric02_analysis()` always reports the first structural zero, even if the policy is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user needs to call [`cusparseXcsric02_zeroPivot`] to know where the structural zero is.
4148    ///
4149    /// It is the user’s choice whether to call `csric02()` if `csric02_analysis()` reports a structural zero. In this case, the user can still call `csric02()`, which will return a numerical zero at the same position as the structural zero. However, the result is meaningless.
4150    ///
4151    /// * This function requires temporary extra storage that is allocated internally.
4152    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4153    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
4154    pub fn cusparseZcsric02_analysis(
4155        handle: cusparseHandle_t,
4156        m: ::core::ffi::c_int,
4157        nnz: ::core::ffi::c_int,
4158        descrA: cusparseMatDescr_t,
4159        csrSortedValA: *const cuDoubleComplex,
4160        csrSortedRowPtrA: *const ::core::ffi::c_int,
4161        csrSortedColIndA: *const ::core::ffi::c_int,
4162        info: csric02Info_t,
4163        policy: cusparseSolvePolicy_t,
4164        pBuffer: *mut ::core::ffi::c_void,
4165    ) -> cusparseStatus_t;
4166}
4167unsafe extern "C" {
4168    /// This function performs the solve phase of computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
4169    ///
4170    /// This function requires a buffer whose size is returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4171    ///
4172    /// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4173    ///
4174    /// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
4175    ///
4176    /// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided. In this binding the matrix values are presumably passed via `csrSortedValA_valM` (`*mut f32`) — TODO confirm.
4177    ///
4178    /// For example, suppose `A` is a real m-by-m matrix. The source documentation apparently followed this with example code, which is not reproduced in these bindings, that solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
4179    ///
4180    /// The function supports the following properties if `pBuffer != NULL`:
4181    ///
4182    /// * This function requires temporary extra storage that is allocated internally.
4183    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4184    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
4185    pub fn cusparseScsric02(
4186        handle: cusparseHandle_t,
4187        m: ::core::ffi::c_int,
4188        nnz: ::core::ffi::c_int,
4189        descrA: cusparseMatDescr_t,
4190        csrSortedValA_valM: *mut f32,
4191        csrSortedRowPtrA: *const ::core::ffi::c_int,
4192        csrSortedColIndA: *const ::core::ffi::c_int,
4193        info: csric02Info_t,
4194        policy: cusparseSolvePolicy_t,
4195        pBuffer: *mut ::core::ffi::c_void,
4196    ) -> cusparseStatus_t;
4197}
4198unsafe extern "C" {
4199    /// This function performs the solve phase of computing the incomplete-Cholesky factorization with $0$ fill-in and no pivoting.
4200    ///
4201    /// This function requires a buffer whose size is returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4202    ///
4203    /// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4204    ///
4205    /// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
4206    ///
4207    /// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided. In this binding the matrix values are presumably passed via `csrSortedValA_valM` (`*mut f64`) — TODO confirm.
4208    ///
4209    /// For example, suppose `A` is a real m-by-m matrix. The source documentation apparently followed this with example code, which is not reproduced in these bindings, that solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
4210    ///
4211    /// The function supports the following properties if `pBuffer != NULL`:
4212    ///
4213    /// * This function requires temporary extra storage that is allocated internally.
4214    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4215    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
4216    pub fn cusparseDcsric02(
4217        handle: cusparseHandle_t,
4218        m: ::core::ffi::c_int,
4219        nnz: ::core::ffi::c_int,
4220        descrA: cusparseMatDescr_t,
4221        csrSortedValA_valM: *mut f64,
4222        csrSortedRowPtrA: *const ::core::ffi::c_int,
4223        csrSortedColIndA: *const ::core::ffi::c_int,
4224        info: csric02Info_t,
4225        policy: cusparseSolvePolicy_t,
4226        pBuffer: *mut ::core::ffi::c_void,
4227    ) -> cusparseStatus_t;
4228}
4229unsafe extern "C" {
4230    /// This function performs the solve phase of computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4231    ///
4232    /// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4233    ///
4234    /// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4235    ///
4236    /// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
4237    ///
4238    /// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided.
4239    ///
4240    /// For example, if `A` is a real m-by-m matrix, the factorization can be used to solve the precondition system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose. (The sample code this sentence originally introduced is not included in this doc comment.)
4241    ///
4242    /// The function supports the following properties if `pBuffer != NULL`:
4243    ///
4244    /// * This function requires temporary extra storage that is allocated internally.
4245    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4246    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
        ///
        /// This binding is the variant whose value array `csrSortedValA_valM` is `*mut cuComplex`.
4247    pub fn cusparseCcsric02(
4248        handle: cusparseHandle_t,
4249        m: ::core::ffi::c_int,
4250        nnz: ::core::ffi::c_int,
4251        descrA: cusparseMatDescr_t,
4252        csrSortedValA_valM: *mut cuComplex,
4253        csrSortedRowPtrA: *const ::core::ffi::c_int,
4254        csrSortedColIndA: *const ::core::ffi::c_int,
4255        info: csric02Info_t,
4256        policy: cusparseSolvePolicy_t,
4257        pBuffer: *mut ::core::ffi::c_void,
4258    ) -> cusparseStatus_t;
4259}
4260unsafe extern "C" {
4261    /// This function performs the solve phase of computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4262    ///
4263    /// This function requires a buffer size returned by `csric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4264    ///
4265    /// Although `csric02()` can be done without level information, the user still needs to be aware of consistency. If `csric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `csric02()` can be run with or without levels. On the other hand, if `csric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `csric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4266    ///
4267    /// Function `csric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXcsric02_zeroPivot`] to know where the numerical zero is.
4268    ///
4269    /// Function `csric02()` only takes the lower triangular part of matrix `A` to perform factorization. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], the fill mode and diagonal type are ignored, and the strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `csric02()`, `A` is Hermitian and only the lower triangular part is provided.
4270    ///
4271    /// For example, if `A` is a real m-by-m matrix, the factorization can be used to solve the precondition system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose. (The sample code this sentence originally introduced is not included in this doc comment.)
4272    ///
4273    /// The function supports the following properties if `pBuffer != NULL`:
4274    ///
4275    /// * This function requires temporary extra storage that is allocated internally.
4276    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4277    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
        ///
        /// This binding is the variant whose value array `csrSortedValA_valM` is `*mut cuDoubleComplex`.
4278    pub fn cusparseZcsric02(
4279        handle: cusparseHandle_t,
4280        m: ::core::ffi::c_int,
4281        nnz: ::core::ffi::c_int,
4282        descrA: cusparseMatDescr_t,
4283        csrSortedValA_valM: *mut cuDoubleComplex,
4284        csrSortedRowPtrA: *const ::core::ffi::c_int,
4285        csrSortedColIndA: *const ::core::ffi::c_int,
4286        info: csric02Info_t,
4287        policy: cusparseSolvePolicy_t,
4288        pBuffer: *mut ::core::ffi::c_void,
4289    ) -> cusparseStatus_t;
4290}
4291unsafe extern "C" {
4292    /// If the returned error code is [`cusparseStatus_t::CUSPARSE_STATUS_ZERO_PIVOT`], `position=j` means `A(j,j)` has either a structural zero or a numerical zero (the block is not positive definite). Otherwise, `position=-1`.
4293    ///
4294    /// The `position` can be 0-based or 1-based, the same as the matrix.
4295    ///
4296    /// Function [`cusparseXbsric02_zeroPivot`] is a blocking call. It calls `cudaDeviceSynchronize()` to make sure all previous kernels are done.
4297    ///
4298    /// The `position` can be in the host memory or device memory. The user can set the proper mode with [`cusparseSetPointerMode`].
4299    ///
4300    /// * The routine requires no extra storage.
4301    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4302    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
4303    ///
        /// This declaration is marked `#[deprecated]` in these bindings; no deprecation note or replacement is given here.
        ///
4304    /// # Parameters
4305    ///
4306    /// - `position`: If there is no structural or numerical zero, `position` is -1; otherwise, if `A(j,j)` is missing or `L(j,j)` is not positive definite, `position=j`.
4307    #[deprecated]
4308    pub fn cusparseXbsric02_zeroPivot(
4309        handle: cusparseHandle_t,
4310        info: bsric02Info_t,
4311        position: *mut ::core::ffi::c_int,
4312    ) -> cusparseStatus_t;
4313}
4314unsafe extern "C" {
4315    /// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4316    ///
4317    /// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
4318    ///
4319    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
4320    ///
4321    /// * The routine requires no extra storage.
4322    /// * The routine supports asynchronous execution.
4323    /// * The routine supports CUDA graph capture.
        ///
        /// In this binding the array parameters are named `bsrSortedVal` (`*mut f32`), `bsrSortedRowPtr`, and `bsrSortedColInd` (presumably the `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` referred to above), and the result pointer `pBufferSizeInBytes` is `*mut c_int`.
4324    pub fn cusparseSbsric02_bufferSize(
4325        handle: cusparseHandle_t,
4326        dirA: cusparseDirection_t,
4327        mb: ::core::ffi::c_int,
4328        nnzb: ::core::ffi::c_int,
4329        descrA: cusparseMatDescr_t,
4330        bsrSortedVal: *mut f32,
4331        bsrSortedRowPtr: *const ::core::ffi::c_int,
4332        bsrSortedColInd: *const ::core::ffi::c_int,
4333        blockDim: ::core::ffi::c_int,
4334        info: bsric02Info_t,
4335        pBufferSizeInBytes: *mut ::core::ffi::c_int,
4336    ) -> cusparseStatus_t;
4337}
4338unsafe extern "C" {
4339    /// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4340    ///
4341    /// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
4342    ///
4343    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
4344    ///
4345    /// * The routine requires no extra storage.
4346    /// * The routine supports asynchronous execution.
4347    /// * The routine supports CUDA graph capture.
        ///
        /// In this binding the array parameters are named `bsrSortedVal` (`*mut f64`), `bsrSortedRowPtr`, and `bsrSortedColInd` (presumably the `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` referred to above), and the result pointer `pBufferSizeInBytes` is `*mut c_int`.
4348    pub fn cusparseDbsric02_bufferSize(
4349        handle: cusparseHandle_t,
4350        dirA: cusparseDirection_t,
4351        mb: ::core::ffi::c_int,
4352        nnzb: ::core::ffi::c_int,
4353        descrA: cusparseMatDescr_t,
4354        bsrSortedVal: *mut f64,
4355        bsrSortedRowPtr: *const ::core::ffi::c_int,
4356        bsrSortedColInd: *const ::core::ffi::c_int,
4357        blockDim: ::core::ffi::c_int,
4358        info: bsric02Info_t,
4359        pBufferSizeInBytes: *mut ::core::ffi::c_int,
4360    ) -> cusparseStatus_t;
4361}
4362unsafe extern "C" {
4363    /// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4364    ///
4365    /// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
4366    ///
4367    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
4368    ///
4369    /// * The routine requires no extra storage.
4370    /// * The routine supports asynchronous execution.
4371    /// * The routine supports CUDA graph capture.
        ///
        /// In this binding the array parameters are named `bsrSortedVal` (`*mut cuComplex`), `bsrSortedRowPtr`, and `bsrSortedColInd` (presumably the `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` referred to above), and the result pointer `pBufferSizeInBytes` is `*mut c_int`.
4372    pub fn cusparseCbsric02_bufferSize(
4373        handle: cusparseHandle_t,
4374        dirA: cusparseDirection_t,
4375        mb: ::core::ffi::c_int,
4376        nnzb: ::core::ffi::c_int,
4377        descrA: cusparseMatDescr_t,
4378        bsrSortedVal: *mut cuComplex,
4379        bsrSortedRowPtr: *const ::core::ffi::c_int,
4380        bsrSortedColInd: *const ::core::ffi::c_int,
4381        blockDim: ::core::ffi::c_int,
4382        info: bsric02Info_t,
4383        pBufferSizeInBytes: *mut ::core::ffi::c_int,
4384    ) -> cusparseStatus_t;
4385}
4386unsafe extern "C" {
4387    /// This function returns the size of a buffer used in computing the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4388    ///
4389    /// `A` is an `(mb*blockDim)*(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`.
4390    ///
4391    /// The buffer size depends on the dimensions of `mb`, `blockDim`, and the number of nonzero blocks of the matrix `nnzb`. If the user changes the matrix, it is necessary to call `bsric02_bufferSize()` again to have the correct buffer size; otherwise, a segmentation fault may occur.
4392    ///
4393    /// * The routine requires no extra storage.
4394    /// * The routine supports asynchronous execution.
4395    /// * The routine supports CUDA graph capture.
        ///
        /// In this binding the array parameters are named `bsrSortedVal` (`*mut cuDoubleComplex`), `bsrSortedRowPtr`, and `bsrSortedColInd` (presumably the `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` referred to above), and the result pointer `pBufferSizeInBytes` is `*mut c_int`.
4396    pub fn cusparseZbsric02_bufferSize(
4397        handle: cusparseHandle_t,
4398        dirA: cusparseDirection_t,
4399        mb: ::core::ffi::c_int,
4400        nnzb: ::core::ffi::c_int,
4401        descrA: cusparseMatDescr_t,
4402        bsrSortedVal: *mut cuDoubleComplex,
4403        bsrSortedRowPtr: *const ::core::ffi::c_int,
4404        bsrSortedColInd: *const ::core::ffi::c_int,
4405        blockDim: ::core::ffi::c_int,
4406        info: bsric02Info_t,
4407        pBufferSizeInBytes: *mut ::core::ffi::c_int,
4408    ) -> cusparseStatus_t;
4409}
4410unsafe extern "C" {
        /// Raw binding for the C function `cusparseSbsric02_bufferSizeExt`; no upstream description was carried into this declaration.
        ///
        /// The signature mirrors [`cusparseSbsric02_bufferSize`], except that the block-dimension parameter is named `blockSize` and the result pointer `pBufferSize` is `*mut size_t` rather than `*mut c_int`. The value array `bsrSortedVal` is `*mut f32`.
        ///
        /// NOTE(review): presumably this reports the same work-buffer size as [`cusparseSbsric02_bufferSize`] through a wider result type — confirm against the cuSPARSE headers.
4411    pub fn cusparseSbsric02_bufferSizeExt(
4412        handle: cusparseHandle_t,
4413        dirA: cusparseDirection_t,
4414        mb: ::core::ffi::c_int,
4415        nnzb: ::core::ffi::c_int,
4416        descrA: cusparseMatDescr_t,
4417        bsrSortedVal: *mut f32,
4418        bsrSortedRowPtr: *const ::core::ffi::c_int,
4419        bsrSortedColInd: *const ::core::ffi::c_int,
4420        blockSize: ::core::ffi::c_int,
4421        info: bsric02Info_t,
4422        pBufferSize: *mut size_t,
4423    ) -> cusparseStatus_t;
4424}
4425unsafe extern "C" {
        /// Raw binding for the C function `cusparseDbsric02_bufferSizeExt`; no upstream description was carried into this declaration.
        ///
        /// The signature mirrors [`cusparseDbsric02_bufferSize`], except that the block-dimension parameter is named `blockSize` and the result pointer `pBufferSize` is `*mut size_t` rather than `*mut c_int`. The value array `bsrSortedVal` is `*mut f64`.
        ///
        /// NOTE(review): presumably this reports the same work-buffer size as [`cusparseDbsric02_bufferSize`] through a wider result type — confirm against the cuSPARSE headers.
4426    pub fn cusparseDbsric02_bufferSizeExt(
4427        handle: cusparseHandle_t,
4428        dirA: cusparseDirection_t,
4429        mb: ::core::ffi::c_int,
4430        nnzb: ::core::ffi::c_int,
4431        descrA: cusparseMatDescr_t,
4432        bsrSortedVal: *mut f64,
4433        bsrSortedRowPtr: *const ::core::ffi::c_int,
4434        bsrSortedColInd: *const ::core::ffi::c_int,
4435        blockSize: ::core::ffi::c_int,
4436        info: bsric02Info_t,
4437        pBufferSize: *mut size_t,
4438    ) -> cusparseStatus_t;
4439}
4440unsafe extern "C" {
        /// Raw binding for the C function `cusparseCbsric02_bufferSizeExt`; no upstream description was carried into this declaration.
        ///
        /// The signature mirrors [`cusparseCbsric02_bufferSize`], except that the block-dimension parameter is named `blockSize` and the result pointer `pBufferSize` is `*mut size_t` rather than `*mut c_int`. The value array `bsrSortedVal` is `*mut cuComplex`.
        ///
        /// NOTE(review): presumably this reports the same work-buffer size as [`cusparseCbsric02_bufferSize`] through a wider result type — confirm against the cuSPARSE headers.
4441    pub fn cusparseCbsric02_bufferSizeExt(
4442        handle: cusparseHandle_t,
4443        dirA: cusparseDirection_t,
4444        mb: ::core::ffi::c_int,
4445        nnzb: ::core::ffi::c_int,
4446        descrA: cusparseMatDescr_t,
4447        bsrSortedVal: *mut cuComplex,
4448        bsrSortedRowPtr: *const ::core::ffi::c_int,
4449        bsrSortedColInd: *const ::core::ffi::c_int,
4450        blockSize: ::core::ffi::c_int,
4451        info: bsric02Info_t,
4452        pBufferSize: *mut size_t,
4453    ) -> cusparseStatus_t;
4454}
4455unsafe extern "C" {
        /// Raw binding for the C function `cusparseZbsric02_bufferSizeExt`; no upstream description was carried into this declaration.
        ///
        /// The signature mirrors [`cusparseZbsric02_bufferSize`], except that the block-dimension parameter is named `blockSize` and the result pointer `pBufferSize` is `*mut size_t` rather than `*mut c_int`. The value array `bsrSortedVal` is `*mut cuDoubleComplex`.
        ///
        /// NOTE(review): presumably this reports the same work-buffer size as [`cusparseZbsric02_bufferSize`] through a wider result type — confirm against the cuSPARSE headers.
4456    pub fn cusparseZbsric02_bufferSizeExt(
4457        handle: cusparseHandle_t,
4458        dirA: cusparseDirection_t,
4459        mb: ::core::ffi::c_int,
4460        nnzb: ::core::ffi::c_int,
4461        descrA: cusparseMatDescr_t,
4462        bsrSortedVal: *mut cuDoubleComplex,
4463        bsrSortedRowPtr: *const ::core::ffi::c_int,
4464        bsrSortedColInd: *const ::core::ffi::c_int,
4465        blockSize: ::core::ffi::c_int,
4466        info: bsric02Info_t,
4467        pBufferSize: *mut size_t,
4468    ) -> cusparseStatus_t;
4469}
4470unsafe extern "C" {
4471    /// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4472    ///
4473    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
4474    ///
4475    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4476    ///
4477    /// Function `bsric02_analysis()` reports structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4478    ///
4479    /// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
4480    ///
4481    /// It is the user's choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
4482    ///
4483    /// * This function requires temporary extra storage that is allocated internally.
4484    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4485    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
        ///
        /// In this binding the value array `bsrSortedVal` is `*const f32`, and the buffer parameter is named `pInputBuffer` (presumably the `pBuffer` referred to above).
4486    pub fn cusparseSbsric02_analysis(
4487        handle: cusparseHandle_t,
4488        dirA: cusparseDirection_t,
4489        mb: ::core::ffi::c_int,
4490        nnzb: ::core::ffi::c_int,
4491        descrA: cusparseMatDescr_t,
4492        bsrSortedVal: *const f32,
4493        bsrSortedRowPtr: *const ::core::ffi::c_int,
4494        bsrSortedColInd: *const ::core::ffi::c_int,
4495        blockDim: ::core::ffi::c_int,
4496        info: bsric02Info_t,
4497        policy: cusparseSolvePolicy_t,
4498        pInputBuffer: *mut ::core::ffi::c_void,
4499    ) -> cusparseStatus_t;
4500}
4501unsafe extern "C" {
4502    /// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4503    ///
4504    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
4505    ///
4506    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4507    ///
4508    /// Function `bsric02_analysis()` reports structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4509    ///
4510    /// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
4511    ///
4512    /// It is the user's choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
4513    ///
4514    /// * This function requires temporary extra storage that is allocated internally.
4515    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4516    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
        ///
        /// In this binding the value array `bsrSortedVal` is `*const f64`, and the buffer parameter is named `pInputBuffer` (presumably the `pBuffer` referred to above).
4517    pub fn cusparseDbsric02_analysis(
4518        handle: cusparseHandle_t,
4519        dirA: cusparseDirection_t,
4520        mb: ::core::ffi::c_int,
4521        nnzb: ::core::ffi::c_int,
4522        descrA: cusparseMatDescr_t,
4523        bsrSortedVal: *const f64,
4524        bsrSortedRowPtr: *const ::core::ffi::c_int,
4525        bsrSortedColInd: *const ::core::ffi::c_int,
4526        blockDim: ::core::ffi::c_int,
4527        info: bsric02Info_t,
4528        policy: cusparseSolvePolicy_t,
4529        pInputBuffer: *mut ::core::ffi::c_void,
4530    ) -> cusparseStatus_t;
4531}
4532unsafe extern "C" {
4533    /// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4534    ///
4535    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
4536    ///
4537    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4538    ///
4539    /// Function `bsric02_analysis()` reports structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4540    ///
4541    /// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
4542    ///
4543    /// It is the user's choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
4544    ///
4545    /// * This function requires temporary extra storage that is allocated internally.
4546    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4547    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
        ///
        /// In this binding the value array `bsrSortedVal` is `*const cuComplex`, and the buffer parameter is named `pInputBuffer` (presumably the `pBuffer` referred to above).
4548    pub fn cusparseCbsric02_analysis(
4549        handle: cusparseHandle_t,
4550        dirA: cusparseDirection_t,
4551        mb: ::core::ffi::c_int,
4552        nnzb: ::core::ffi::c_int,
4553        descrA: cusparseMatDescr_t,
4554        bsrSortedVal: *const cuComplex,
4555        bsrSortedRowPtr: *const ::core::ffi::c_int,
4556        bsrSortedColInd: *const ::core::ffi::c_int,
4557        blockDim: ::core::ffi::c_int,
4558        info: bsric02Info_t,
4559        policy: cusparseSolvePolicy_t,
4560        pInputBuffer: *mut ::core::ffi::c_void,
4561    ) -> cusparseStatus_t;
4562}
4563unsafe extern "C" {
4564    /// This function performs the analysis phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4565    ///
4566    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
4567    ///
4568    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4569    ///
4570    /// Function `bsric02_analysis()` reports structural zero and computes level information stored in the opaque structure `info`. The level information can extract more parallelism during incomplete Cholesky factorization. However, `bsric02()` can be done without level information. To disable level information, the user needs to specify the parameter `policy` of `bsric02_analysis()` and `bsric02()` as [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`].
4571    ///
4572    /// Function `bsric02_analysis()` always reports the first structural zero, even when parameter `policy` is [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]. The user must call [`cusparseXbsric02_zeroPivot`] to know where the structural zero is.
4573    ///
4574    /// It is the user's choice whether to call `bsric02()` if `bsric02_analysis()` reports a structural zero. In this case, the user can still call `bsric02()`, which returns a numerical zero in the same position as the structural zero. However, the result is meaningless.
4575    ///
4576    /// * This function requires temporary extra storage that is allocated internally.
4577    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
4578    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
        ///
        /// In this binding the value array `bsrSortedVal` is `*const cuDoubleComplex`, and the buffer parameter is named `pInputBuffer` (presumably the `pBuffer` referred to above).
4579    pub fn cusparseZbsric02_analysis(
4580        handle: cusparseHandle_t,
4581        dirA: cusparseDirection_t,
4582        mb: ::core::ffi::c_int,
4583        nnzb: ::core::ffi::c_int,
4584        descrA: cusparseMatDescr_t,
4585        bsrSortedVal: *const cuDoubleComplex,
4586        bsrSortedRowPtr: *const ::core::ffi::c_int,
4587        bsrSortedColInd: *const ::core::ffi::c_int,
4588        blockDim: ::core::ffi::c_int,
4589        info: bsric02Info_t,
4590        policy: cusparseSolvePolicy_t,
4591        pInputBuffer: *mut ::core::ffi::c_void,
4592    ) -> cusparseStatus_t;
4593}
4594unsafe extern "C" {
4595    /// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting.
4596    ///
4597    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`. The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
4598    ///
4599    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4600    ///
4601    /// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
4602    ///
4603    /// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
4604    ///
4605    /// Function `bsric02()` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
4606    ///
4607    /// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
4608    ///
4609    /// For example, if `A` is a real m-by-m matrix, where `m=mb*blockDim`, the factorization can be used to solve the precondition system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose. (The sample code this sentence originally introduced is not included in this doc comment.)
4610    ///
4611    /// The function supports the following properties if `pBuffer != NULL`:
4612    ///
4613    /// * The routine requires no extra storage.
4614    /// * The routine supports asynchronous execution.
4615    /// * The routine supports CUDA graph capture.
        ///
        /// This binding is the variant whose value array `bsrSortedVal` is `*mut f32`.
4616    pub fn cusparseSbsric02(
4617        handle: cusparseHandle_t,
4618        dirA: cusparseDirection_t,
4619        mb: ::core::ffi::c_int,
4620        nnzb: ::core::ffi::c_int,
4621        descrA: cusparseMatDescr_t,
4622        bsrSortedVal: *mut f32,
4623        bsrSortedRowPtr: *const ::core::ffi::c_int,
4624        bsrSortedColInd: *const ::core::ffi::c_int,
4625        blockDim: ::core::ffi::c_int,
4626        info: bsric02Info_t,
4627        policy: cusparseSolvePolicy_t,
4628        pBuffer: *mut ::core::ffi::c_void,
4629    ) -> cusparseStatus_t;
4630}
unsafe extern "C" {
    /// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting. This is the variant whose block values are `f64`.
    ///
    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this declaration). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
    ///
    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
    ///
    /// Function `bsric02` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
    ///
    /// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
    ///
    /// For example, suppose `A` is a real m-by-m matrix, where `m=mb*blockDim`. The upstream description continues with sample code (not reproduced in these bindings) that solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
    ///
    /// The function supports the following properties if `pBuffer != NULL`:
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle`, `descrA`, `info`, and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseDbsric02(
        handle: cusparseHandle_t,
        dirA: cusparseDirection_t,
        mb: ::core::ffi::c_int,
        nnzb: ::core::ffi::c_int,
        descrA: cusparseMatDescr_t,
        bsrSortedVal: *mut f64,
        bsrSortedRowPtr: *const ::core::ffi::c_int,
        bsrSortedColInd: *const ::core::ffi::c_int,
        blockDim: ::core::ffi::c_int,
        info: bsric02Info_t,
        policy: cusparseSolvePolicy_t,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting. This is the variant whose block values are `cuComplex`.
    ///
    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this declaration). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
    ///
    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
    ///
    /// Function `bsric02` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
    ///
    /// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
    ///
    /// For example, suppose `A` is a real m-by-m matrix, where `m=mb*blockDim`. The upstream description continues with sample code (not reproduced in these bindings) that solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
    ///
    /// The function supports the following properties if `pBuffer != NULL`:
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle`, `descrA`, `info`, and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseCbsric02(
        handle: cusparseHandle_t,
        dirA: cusparseDirection_t,
        mb: ::core::ffi::c_int,
        nnzb: ::core::ffi::c_int,
        descrA: cusparseMatDescr_t,
        bsrSortedVal: *mut cuComplex,
        bsrSortedRowPtr: *const ::core::ffi::c_int,
        bsrSortedColInd: *const ::core::ffi::c_int,
        blockDim: ::core::ffi::c_int,
        info: bsric02Info_t,
        policy: cusparseSolvePolicy_t,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function performs the solve phase of the incomplete-Cholesky factorization with 0 fill-in and no pivoting. This is the variant whose block values are `cuDoubleComplex`.
    ///
    /// `A` is an `(mb*blockDim)x(mb*blockDim)` sparse matrix that is defined in BSR storage format by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` (presumably the `bsrSortedVal`, `bsrSortedRowPtr`, and `bsrSortedColInd` parameters of this declaration). The block in BSR format is of size `blockDim*blockDim`, stored as column-major or row-major as determined by parameter `dirA`, which is either [`cusparseDirection_t::CUSPARSE_DIRECTION_COLUMN`] or [`cusparseDirection_t::CUSPARSE_DIRECTION_ROW`]. The matrix type must be [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`], and the fill mode and diagonal type are ignored.
    ///
    /// This function requires a buffer size returned by `bsric02_bufferSize()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// Although `bsric02()` can be done without level information, the user must be aware of consistency. If `bsric02_analysis()` is called with policy [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_USE_LEVEL`], `bsric02()` can be run with or without levels. On the other hand, if `bsric02_analysis()` is called with [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`], `bsric02()` can only accept [`cusparseSolvePolicy_t::CUSPARSE_SOLVE_POLICY_NO_LEVEL`]; otherwise, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// Function `bsric02()` has the same behavior as `csric02()`. That is, `bsr2csr(bsric02(A)) = csric02(bsr2csr(A))`. The numerical zero of `csric02()` means there exists some zero `L(j,j)`. The numerical zero of `bsric02()` means there exists some block `L(j,j)` that is not invertible.
    ///
    /// Function `bsric02` reports the first numerical zero, including a structural zero. The user must call [`cusparseXbsric02_zeroPivot`] to know where the numerical zero is.
    ///
    /// The `bsric02()` function only takes the lower triangular part of matrix `A` to perform factorization. The strictly upper triangular part is ignored and never touched. It does not matter if `A` is Hermitian or not. In other words, from the point of view of `bsric02()`, `A` is Hermitian and only the lower triangular part is provided. Moreover, the imaginary part of diagonal elements of diagonal blocks is ignored.
    ///
    /// For example, suppose `A` is a real m-by-m matrix, where `m=mb*blockDim`. The upstream description continues with sample code (not reproduced in these bindings) that solves the preconditioned system `M*y = x`, where `M` is the product of the Cholesky factor `L` and its transpose.
    ///
    /// The function supports the following properties if `pBuffer != NULL`:
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle`, `descrA`, `info`, and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseZbsric02(
        handle: cusparseHandle_t,
        dirA: cusparseDirection_t,
        mb: ::core::ffi::c_int,
        nnzb: ::core::ffi::c_int,
        descrA: cusparseMatDescr_t,
        bsrSortedVal: *mut cuDoubleComplex,
        bsrSortedRowPtr: *const ::core::ffi::c_int,
        bsrSortedColInd: *const ::core::ffi::c_int,
        blockDim: ::core::ffi::c_int,
        info: bsric02Info_t,
        policy: cusparseSolvePolicy_t,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// Buffer-size query that accompanies [`cusparseSgtsv2`], whose documentation requires "a buffer size returned by `gtsv2_bufferSizeExt()`". This is the `f32` variant.
    ///
    /// The parameter list mirrors [`cusparseSgtsv2`] (`handle`, `m`, `n`, `dl`, `d`, `du`, `B`, `ldb`), except that `B` is a `*const` pointer here and the final argument is the `bufferSizeInBytes` output pointer rather than `pBuffer`.
    ///
    /// NOTE(review): no upstream description was generated for this declaration. Presumably the required workspace size, in bytes, is written through `bufferSizeInBytes`; confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseSgtsv2_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f32,
        d: *const f32,
        du: *const f32,
        B: *const f32,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// Buffer-size query that accompanies [`cusparseDgtsv2`], whose documentation requires "a buffer size returned by `gtsv2_bufferSizeExt()`". This is the `f64` variant.
    ///
    /// The parameter list mirrors [`cusparseDgtsv2`] (`handle`, `m`, `n`, `dl`, `d`, `du`, `B`, `ldb`), except that `B` is a `*const` pointer here and the final argument is the `bufferSizeInBytes` output pointer rather than `pBuffer`.
    ///
    /// NOTE(review): no upstream description was generated for this declaration. Presumably the required workspace size, in bytes, is written through `bufferSizeInBytes`; confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseDgtsv2_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f64,
        d: *const f64,
        du: *const f64,
        B: *const f64,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// Buffer-size query that accompanies [`cusparseCgtsv2`], whose documentation requires "a buffer size returned by `gtsv2_bufferSizeExt()`". This is the `cuComplex` variant.
    ///
    /// The parameter list mirrors [`cusparseCgtsv2`] (`handle`, `m`, `n`, `dl`, `d`, `du`, `B`, `ldb`), except that `B` is a `*const` pointer here and the final argument is the `bufferSizeInBytes` output pointer rather than `pBuffer`.
    ///
    /// NOTE(review): no upstream description was generated for this declaration. Presumably the required workspace size, in bytes, is written through `bufferSizeInBytes`; confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseCgtsv2_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuComplex,
        d: *const cuComplex,
        du: *const cuComplex,
        B: *const cuComplex,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// Buffer-size query that accompanies [`cusparseZgtsv2`], whose documentation requires "a buffer size returned by `gtsv2_bufferSizeExt()`". This is the `cuDoubleComplex` variant.
    ///
    /// The parameter list mirrors [`cusparseZgtsv2`] (`handle`, `m`, `n`, `dl`, `d`, `du`, `B`, `ldb`), except that `B` is a `*const` pointer here and the final argument is the `bufferSizeInBytes` output pointer rather than `pBuffer`.
    ///
    /// NOTE(review): no upstream description was generated for this declaration. Presumably the required workspace size, in bytes, is written through `bufferSizeInBytes`; confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseZgtsv2_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuDoubleComplex,
        d: *const cuDoubleComplex,
        du: *const cuDoubleComplex,
        B: *const cuDoubleComplex,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`f32` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
    ///
    /// `dl(i) := A(i, i-1)` for `i=1,2,...,m`
    ///
    /// The first element of `dl` is out-of-bound (`dl(1) := A(1,0)`), so `dl(1) = 0`.
    ///
    /// `d(i) = A(i,i)` for `i=1,2,...,m`
    ///
    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
    ///
    /// The last element of `du` is out-of-bound (`du(m) := A(m,m+1)`), so `du(m) = 0`.
    ///
    /// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparseSgtsv_nopivot()` or [`cusparseSgtsv2_nopivot`] at the expense of some execution time.
    ///
    /// This function requires a buffer size returned by `gtsv2_bufferSizeExt()` (presumably [`cusparseSgtsv2_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseSgtsv2(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f32,
        d: *const f32,
        du: *const f32,
        B: *mut f32,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`f64` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
    ///
    /// `dl(i) := A(i, i-1)` for `i=1,2,...,m`
    ///
    /// The first element of `dl` is out-of-bound (`dl(1) := A(1,0)`), so `dl(1) = 0`.
    ///
    /// `d(i) = A(i,i)` for `i=1,2,...,m`
    ///
    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
    ///
    /// The last element of `du` is out-of-bound (`du(m) := A(m,m+1)`), so `du(m) = 0`.
    ///
    /// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparseDgtsv_nopivot()` or [`cusparseDgtsv2_nopivot`] at the expense of some execution time.
    ///
    /// This function requires a buffer size returned by `gtsv2_bufferSizeExt()` (presumably [`cusparseDgtsv2_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseDgtsv2(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f64,
        d: *const f64,
        du: *const f64,
        B: *mut f64,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`cuComplex` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
    ///
    /// `dl(i) := A(i, i-1)` for `i=1,2,...,m`
    ///
    /// The first element of `dl` is out-of-bound (`dl(1) := A(1,0)`), so `dl(1) = 0`.
    ///
    /// `d(i) = A(i,i)` for `i=1,2,...,m`
    ///
    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
    ///
    /// The last element of `du` is out-of-bound (`du(m) := A(m,m+1)`), so `du(m) = 0`.
    ///
    /// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparseCgtsv_nopivot()` or [`cusparseCgtsv2_nopivot`] at the expense of some execution time.
    ///
    /// This function requires a buffer size returned by `gtsv2_bufferSizeExt()` (presumably [`cusparseCgtsv2_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseCgtsv2(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuComplex,
        d: *const cuComplex,
        du: *const cuComplex,
        B: *mut cuComplex,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`cuDoubleComplex` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formulas:
    ///
    /// `dl(i) := A(i, i-1)` for `i=1,2,...,m`
    ///
    /// The first element of `dl` is out-of-bound (`dl(1) := A(1,0)`), so `dl(1) = 0`.
    ///
    /// `d(i) = A(i,i)` for `i=1,2,...,m`
    ///
    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
    ///
    /// The last element of `du` is out-of-bound (`du(m) := A(m,m+1)`), so `du(m) = 0`.
    ///
    /// The routine does perform pivoting, which usually results in more accurate and more stable results than `cusparseZgtsv_nopivot()` or [`cusparseZgtsv2_nopivot`] at the expense of some execution time.
    ///
    /// This function requires a buffer size returned by `gtsv2_bufferSizeExt()` (presumably [`cusparseZgtsv2_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseZgtsv2(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuDoubleComplex,
        d: *const cuDoubleComplex,
        du: *const cuDoubleComplex,
        B: *mut cuDoubleComplex,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the size of the buffer used in `gtsv2_nopivot` (presumably [`cusparseSgtsv2_nopivot`] for this `f32` variant), which computes the solution of a tridiagonal linear system with multiple right-hand sides.
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// NOTE(review): presumably the size, in bytes, is written through the `bufferSizeInBytes` output pointer; confirm against the cuSPARSE documentation.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseSgtsv2_nopivot_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f32,
        d: *const f32,
        du: *const f32,
        B: *const f32,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the size of the buffer used in `gtsv2_nopivot` (presumably [`cusparseDgtsv2_nopivot`] for this `f64` variant), which computes the solution of a tridiagonal linear system with multiple right-hand sides.
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// NOTE(review): presumably the size, in bytes, is written through the `bufferSizeInBytes` output pointer; confirm against the cuSPARSE documentation.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseDgtsv2_nopivot_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f64,
        d: *const f64,
        du: *const f64,
        B: *const f64,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the size of the buffer used in `gtsv2_nopivot` (presumably [`cusparseCgtsv2_nopivot`] for this `cuComplex` variant), which computes the solution of a tridiagonal linear system with multiple right-hand sides.
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// NOTE(review): presumably the size, in bytes, is written through the `bufferSizeInBytes` output pointer; confirm against the cuSPARSE documentation.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseCgtsv2_nopivot_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuComplex,
        d: *const cuComplex,
        du: *const cuComplex,
        B: *const cuComplex,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the size of the buffer used in `gtsv2_nopivot` (presumably [`cusparseZgtsv2_nopivot`] for this `cuDoubleComplex` variant), which computes the solution of a tridiagonal linear system with multiple right-hand sides.
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// NOTE(review): presumably the size, in bytes, is written through the `bufferSizeInBytes` output pointer; confirm against the cuSPARSE documentation.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseZgtsv2_nopivot_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuDoubleComplex,
        d: *const cuDoubleComplex,
        du: *const cuDoubleComplex,
        B: *const cuDoubleComplex,
        ldb: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`f32` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
    ///
    /// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()` (presumably [`cusparseSgtsv2_nopivot_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseSgtsv2_nopivot(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f32,
        d: *const f32,
        du: *const f32,
        B: *mut f32,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`f64` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
    ///
    /// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()` (presumably [`cusparseDgtsv2_nopivot_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseDgtsv2_nopivot(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const f64,
        d: *const f64,
        du: *const f64,
        B: *mut f64,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`cuComplex` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
    ///
    /// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()` (presumably [`cusparseCgtsv2_nopivot_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseCgtsv2_nopivot(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuComplex,
        d: *const cuComplex,
        du: *const cuComplex,
        B: *mut cuComplex,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function computes the solution of a tridiagonal linear system with multiple right-hand sides (`cuDoubleComplex` variant):
    ///
    /// `A * X = B`
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that the solution `X` overwrites the right-hand-side matrix `B` on exit.
    ///
    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
    ///
    /// This function requires a buffer size returned by `gtsv2_nopivot_bufferSizeExt()` (presumably [`cusparseZgtsv2_nopivot_bufferSizeExt`] for this variant). The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): the validity requirements for `handle` and the pointer arguments come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseZgtsv2_nopivot(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        n: ::core::ffi::c_int,
        dl: *const cuDoubleComplex,
        d: *const cuDoubleComplex,
        du: *const cuDoubleComplex,
        B: *mut cuDoubleComplex,
        ldb: ::core::ffi::c_int,
        pBuffer: *mut ::core::ffi::c_void,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the size of the buffer used in `gtsv2StridedBatch` (`f32` variant), which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
    ///
    /// `A(i) * Y(i) = X(i)`
    ///
    /// NOTE(review): the equation above is reconstructed from the description below because the original formula was not emitted; confirm it against the cuSPARSE documentation.
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X` (presumably the `x` parameter of this declaration). Notice that the solution `Y` overwrites the right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseSgtsv2StridedBatch_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        dl: *const f32,
        d: *const f32,
        du: *const f32,
        x: *const f32,
        batchCount: ::core::ffi::c_int,
        batchStride: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the size of the buffer used in `gtsv2StridedBatch` (`f64` variant), which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
    ///
    /// `A(i) * Y(i) = X(i)`
    ///
    /// NOTE(review): the equation above is reconstructed from the description below because the original formula was not emitted; confirm it against the cuSPARSE documentation.
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X` (presumably the `x` parameter of this declaration). Notice that the solution `Y` overwrites the right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseDgtsv2StridedBatch_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        dl: *const f64,
        d: *const f64,
        du: *const f64,
        x: *const f64,
        batchCount: ::core::ffi::c_int,
        batchStride: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
unsafe extern "C" {
    /// This function returns the size of the buffer used in `gtsv2StridedBatch` (`cuComplex` variant), which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`:
    ///
    /// `A(i) * Y(i) = X(i)`
    ///
    /// NOTE(review): the equation above is reconstructed from the description below because the original formula was not emitted; confirm it against the cuSPARSE documentation.
    ///
    /// The coefficient matrix `A` of each of these tridiagonal linear systems is defined by three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X` (presumably the `x` parameter of this declaration). Notice that the solution `Y` overwrites the right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory.
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution.
    /// * The routine supports CUDA graph capture.
    ///
    /// # Safety
    ///
    /// This is a raw FFI declaration inside an `unsafe extern "C"` block, so Rust validates none of the arguments. NOTE(review): pointer validity requirements come from the cuSPARSE documentation; confirm them there before calling.
    pub fn cusparseCgtsv2StridedBatch_bufferSizeExt(
        handle: cusparseHandle_t,
        m: ::core::ffi::c_int,
        dl: *const cuComplex,
        d: *const cuComplex,
        du: *const cuComplex,
        x: *const cuComplex,
        batchCount: ::core::ffi::c_int,
        batchStride: ::core::ffi::c_int,
        bufferSizeInBytes: *mut size_t,
    ) -> cusparseStatus_t;
}
5174unsafe extern "C" {
5175    /// This function returns the size of the buffer used in `gtsv2StridedBatch`, which computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * y(i) = x(i)` (formula reconstructed from the description below).
5176    ///
5177    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory. In this binding `dl`, `d`, `du` and `x` are `*const cuDoubleComplex`; the size is presumably written through the `bufferSizeInBytes` out-pointer (bytes, going by the parameter name — confirm against the cuSPARSE reference).
5178    ///
5179    /// * The routine requires no extra storage.
5180    /// * The routine supports asynchronous execution.
5181    /// * The routine supports CUDA graph capture.
5182    pub fn cusparseZgtsv2StridedBatch_bufferSizeExt(
5183        handle: cusparseHandle_t,
5184        m: ::core::ffi::c_int,
5185        dl: *const cuDoubleComplex,
5186        d: *const cuDoubleComplex,
5187        du: *const cuDoubleComplex,
5188        x: *const cuDoubleComplex,
5189        batchCount: ::core::ffi::c_int,
5190        batchStride: ::core::ffi::c_int,
5191        bufferSizeInBytes: *mut size_t,
5192    ) -> cusparseStatus_t;
5193}
5194unsafe extern "C" {
5195    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * y(i) = x(i)` (formula reconstructed from the description below).
5196    ///
5197    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory. In this binding `dl`, `d` and `du` are `*const f32` while `x` is `*mut f32`, consistent with `X` being overwritten.
5198    ///
5199    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
5200    ///
5201    /// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5202    ///
5203    /// * The routine requires no extra storage.
5204    /// * The routine supports asynchronous execution.
5205    /// * The routine supports CUDA graph capture.
5206    pub fn cusparseSgtsv2StridedBatch(
5207        handle: cusparseHandle_t,
5208        m: ::core::ffi::c_int,
5209        dl: *const f32,
5210        d: *const f32,
5211        du: *const f32,
5212        x: *mut f32,
5213        batchCount: ::core::ffi::c_int,
5214        batchStride: ::core::ffi::c_int,
5215        pBuffer: *mut ::core::ffi::c_void,
5216    ) -> cusparseStatus_t;
5217}
5218unsafe extern "C" {
5219    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * y(i) = x(i)` (formula reconstructed from the description below).
5220    ///
5221    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory. In this binding `dl`, `d` and `du` are `*const f64` while `x` is `*mut f64`, consistent with `X` being overwritten.
5222    ///
5223    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
5224    ///
5225    /// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5226    ///
5227    /// * The routine requires no extra storage.
5228    /// * The routine supports asynchronous execution.
5229    /// * The routine supports CUDA graph capture.
5230    pub fn cusparseDgtsv2StridedBatch(
5231        handle: cusparseHandle_t,
5232        m: ::core::ffi::c_int,
5233        dl: *const f64,
5234        d: *const f64,
5235        du: *const f64,
5236        x: *mut f64,
5237        batchCount: ::core::ffi::c_int,
5238        batchStride: ::core::ffi::c_int,
5239        pBuffer: *mut ::core::ffi::c_void,
5240    ) -> cusparseStatus_t;
5241}
5242unsafe extern "C" {
5243    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * y(i) = x(i)` (formula reconstructed from the description below).
5244    ///
5245    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory. In this binding `dl`, `d` and `du` are `*const cuComplex` while `x` is `*mut cuComplex`, consistent with `X` being overwritten.
5246    ///
5247    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
5248    ///
5249    /// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5250    ///
5251    /// * The routine requires no extra storage.
5252    /// * The routine supports asynchronous execution.
5253    /// * The routine supports CUDA graph capture.
5254    pub fn cusparseCgtsv2StridedBatch(
5255        handle: cusparseHandle_t,
5256        m: ::core::ffi::c_int,
5257        dl: *const cuComplex,
5258        d: *const cuComplex,
5259        du: *const cuComplex,
5260        x: *mut cuComplex,
5261        batchCount: ::core::ffi::c_int,
5262        batchStride: ::core::ffi::c_int,
5263        pBuffer: *mut ::core::ffi::c_void,
5264    ) -> cusparseStatus_t;
5265}
5266unsafe extern "C" {
5267    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * y(i) = x(i)` (formula reconstructed from the description below).
5268    ///
5269    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `X`. Notice that solution `Y` overwrites right-hand-side matrix `X` on exit. The different matrices are assumed to be of the same size and are stored with a fixed `batchStride` in memory. In this binding `dl`, `d` and `du` are `*const cuDoubleComplex` while `x` is `*mut cuDoubleComplex`, consistent with `X` being overwritten.
5270    ///
5271    /// The routine does not perform any pivoting and uses a combination of the Cyclic Reduction (CR) and the Parallel Cyclic Reduction (PCR) algorithms to find the solution. It achieves better performance when `m` is a power of 2.
5272    ///
5273    /// This function requires a buffer size returned by `gtsv2StridedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5274    ///
5275    /// * The routine requires no extra storage.
5276    /// * The routine supports asynchronous execution.
5277    /// * The routine supports CUDA graph capture.
5278    pub fn cusparseZgtsv2StridedBatch(
5279        handle: cusparseHandle_t,
5280        m: ::core::ffi::c_int,
5281        dl: *const cuDoubleComplex,
5282        d: *const cuDoubleComplex,
5283        du: *const cuDoubleComplex,
5284        x: *mut cuDoubleComplex,
5285        batchCount: ::core::ffi::c_int,
5286        batchStride: ::core::ffi::c_int,
5287        pBuffer: *mut ::core::ffi::c_void,
5288    ) -> cusparseStatus_t;
5289}
5290unsafe extern "C" {
    /// Buffer-size query for [`cusparseSgtsvInterleavedBatch`], whose documentation refers to this routine as `gtsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes the solver's `handle`, `algo`, `m`, `dl`, `d`, `du`, `x` and `batchCount` parameters, with the data pointers as `*const f32` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5291    pub fn cusparseSgtsvInterleavedBatch_bufferSizeExt(
5292        handle: cusparseHandle_t,
5293        algo: ::core::ffi::c_int,
5294        m: ::core::ffi::c_int,
5295        dl: *const f32,
5296        d: *const f32,
5297        du: *const f32,
5298        x: *const f32,
5299        batchCount: ::core::ffi::c_int,
5300        pBufferSizeInBytes: *mut size_t,
5301    ) -> cusparseStatus_t;
5302}
5303unsafe extern "C" {
    /// Buffer-size query for [`cusparseDgtsvInterleavedBatch`], whose documentation refers to this routine as `gtsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes the solver's `handle`, `algo`, `m`, `dl`, `d`, `du`, `x` and `batchCount` parameters, with the data pointers as `*const f64` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5304    pub fn cusparseDgtsvInterleavedBatch_bufferSizeExt(
5305        handle: cusparseHandle_t,
5306        algo: ::core::ffi::c_int,
5307        m: ::core::ffi::c_int,
5308        dl: *const f64,
5309        d: *const f64,
5310        du: *const f64,
5311        x: *const f64,
5312        batchCount: ::core::ffi::c_int,
5313        pBufferSizeInBytes: *mut size_t,
5314    ) -> cusparseStatus_t;
5315}
5316unsafe extern "C" {
    /// Buffer-size query for [`cusparseCgtsvInterleavedBatch`], whose documentation refers to this routine as `gtsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes the solver's `handle`, `algo`, `m`, `dl`, `d`, `du`, `x` and `batchCount` parameters, with the data pointers as `*const cuComplex` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5317    pub fn cusparseCgtsvInterleavedBatch_bufferSizeExt(
5318        handle: cusparseHandle_t,
5319        algo: ::core::ffi::c_int,
5320        m: ::core::ffi::c_int,
5321        dl: *const cuComplex,
5322        d: *const cuComplex,
5323        du: *const cuComplex,
5324        x: *const cuComplex,
5325        batchCount: ::core::ffi::c_int,
5326        pBufferSizeInBytes: *mut size_t,
5327    ) -> cusparseStatus_t;
5328}
5329unsafe extern "C" {
    /// Buffer-size query for [`cusparseZgtsvInterleavedBatch`], whose documentation refers to this routine as `gtsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes the solver's `handle`, `algo`, `m`, `dl`, `d`, `du`, `x` and `batchCount` parameters, with the data pointers as `*const cuDoubleComplex` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5330    pub fn cusparseZgtsvInterleavedBatch_bufferSizeExt(
5331        handle: cusparseHandle_t,
5332        algo: ::core::ffi::c_int,
5333        m: ::core::ffi::c_int,
5334        dl: *const cuDoubleComplex,
5335        d: *const cuDoubleComplex,
5336        du: *const cuDoubleComplex,
5337        x: *const cuDoubleComplex,
5338        batchCount: ::core::ffi::c_int,
5339        pBufferSizeInBytes: *mut size_t,
5340    ) -> cusparseStatus_t;
5341}
5342unsafe extern "C" {
5343    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * x(i) = b(i)` (formula reconstructed from the description below).
5344    ///
5345    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. In this binding `dl`, `d`, `du` and `x` are all `*mut f32`; there is no separate `b` argument, so `B` is presumably passed in through `x` — confirm against the cuSPARSE reference.
5346    ///
5347    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formula:
5348    ///
5349    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5350    ///
5351    /// The first element of dl is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5352    ///
5353    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5354    ///
5355    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5356    ///
5357    /// The last element of du is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5358    ///
5359    /// The data layout is different from `gtsvStridedBatch` which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
5360    ///
5361    /// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas` provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and last algorithm is QR. From stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From performance perspective, LU with partial pivoting and QR is about 10% to 20% slower than cuThomas.
5362    ///
5363    /// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5364    ///
5365    /// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However such transformation takes time comparable to solver itself. To reach best performance, the user must prepare interleaved format explicitly.
5366    ///
5367    /// * This function requires temporary extra storage that is allocated internally.
5368    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
5369    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
5370    pub fn cusparseSgtsvInterleavedBatch(
5371        handle: cusparseHandle_t,
5372        algo: ::core::ffi::c_int,
5373        m: ::core::ffi::c_int,
5374        dl: *mut f32,
5375        d: *mut f32,
5376        du: *mut f32,
5377        x: *mut f32,
5378        batchCount: ::core::ffi::c_int,
5379        pBuffer: *mut ::core::ffi::c_void,
5380    ) -> cusparseStatus_t;
5381}
5382unsafe extern "C" {
5383    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * x(i) = b(i)` (formula reconstructed from the description below).
5384    ///
5385    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. In this binding `dl`, `d`, `du` and `x` are all `*mut f64`; there is no separate `b` argument, so `B` is presumably passed in through `x` — confirm against the cuSPARSE reference.
5386    ///
5387    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formula:
5388    ///
5389    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5390    ///
5391    /// The first element of dl is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5392    ///
5393    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5394    ///
5395    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5396    ///
5397    /// The last element of du is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5398    ///
5399    /// The data layout is different from `gtsvStridedBatch` which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
5400    ///
5401    /// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas` provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and last algorithm is QR. From stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From performance perspective, LU with partial pivoting and QR is about 10% to 20% slower than cuThomas.
5402    ///
5403    /// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5404    ///
5405    /// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However such transformation takes time comparable to solver itself. To reach best performance, the user must prepare interleaved format explicitly.
5406    ///
5407    /// * This function requires temporary extra storage that is allocated internally.
5408    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
5409    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
5410    pub fn cusparseDgtsvInterleavedBatch(
5411        handle: cusparseHandle_t,
5412        algo: ::core::ffi::c_int,
5413        m: ::core::ffi::c_int,
5414        dl: *mut f64,
5415        d: *mut f64,
5416        du: *mut f64,
5417        x: *mut f64,
5418        batchCount: ::core::ffi::c_int,
5419        pBuffer: *mut ::core::ffi::c_void,
5420    ) -> cusparseStatus_t;
5421}
5422unsafe extern "C" {
5423    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * x(i) = b(i)` (formula reconstructed from the description below).
5424    ///
5425    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. In this binding `dl`, `d`, `du` and `x` are all `*mut cuComplex`; there is no separate `b` argument, so `B` is presumably passed in through `x` — confirm against the cuSPARSE reference.
5426    ///
5427    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formula:
5428    ///
5429    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5430    ///
5431    /// The first element of dl is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5432    ///
5433    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5434    ///
5435    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5436    ///
5437    /// The last element of du is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5438    ///
5439    /// The data layout is different from `gtsvStridedBatch` which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
5440    ///
5441    /// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas` provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and last algorithm is QR. From stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From performance perspective, LU with partial pivoting and QR is about 10% to 20% slower than cuThomas.
5442    ///
5443    /// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5444    ///
5445    /// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However such transformation takes time comparable to solver itself. To reach best performance, the user must prepare interleaved format explicitly.
5446    ///
5447    /// * This function requires temporary extra storage that is allocated internally.
5448    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
5449    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
5450    pub fn cusparseCgtsvInterleavedBatch(
5451        handle: cusparseHandle_t,
5452        algo: ::core::ffi::c_int,
5453        m: ::core::ffi::c_int,
5454        dl: *mut cuComplex,
5455        d: *mut cuComplex,
5456        du: *mut cuComplex,
5457        x: *mut cuComplex,
5458        batchCount: ::core::ffi::c_int,
5459        pBuffer: *mut ::core::ffi::c_void,
5460    ) -> cusparseStatus_t;
5461}
5462unsafe extern "C" {
5463    /// This function computes the solution of multiple tridiagonal linear systems for *i*=0,…,`batchCount`: `A(i) * x(i) = b(i)` (formula reconstructed from the description below).
5464    ///
5465    /// The coefficient matrix `A` of each of these tri-diagonal linear systems is defined with three vectors corresponding to its lower (`dl`), main (`d`), and upper (`du`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. In this binding `dl`, `d`, `du` and `x` are all `*mut cuDoubleComplex`; there is no separate `b` argument, so `B` is presumably passed in through `x` — confirm against the cuSPARSE reference.
5466    ///
5467    /// Assuming `A` is of size `m` and base-1, `dl`, `d` and `du` are defined by the following formula:
5468    ///
5469    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5470    ///
5471    /// The first element of dl is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5472    ///
5473    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5474    ///
5475    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5476    ///
5477    /// The last element of du is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5478    ///
5479    /// The data layout is different from `gtsvStridedBatch` which aggregates all matrices one after another. Instead, `gtsvInterleavedBatch` gathers different matrices of the same element in a continuous manner. If `dl` is regarded as a 2-D array of size `m-by-batchCount`, `dl(:,j)` stores the `j-th` matrix. `gtsvStridedBatch` uses column-major while `gtsvInterleavedBatch` uses row-major.
5480    ///
5481    /// The routine provides three different algorithms, selected by parameter `algo`. The first algorithm is `cuThomas` provided by `Barcelona Supercomputing Center`. The second algorithm is LU with partial pivoting and last algorithm is QR. From stability perspective, cuThomas is not numerically stable because it does not have pivoting. LU with partial pivoting and QR are stable. From performance perspective, LU with partial pivoting and QR is about 10% to 20% slower than cuThomas.
5482    ///
5483    /// This function requires a buffer size returned by `gtsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5484    ///
5485    /// If the user prepares aggregate format, one can use `cublasXgeam` to get interleaved format. However such transformation takes time comparable to solver itself. To reach best performance, the user must prepare interleaved format explicitly.
5486    ///
5487    /// * This function requires temporary extra storage that is allocated internally.
5488    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
5489    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
5490    pub fn cusparseZgtsvInterleavedBatch(
5491        handle: cusparseHandle_t,
5492        algo: ::core::ffi::c_int,
5493        m: ::core::ffi::c_int,
5494        dl: *mut cuDoubleComplex,
5495        d: *mut cuDoubleComplex,
5496        du: *mut cuDoubleComplex,
5497        x: *mut cuDoubleComplex,
5498        batchCount: ::core::ffi::c_int,
5499        pBuffer: *mut ::core::ffi::c_void,
5500    ) -> cusparseStatus_t;
5501}
5502unsafe extern "C" {
    /// Buffer-size query for [`cusparseSgpsvInterleavedBatch`], whose documentation refers to this routine as `gpsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes the solver's `handle`, `algo`, `m`, `ds`, `dl`, `d`, `du`, `dw`, `x` and `batchCount` parameters, with the data pointers as `*const f32` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5503    pub fn cusparseSgpsvInterleavedBatch_bufferSizeExt(
5504        handle: cusparseHandle_t,
5505        algo: ::core::ffi::c_int,
5506        m: ::core::ffi::c_int,
5507        ds: *const f32,
5508        dl: *const f32,
5509        d: *const f32,
5510        du: *const f32,
5511        dw: *const f32,
5512        x: *const f32,
5513        batchCount: ::core::ffi::c_int,
5514        pBufferSizeInBytes: *mut size_t,
5515    ) -> cusparseStatus_t;
5516}
5517unsafe extern "C" {
    /// Buffer-size query for [`cusparseDgpsvInterleavedBatch`], whose documentation refers to this routine as `gpsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes the solver's `handle`, `algo`, `m`, `ds`, `dl`, `d`, `du`, `dw`, `x` and `batchCount` parameters, with the data pointers as `*const f64` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5518    pub fn cusparseDgpsvInterleavedBatch_bufferSizeExt(
5519        handle: cusparseHandle_t,
5520        algo: ::core::ffi::c_int,
5521        m: ::core::ffi::c_int,
5522        ds: *const f64,
5523        dl: *const f64,
5524        d: *const f64,
5525        du: *const f64,
5526        dw: *const f64,
5527        x: *const f64,
5528        batchCount: ::core::ffi::c_int,
5529        pBufferSizeInBytes: *mut size_t,
5530    ) -> cusparseStatus_t;
5531}
5532unsafe extern "C" {
    /// Buffer-size query for [`cusparseCgpsvInterleavedBatch`], whose documentation refers to this routine as `gpsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes the solver's `handle`, `algo`, `m`, `ds`, `dl`, `d`, `du`, `dw`, `x` and `batchCount` parameters, with the data pointers as `*const cuComplex` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5533    pub fn cusparseCgpsvInterleavedBatch_bufferSizeExt(
5534        handle: cusparseHandle_t,
5535        algo: ::core::ffi::c_int,
5536        m: ::core::ffi::c_int,
5537        ds: *const cuComplex,
5538        dl: *const cuComplex,
5539        d: *const cuComplex,
5540        du: *const cuComplex,
5541        dw: *const cuComplex,
5542        x: *const cuComplex,
5543        batchCount: ::core::ffi::c_int,
5544        pBufferSizeInBytes: *mut size_t,
5545    ) -> cusparseStatus_t;
5546}
5547unsafe extern "C" {
    /// Buffer-size query for the `cusparseZgpsvInterleavedBatch` solver; the sibling `gpsvInterleavedBatch` documentation refers to this routine as `gpsvInterleavedBatch_bufferSizeExt()`.
    ///
    /// It takes `handle`, `algo`, `m`, the five diagonals `ds`, `dl`, `d`, `du`, `dw`, then `x` and `batchCount`, with the data pointers as `*const cuDoubleComplex` here, plus the `*mut size_t` out-pointer `pBufferSizeInBytes`. The required size is presumably written there in bytes, going by the name — confirm against the cuSPARSE reference.
5548    pub fn cusparseZgpsvInterleavedBatch_bufferSizeExt(
5549        handle: cusparseHandle_t,
5550        algo: ::core::ffi::c_int,
5551        m: ::core::ffi::c_int,
5552        ds: *const cuDoubleComplex,
5553        dl: *const cuDoubleComplex,
5554        d: *const cuDoubleComplex,
5555        du: *const cuDoubleComplex,
5556        dw: *const cuDoubleComplex,
5557        x: *const cuDoubleComplex,
5558        batchCount: ::core::ffi::c_int,
5559        pBufferSizeInBytes: *mut size_t,
5560    ) -> cusparseStatus_t;
5561}
5562unsafe extern "C" {
5563    /// This function computes the solution of multiple penta-diagonal linear systems for *i*=0,…,`batchCount`: `A(i) * x(i) = b(i)` (formula reconstructed from the description below).
5564    ///
5565    /// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. In this binding `ds`, `dl`, `d`, `du`, `dw` and `x` are all `*mut f32`; there is no separate `b` argument, so `B` is presumably passed in through `x` — confirm against the cuSPARSE reference.
5566    ///
5567    /// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formula:
5568    ///
5569    /// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
5570    ///
5571    /// The first two elements of ds are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
5572    ///
5573    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5574    ///
5575    /// The first element of dl is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5576    ///
5577    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5578    ///
5579    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5580    ///
5581    /// The last element of du is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5582    ///
5583    /// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
5584    ///
5585    /// The last two elements of dw are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
5586    ///
5587    /// The data layout is the same as `gtsvStridedBatch`.
5588    ///
5589    /// The routine is numerically stable because it uses QR to solve the linear system.
5590    ///
5591    /// This function requires a buffer size returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5592    ///
5593    /// The function supports the following properties if `pBuffer != NULL`:
5594    ///
5595    /// * The routine requires no extra storage.
5596    /// * The routine supports asynchronous execution.
5597    /// * The routine supports CUDA graph capture.
5598    ///
5599    /// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
5600    pub fn cusparseSgpsvInterleavedBatch(
5601        handle: cusparseHandle_t,
5602        algo: ::core::ffi::c_int,
5603        m: ::core::ffi::c_int,
5604        ds: *mut f32,
5605        dl: *mut f32,
5606        d: *mut f32,
5607        du: *mut f32,
5608        dw: *mut f32,
5609        x: *mut f32,
5610        batchCount: ::core::ffi::c_int,
5611        pBuffer: *mut ::core::ffi::c_void,
5612    ) -> cusparseStatus_t;
5613}
5614unsafe extern "C" {
5615    /// This function computes the solution of multiple penta-diagonal linear systems for *i*=0,…,`batchCount`: `A(i) * x(i) = b(i)` (formula reconstructed from the description below).
5616    ///
5617    /// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. In this binding `ds`, `dl`, `d`, `du`, `dw` and `x` are all `*mut f64`; there is no separate `b` argument, so `B` is presumably passed in through `x` — confirm against the cuSPARSE reference.
5618    ///
5619    /// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formula:
5620    ///
5621    /// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
5622    ///
5623    /// The first two elements of ds are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
5624    ///
5625    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5626    ///
5627    /// The first element of dl is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5628    ///
5629    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5630    ///
5631    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5632    ///
5633    /// The last element of du is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5634    ///
5635    /// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
5636    ///
5637    /// The last two elements of dw are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
5638    ///
5639    /// The data layout is the same as `gtsvStridedBatch`.
5640    ///
5641    /// The routine is numerically stable because it uses QR to solve the linear system.
5642    ///
5643    /// This function requires a buffer size returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5644    ///
5645    /// The function supports the following properties if `pBuffer != NULL`:
5646    ///
5647    /// * The routine requires no extra storage.
5648    /// * The routine supports asynchronous execution.
5649    /// * The routine supports CUDA graph capture.
5650    ///
5651    /// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
5652    pub fn cusparseDgpsvInterleavedBatch(
5653        handle: cusparseHandle_t,
5654        algo: ::core::ffi::c_int,
5655        m: ::core::ffi::c_int,
5656        ds: *mut f64,
5657        dl: *mut f64,
5658        d: *mut f64,
5659        du: *mut f64,
5660        dw: *mut f64,
5661        x: *mut f64,
5662        batchCount: ::core::ffi::c_int,
5663        pBuffer: *mut ::core::ffi::c_void,
5664    ) -> cusparseStatus_t;
5665}
5666unsafe extern "C" {
5667    /// This function computes the solution of multiple penta-diagonal linear systems for *i*=0,…,`batchCount`: `A(i) * x(i) = b(i)` (formula reconstructed from the description below).
5668    ///
5669    /// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. In this binding `ds`, `dl`, `d`, `du`, `dw` and `x` are all `*mut cuComplex`; there is no separate `b` argument, so `B` is presumably passed in through `x` — confirm against the cuSPARSE reference.
5670    ///
5671    /// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formula:
5672    ///
5673    /// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
5674    ///
5675    /// The first two elements of ds are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
5676    ///
5677    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5678    ///
5679    /// The first element of dl is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5680    ///
5681    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5682    ///
5683    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5684    ///
5685    /// The last element of du is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5686    ///
5687    /// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
5688    ///
5689    /// The last two elements of dw are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
5690    ///
5691    /// The data layout is the same as `gtsvStridedBatch`.
5692    ///
5693    /// The routine is numerically stable because it uses QR to solve the linear system.
5694    ///
5695    /// This function requires a buffer size returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5696    ///
5697    /// The function supports the following properties if `pBuffer != NULL`:
5698    ///
5699    /// * The routine requires no extra storage.
5700    /// * The routine supports asynchronous execution.
5701    /// * The routine supports CUDA graph capture.
5702    ///
5703    /// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
5704    pub fn cusparseCgpsvInterleavedBatch(
5705        handle: cusparseHandle_t,
5706        algo: ::core::ffi::c_int,
5707        m: ::core::ffi::c_int,
5708        ds: *mut cuComplex,
5709        dl: *mut cuComplex,
5710        d: *mut cuComplex,
5711        du: *mut cuComplex,
5712        dw: *mut cuComplex,
5713        x: *mut cuComplex,
5714        batchCount: ::core::ffi::c_int,
5715        pBuffer: *mut ::core::ffi::c_void,
5716    ) -> cusparseStatus_t;
5717}
5718unsafe extern "C" {
5719    /// This function computes the solution of multiple penta-diagonal linear systems `A(i) * X(i) = B(i)` for *i*=0,…,`batchCount`.
5720    ///
5721    /// The coefficient matrix `A` of each of these penta-diagonal linear systems is defined with five vectors corresponding to its lower (`ds, dl`), main (`d`), and upper (`du, dw`) matrix diagonals; the right-hand sides are stored in the dense matrix `B`. Notice that solution `X` overwrites right-hand-side matrix `B` on exit. (NOTE(review): besides the five diagonals, the only data argument in this signature is `x`, so `B`/`X` presumably both refer to `x` — confirm against the cuSPARSE documentation.)
5722    ///
5723    /// Assuming `A` is of size `m` and base-1, `ds`, `dl`, `d`, `du` and `dw` are defined by the following formula:
5724    ///
5725    /// `ds(i):= A(i, i-2)` for `i=1,2,...,m`
5726    ///
5727    /// The first two elements of `ds` are out-of-bound (`ds(1):= A(1,-1)`, `ds(2):= A(2,0)`), so `ds(1) = 0` and `ds(2) = 0`.
5728    ///
5729    /// `dl(i):= A(i, i-1)` for `i=1,2,...,m`
5730    ///
5731    /// The first element of `dl` is out-of-bound (`dl(1):= A(1,0)`), so `dl(1) = 0`.
5732    ///
5733    /// `d(i) = A(i,i)` for `i=1,2,...,m`
5734    ///
5735    /// `du(i) = A(i,i+1)` for `i=1,2,...,m`
5736    ///
5737    /// The last element of `du` is out-of-bound (`du(m):= A(m,m+1)`), so `du(m) = 0`.
5738    ///
5739    /// `dw(i) = A(i,i+2)` for `i=1,2,...,m`
5740    ///
5741    /// The last two elements of `dw` are out-of-bound (`dw(m-1):= A(m-1,m+1)`, `dw(m):= A(m,m+2)`), so `dw(m-1) = 0` and `dw(m) = 0`.
5742    ///
5743    /// The data layout is the same as `gtsvStridedBatch`.
5744    ///
5745    /// The routine is numerically stable because it uses QR to solve the linear system.
5746    ///
5747    /// This function requires a buffer whose size is returned by `gpsvInterleavedBatch_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If it is not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
5748    ///
5749    /// The function supports the following properties if `pBuffer != NULL`:
5750    ///
5751    /// * The routine requires no extra storage.
5752    /// * The routine supports asynchronous execution.
5753    /// * The routine supports CUDA graph capture.
5754    ///
5755    /// Please visit [cuSPARSE Library Samples - cusparseSgpsvInterleavedBatch](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gpsvInterleavedBatch) for a code example.
5756    pub fn cusparseZgpsvInterleavedBatch(
5757        handle: cusparseHandle_t,
5758        algo: ::core::ffi::c_int,
5759        m: ::core::ffi::c_int,
5760        ds: *mut cuDoubleComplex,
5761        dl: *mut cuDoubleComplex,
5762        d: *mut cuDoubleComplex,
5763        du: *mut cuDoubleComplex,
5764        dw: *mut cuDoubleComplex,
5765        x: *mut cuDoubleComplex,
5766        batchCount: ::core::ffi::c_int,
5767        pBuffer: *mut ::core::ffi::c_void,
5768    ) -> cusparseStatus_t;
5769}
5770unsafe extern "C" {
    /// Buffer-size query companion of [`cusparseScsrgeam2`] (`f32` values).
    ///
    /// The argument list mirrors [`cusparseScsrgeam2`], except that the three `C` arrays are read-only
    /// (`*const`) here and the final argument is the single output `pBufferSizeInBytes`.
    ///
    /// NOTE(review): presumably the value written to `pBufferSizeInBytes` is the byte size of the `pBuffer`
    /// workspace that [`cusparseScsrgeam2`] takes — confirm against the cuSPARSE documentation.
5771    pub fn cusparseScsrgeam2_bufferSizeExt(
5772        handle: cusparseHandle_t,
5773        m: ::core::ffi::c_int,
5774        n: ::core::ffi::c_int,
5775        alpha: *const f32,
5776        descrA: cusparseMatDescr_t,
5777        nnzA: ::core::ffi::c_int,
5778        csrSortedValA: *const f32,
5779        csrSortedRowPtrA: *const ::core::ffi::c_int,
5780        csrSortedColIndA: *const ::core::ffi::c_int,
5781        beta: *const f32,
5782        descrB: cusparseMatDescr_t,
5783        nnzB: ::core::ffi::c_int,
5784        csrSortedValB: *const f32,
5785        csrSortedRowPtrB: *const ::core::ffi::c_int,
5786        csrSortedColIndB: *const ::core::ffi::c_int,
5787        descrC: cusparseMatDescr_t,
5788        csrSortedValC: *const f32,
5789        csrSortedRowPtrC: *const ::core::ffi::c_int,
5790        csrSortedColIndC: *const ::core::ffi::c_int,
5791        pBufferSizeInBytes: *mut size_t,
5792    ) -> cusparseStatus_t;
5793}
5794unsafe extern "C" {
    /// Buffer-size query companion of [`cusparseDcsrgeam2`] (`f64` values).
    ///
    /// The argument list mirrors [`cusparseDcsrgeam2`], except that the three `C` arrays are read-only
    /// (`*const`) here and the final argument is the single output `pBufferSizeInBytes`.
    ///
    /// NOTE(review): presumably the value written to `pBufferSizeInBytes` is the byte size of the `pBuffer`
    /// workspace that [`cusparseDcsrgeam2`] takes — confirm against the cuSPARSE documentation.
5795    pub fn cusparseDcsrgeam2_bufferSizeExt(
5796        handle: cusparseHandle_t,
5797        m: ::core::ffi::c_int,
5798        n: ::core::ffi::c_int,
5799        alpha: *const f64,
5800        descrA: cusparseMatDescr_t,
5801        nnzA: ::core::ffi::c_int,
5802        csrSortedValA: *const f64,
5803        csrSortedRowPtrA: *const ::core::ffi::c_int,
5804        csrSortedColIndA: *const ::core::ffi::c_int,
5805        beta: *const f64,
5806        descrB: cusparseMatDescr_t,
5807        nnzB: ::core::ffi::c_int,
5808        csrSortedValB: *const f64,
5809        csrSortedRowPtrB: *const ::core::ffi::c_int,
5810        csrSortedColIndB: *const ::core::ffi::c_int,
5811        descrC: cusparseMatDescr_t,
5812        csrSortedValC: *const f64,
5813        csrSortedRowPtrC: *const ::core::ffi::c_int,
5814        csrSortedColIndC: *const ::core::ffi::c_int,
5815        pBufferSizeInBytes: *mut size_t,
5816    ) -> cusparseStatus_t;
5817}
5818unsafe extern "C" {
    /// Buffer-size query companion of [`cusparseCcsrgeam2`] (`cuComplex` values).
    ///
    /// The argument list mirrors [`cusparseCcsrgeam2`], except that the three `C` arrays are read-only
    /// (`*const`) here and the final argument is the single output `pBufferSizeInBytes`.
    ///
    /// NOTE(review): presumably the value written to `pBufferSizeInBytes` is the byte size of the `pBuffer`
    /// workspace that [`cusparseCcsrgeam2`] takes — confirm against the cuSPARSE documentation.
5819    pub fn cusparseCcsrgeam2_bufferSizeExt(
5820        handle: cusparseHandle_t,
5821        m: ::core::ffi::c_int,
5822        n: ::core::ffi::c_int,
5823        alpha: *const cuComplex,
5824        descrA: cusparseMatDescr_t,
5825        nnzA: ::core::ffi::c_int,
5826        csrSortedValA: *const cuComplex,
5827        csrSortedRowPtrA: *const ::core::ffi::c_int,
5828        csrSortedColIndA: *const ::core::ffi::c_int,
5829        beta: *const cuComplex,
5830        descrB: cusparseMatDescr_t,
5831        nnzB: ::core::ffi::c_int,
5832        csrSortedValB: *const cuComplex,
5833        csrSortedRowPtrB: *const ::core::ffi::c_int,
5834        csrSortedColIndB: *const ::core::ffi::c_int,
5835        descrC: cusparseMatDescr_t,
5836        csrSortedValC: *const cuComplex,
5837        csrSortedRowPtrC: *const ::core::ffi::c_int,
5838        csrSortedColIndC: *const ::core::ffi::c_int,
5839        pBufferSizeInBytes: *mut size_t,
5840    ) -> cusparseStatus_t;
5841}
5842unsafe extern "C" {
    /// Buffer-size query companion of [`cusparseZcsrgeam2`] (`cuDoubleComplex` values).
    ///
    /// The argument list mirrors [`cusparseZcsrgeam2`], except that the three `C` arrays are read-only
    /// (`*const`) here and the final argument is the single output `pBufferSizeInBytes`.
    ///
    /// NOTE(review): presumably the value written to `pBufferSizeInBytes` is the byte size of the `pBuffer`
    /// workspace that [`cusparseZcsrgeam2`] takes — confirm against the cuSPARSE documentation.
5843    pub fn cusparseZcsrgeam2_bufferSizeExt(
5844        handle: cusparseHandle_t,
5845        m: ::core::ffi::c_int,
5846        n: ::core::ffi::c_int,
5847        alpha: *const cuDoubleComplex,
5848        descrA: cusparseMatDescr_t,
5849        nnzA: ::core::ffi::c_int,
5850        csrSortedValA: *const cuDoubleComplex,
5851        csrSortedRowPtrA: *const ::core::ffi::c_int,
5852        csrSortedColIndA: *const ::core::ffi::c_int,
5853        beta: *const cuDoubleComplex,
5854        descrB: cusparseMatDescr_t,
5855        nnzB: ::core::ffi::c_int,
5856        csrSortedValB: *const cuDoubleComplex,
5857        csrSortedRowPtrB: *const ::core::ffi::c_int,
5858        csrSortedColIndB: *const ::core::ffi::c_int,
5859        descrC: cusparseMatDescr_t,
5860        csrSortedValC: *const cuDoubleComplex,
5861        csrSortedRowPtrC: *const ::core::ffi::c_int,
5862        csrSortedColIndC: *const ::core::ffi::c_int,
5863        pBufferSizeInBytes: *mut size_t,
5864    ) -> cusparseStatus_t;
5865}
5866unsafe extern "C" {
    /// Sparsity-pattern step of the `csrgeam2()` procedure: it takes only the CSR row pointers and column
    /// indices of `A` and `B` (no value arrays) and has two outputs, `csrSortedRowPtrC` and
    /// `nnzTotalDevHostPtr`.
    ///
    /// According to the `cusparse[S|D|C|Z]csrgeam2` documentation in this file, the user allocates
    /// `csrRowPtrC` with `m+1` elements, calls this function to determine `csrRowPtrC` and the total number
    /// of nonzero elements of `C`, and may then read that total from `*nnzTotalDevHostPtr`.
    ///
    /// NOTE(review): `workspace` is presumably the buffer sized by `cusparse[S|D|C|Z]csrgeam2_bufferSizeExt`
    /// — confirm against the cuSPARSE documentation.
5867    pub fn cusparseXcsrgeam2Nnz(
5868        handle: cusparseHandle_t,
5869        m: ::core::ffi::c_int,
5870        n: ::core::ffi::c_int,
5871        descrA: cusparseMatDescr_t,
5872        nnzA: ::core::ffi::c_int,
5873        csrSortedRowPtrA: *const ::core::ffi::c_int,
5874        csrSortedColIndA: *const ::core::ffi::c_int,
5875        descrB: cusparseMatDescr_t,
5876        nnzB: ::core::ffi::c_int,
5877        csrSortedRowPtrB: *const ::core::ffi::c_int,
5878        csrSortedColIndB: *const ::core::ffi::c_int,
5879        descrC: cusparseMatDescr_t,
5880        csrSortedRowPtrC: *mut ::core::ffi::c_int,
5881        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
5882        workspace: *mut ::core::ffi::c_void,
5883    ) -> cusparseStatus_t;
5884}
5885unsafe extern "C" {
5886    /// This function performs the following matrix-matrix operation: $C = \alpha \times A + \beta \times B$
5887    ///
5888    /// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrColIndC` respectively), and $\alpha$ and $\beta$ are scalars. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse[S|D|C|Z]csrgeam2()` to complete matrix `C`.
5889    ///
5890    /// The general procedure is as follows (the example listing this sentence introduces is not present in this binding; the two steps are the ones described in the paragraph above):
5891    ///
5892    /// Several comments on `csrgeam2()`:
5893    ///
5894    /// * The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
5895    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
5896    /// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse[S|D|C|Z]csrgeam2()` only for each iteration.
5897    /// * The pointers `alpha` and `beta` must be valid.
5898    /// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
5899    /// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs an explicit buffer (`pBuffer`) where `csrgeam()` allocates the buffer internally.
5900    /// * This function requires temporary extra storage that is allocated internally.
5901    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
5902    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
5903    pub fn cusparseScsrgeam2(
5904        handle: cusparseHandle_t,
5905        m: ::core::ffi::c_int,
5906        n: ::core::ffi::c_int,
5907        alpha: *const f32,
5908        descrA: cusparseMatDescr_t,
5909        nnzA: ::core::ffi::c_int,
5910        csrSortedValA: *const f32,
5911        csrSortedRowPtrA: *const ::core::ffi::c_int,
5912        csrSortedColIndA: *const ::core::ffi::c_int,
5913        beta: *const f32,
5914        descrB: cusparseMatDescr_t,
5915        nnzB: ::core::ffi::c_int,
5916        csrSortedValB: *const f32,
5917        csrSortedRowPtrB: *const ::core::ffi::c_int,
5918        csrSortedColIndB: *const ::core::ffi::c_int,
5919        descrC: cusparseMatDescr_t,
5920        csrSortedValC: *mut f32,
5921        csrSortedRowPtrC: *mut ::core::ffi::c_int,
5922        csrSortedColIndC: *mut ::core::ffi::c_int,
5923        pBuffer: *mut ::core::ffi::c_void,
5924    ) -> cusparseStatus_t;
5925}
5926unsafe extern "C" {
5927    /// This function performs the following matrix-matrix operation: $C = \alpha \times A + \beta \times B$
5928    ///
5929    /// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrColIndC` respectively), and $\alpha$ and $\beta$ are scalars. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse[S|D|C|Z]csrgeam2()` to complete matrix `C`.
5930    ///
5931    /// The general procedure is as follows (the example listing this sentence introduces is not present in this binding; the two steps are the ones described in the paragraph above):
5932    ///
5933    /// Several comments on `csrgeam2()`:
5934    ///
5935    /// * The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
5936    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
5937    /// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse[S|D|C|Z]csrgeam2()` only for each iteration.
5938    /// * The pointers `alpha` and `beta` must be valid.
5939    /// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
5940    /// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs an explicit buffer (`pBuffer`) where `csrgeam()` allocates the buffer internally.
5941    /// * This function requires temporary extra storage that is allocated internally.
5942    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
5943    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
5944    pub fn cusparseDcsrgeam2(
5945        handle: cusparseHandle_t,
5946        m: ::core::ffi::c_int,
5947        n: ::core::ffi::c_int,
5948        alpha: *const f64,
5949        descrA: cusparseMatDescr_t,
5950        nnzA: ::core::ffi::c_int,
5951        csrSortedValA: *const f64,
5952        csrSortedRowPtrA: *const ::core::ffi::c_int,
5953        csrSortedColIndA: *const ::core::ffi::c_int,
5954        beta: *const f64,
5955        descrB: cusparseMatDescr_t,
5956        nnzB: ::core::ffi::c_int,
5957        csrSortedValB: *const f64,
5958        csrSortedRowPtrB: *const ::core::ffi::c_int,
5959        csrSortedColIndB: *const ::core::ffi::c_int,
5960        descrC: cusparseMatDescr_t,
5961        csrSortedValC: *mut f64,
5962        csrSortedRowPtrC: *mut ::core::ffi::c_int,
5963        csrSortedColIndC: *mut ::core::ffi::c_int,
5964        pBuffer: *mut ::core::ffi::c_void,
5965    ) -> cusparseStatus_t;
5966}
5967unsafe extern "C" {
5968    /// This function performs the following matrix-matrix operation: $C = \alpha \times A + \beta \times B$
5969    ///
5970    /// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrColIndC` respectively), and $\alpha$ and $\beta$ are scalars. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse[S|D|C|Z]csrgeam2()` to complete matrix `C`.
5971    ///
5972    /// The general procedure is as follows (the example listing this sentence introduces is not present in this binding; the two steps are the ones described in the paragraph above):
5973    ///
5974    /// Several comments on `csrgeam2()`:
5975    ///
5976    /// * The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
5977    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
5978    /// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse[S|D|C|Z]csrgeam2()` only for each iteration.
5979    /// * The pointers `alpha` and `beta` must be valid.
5980    /// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
5981    /// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs an explicit buffer (`pBuffer`) where `csrgeam()` allocates the buffer internally.
5982    /// * This function requires temporary extra storage that is allocated internally.
5983    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
5984    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
5985    pub fn cusparseCcsrgeam2(
5986        handle: cusparseHandle_t,
5987        m: ::core::ffi::c_int,
5988        n: ::core::ffi::c_int,
5989        alpha: *const cuComplex,
5990        descrA: cusparseMatDescr_t,
5991        nnzA: ::core::ffi::c_int,
5992        csrSortedValA: *const cuComplex,
5993        csrSortedRowPtrA: *const ::core::ffi::c_int,
5994        csrSortedColIndA: *const ::core::ffi::c_int,
5995        beta: *const cuComplex,
5996        descrB: cusparseMatDescr_t,
5997        nnzB: ::core::ffi::c_int,
5998        csrSortedValB: *const cuComplex,
5999        csrSortedRowPtrB: *const ::core::ffi::c_int,
6000        csrSortedColIndB: *const ::core::ffi::c_int,
6001        descrC: cusparseMatDescr_t,
6002        csrSortedValC: *mut cuComplex,
6003        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6004        csrSortedColIndC: *mut ::core::ffi::c_int,
6005        pBuffer: *mut ::core::ffi::c_void,
6006    ) -> cusparseStatus_t;
6007}
6008unsafe extern "C" {
6009    /// This function performs the following matrix-matrix operation: $C = \alpha \times A + \beta \times B$
6010    ///
6011    /// where `A`, `B`, and `C` are $m \times n$ sparse matrices (defined in CSR storage format by the three arrays `csrValA|csrValB|csrValC`, `csrRowPtrA|csrRowPtrB|csrRowPtrC`, and `csrColIndA|csrColIndB|csrColIndC` respectively), and $\alpha$ and $\beta$ are scalars. Since `A` and `B` have different sparsity patterns, cuSPARSE adopts a two-step approach to complete sparse matrix `C`. In the first step, the user allocates `csrRowPtrC` of `m+1` elements and uses function [`cusparseXcsrgeam2Nnz`] to determine `csrRowPtrC` and the total number of nonzero elements. In the second step, the user gathers `nnzC` (number of nonzero elements of matrix `C`) from either `(nnzC=*nnzTotalDevHostPtr)` or `(nnzC=csrRowPtrC(m)-csrRowPtrC(0))` and allocates `csrValC, csrColIndC` of `nnzC` elements respectively, then finally calls function `cusparse[S|D|C|Z]csrgeam2()` to complete matrix `C`.
6012    ///
6013    /// The general procedure is as follows (the example listing this sentence introduces is not present in this binding; the two steps are the ones described in the paragraph above):
6014    ///
6015    /// Several comments on `csrgeam2()`:
6016    ///
6017    /// * The other three combinations, NT, TN, and TT, are not supported by cuSPARSE. In order to do any one of the three, the user should use the routine `csr2csc()` to convert $A$ | $B$ to $A^{T}$ | $B^{T}$.
6018    /// * Only [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] is supported. If either `A` or `B` is symmetric or Hermitian, then the user must extend the matrix to a full one and reconfigure the `MatrixType` field of the descriptor to [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`].
6019    /// * If the sparsity pattern of matrix `C` is known, the user can skip the call to function [`cusparseXcsrgeam2Nnz`]. For example, suppose that the user has an iterative algorithm which would update `A` and `B` iteratively but keep the sparsity patterns. The user can call function [`cusparseXcsrgeam2Nnz`] once to set up the sparsity pattern of `C`, then call function `cusparse[S|D|C|Z]csrgeam2()` only for each iteration.
6020    /// * The pointers `alpha` and `beta` must be valid.
6021    /// * When `alpha` or `beta` is zero, it is not considered a special case by cuSPARSE. The sparsity pattern of `C` is independent of the value of `alpha` and `beta`. If the user wants $C = 0 \times A + 1 \times B^{T}$, then `csr2csc()` is better than `csrgeam2()`.
6022    /// * `csrgeam2()` is the same as `csrgeam()` except `csrgeam2()` needs an explicit buffer (`pBuffer`) where `csrgeam()` allocates the buffer internally.
6023    /// * This function requires temporary extra storage that is allocated internally.
6024    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6025    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
6026    pub fn cusparseZcsrgeam2(
6027        handle: cusparseHandle_t,
6028        m: ::core::ffi::c_int,
6029        n: ::core::ffi::c_int,
6030        alpha: *const cuDoubleComplex,
6031        descrA: cusparseMatDescr_t,
6032        nnzA: ::core::ffi::c_int,
6033        csrSortedValA: *const cuDoubleComplex,
6034        csrSortedRowPtrA: *const ::core::ffi::c_int,
6035        csrSortedColIndA: *const ::core::ffi::c_int,
6036        beta: *const cuDoubleComplex,
6037        descrB: cusparseMatDescr_t,
6038        nnzB: ::core::ffi::c_int,
6039        csrSortedValB: *const cuDoubleComplex,
6040        csrSortedRowPtrB: *const ::core::ffi::c_int,
6041        csrSortedColIndB: *const ::core::ffi::c_int,
6042        descrC: cusparseMatDescr_t,
6043        csrSortedValC: *mut cuDoubleComplex,
6044        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6045        csrSortedColIndC: *mut ::core::ffi::c_int,
6046        pBuffer: *mut ::core::ffi::c_void,
6047    ) -> cusparseStatus_t;
6048}
6049unsafe extern "C" {
6050    /// This function performs the coloring of the adjacency graph associated with the matrix A stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
6051    ///
6052    /// The matrix A passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric, the user should pass A+A^T as a parameter to this routine.
6053    ///
6054    /// * This function requires temporary extra storage that is allocated internally.
6055    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6056    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
6057    pub fn cusparseScsrcolor(
6058        handle: cusparseHandle_t,
6059        m: ::core::ffi::c_int,
6060        nnz: ::core::ffi::c_int,
6061        descrA: cusparseMatDescr_t,
6062        csrSortedValA: *const f32,
6063        csrSortedRowPtrA: *const ::core::ffi::c_int,
6064        csrSortedColIndA: *const ::core::ffi::c_int,
6065        fractionToColor: *const f32,
6066        ncolors: *mut ::core::ffi::c_int,
6067        coloring: *mut ::core::ffi::c_int,
6068        reordering: *mut ::core::ffi::c_int,
6069        info: cusparseColorInfo_t,
6070    ) -> cusparseStatus_t;
6071}
6072unsafe extern "C" {
6073    /// This function performs the coloring of the adjacency graph associated with the matrix A stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
6074    ///
6075    /// The matrix A passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric, the user should pass A+A^T as a parameter to this routine.
6076    ///
6077    /// * This function requires temporary extra storage that is allocated internally.
6078    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6079    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
6080    pub fn cusparseDcsrcolor(
6081        handle: cusparseHandle_t,
6082        m: ::core::ffi::c_int,
6083        nnz: ::core::ffi::c_int,
6084        descrA: cusparseMatDescr_t,
6085        csrSortedValA: *const f64,
6086        csrSortedRowPtrA: *const ::core::ffi::c_int,
6087        csrSortedColIndA: *const ::core::ffi::c_int,
6088        fractionToColor: *const f64,
6089        ncolors: *mut ::core::ffi::c_int,
6090        coloring: *mut ::core::ffi::c_int,
6091        reordering: *mut ::core::ffi::c_int,
6092        info: cusparseColorInfo_t,
6093    ) -> cusparseStatus_t;
6094}
6095unsafe extern "C" {
6096    /// This function performs the coloring of the adjacency graph associated with the matrix A stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
6097    ///
6098    /// The matrix A passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric, the user should pass A+A^T as a parameter to this routine.
6099    ///
6100    /// * This function requires temporary extra storage that is allocated internally.
6101    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6102    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
6103    pub fn cusparseCcsrcolor(
6104        handle: cusparseHandle_t,
6105        m: ::core::ffi::c_int,
6106        nnz: ::core::ffi::c_int,
6107        descrA: cusparseMatDescr_t,
6108        csrSortedValA: *const cuComplex,
6109        csrSortedRowPtrA: *const ::core::ffi::c_int,
6110        csrSortedColIndA: *const ::core::ffi::c_int,
6111        fractionToColor: *const f32,
6112        ncolors: *mut ::core::ffi::c_int,
6113        coloring: *mut ::core::ffi::c_int,
6114        reordering: *mut ::core::ffi::c_int,
6115        info: cusparseColorInfo_t,
6116    ) -> cusparseStatus_t;
6117}
6118unsafe extern "C" {
6119    /// This function performs the coloring of the adjacency graph associated with the matrix A stored in CSR format. The coloring is an assignment of colors (integer numbers) to nodes, such that neighboring nodes have distinct colors. An approximate coloring algorithm is used in this routine, and is stopped when a certain percentage of nodes has been colored. The rest of the nodes are assigned distinct colors (an increasing sequence of integer numbers, starting from the last integer used previously). The last two auxiliary routines can be used to extract the resulting number of colors, their assignment and the associated reordering. The reordering is such that nodes that have been assigned the same color are reordered to be next to each other.
6120    ///
6121    /// The matrix A passed to this routine must be stored as a general matrix and have a symmetric sparsity pattern. If the matrix is nonsymmetric, the user should pass A+A^T as a parameter to this routine.
6122    ///
6123    /// * This function requires temporary extra storage that is allocated internally.
6124    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6125    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
6126    pub fn cusparseZcsrcolor(
6127        handle: cusparseHandle_t,
6128        m: ::core::ffi::c_int,
6129        nnz: ::core::ffi::c_int,
6130        descrA: cusparseMatDescr_t,
6131        csrSortedValA: *const cuDoubleComplex,
6132        csrSortedRowPtrA: *const ::core::ffi::c_int,
6133        csrSortedColIndA: *const ::core::ffi::c_int,
6134        fractionToColor: *const f64,
6135        ncolors: *mut ::core::ffi::c_int,
6136        coloring: *mut ::core::ffi::c_int,
6137        reordering: *mut ::core::ffi::c_int,
6138        info: cusparseColorInfo_t,
6139    ) -> cusparseStatus_t;
6140}
6141unsafe extern "C" {
6142    /// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
6143    ///
6144    /// * This function requires temporary extra storage that is allocated internally.
6145    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6146    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This is the variant whose dense matrix `A` is read through `*const f32`.
    /// `nnzPerRowCol` and `nnzTotalDevHostPtr` are the only `*mut` arguments, so they are presumably where the per-row/column counts and the total are written; `dirA` presumably selects rows versus columns — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6147    pub fn cusparseSnnz(
6148        handle: cusparseHandle_t,
6149        dirA: cusparseDirection_t,
6150        m: ::core::ffi::c_int,
6151        n: ::core::ffi::c_int,
6152        descrA: cusparseMatDescr_t,
6153        A: *const f32,
6154        lda: ::core::ffi::c_int,
6155        nnzPerRowCol: *mut ::core::ffi::c_int,
6156        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
6157    ) -> cusparseStatus_t;
6158}
6159unsafe extern "C" {
6160    /// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
6161    ///
6162    /// * This function requires temporary extra storage that is allocated internally.
6163    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6164    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This is the variant whose dense matrix `A` is read through `*const f64`.
    /// `nnzPerRowCol` and `nnzTotalDevHostPtr` are the only `*mut` arguments, so they are presumably where the per-row/column counts and the total are written; `dirA` presumably selects rows versus columns — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6165    pub fn cusparseDnnz(
6166        handle: cusparseHandle_t,
6167        dirA: cusparseDirection_t,
6168        m: ::core::ffi::c_int,
6169        n: ::core::ffi::c_int,
6170        descrA: cusparseMatDescr_t,
6171        A: *const f64,
6172        lda: ::core::ffi::c_int,
6173        nnzPerRowCol: *mut ::core::ffi::c_int,
6174        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
6175    ) -> cusparseStatus_t;
6176}
6177unsafe extern "C" {
6178    /// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
6179    ///
6180    /// * This function requires temporary extra storage that is allocated internally.
6181    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6182    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This is the variant whose dense matrix `A` is read through `*const cuComplex`.
    /// `nnzPerRowCol` and `nnzTotalDevHostPtr` are the only `*mut` arguments, so they are presumably where the per-row/column counts and the total are written; `dirA` presumably selects rows versus columns — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6183    pub fn cusparseCnnz(
6184        handle: cusparseHandle_t,
6185        dirA: cusparseDirection_t,
6186        m: ::core::ffi::c_int,
6187        n: ::core::ffi::c_int,
6188        descrA: cusparseMatDescr_t,
6189        A: *const cuComplex,
6190        lda: ::core::ffi::c_int,
6191        nnzPerRowCol: *mut ::core::ffi::c_int,
6192        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
6193    ) -> cusparseStatus_t;
6194}
6195unsafe extern "C" {
6196    /// This function computes the number of nonzero elements per row or column and the total number of nonzero elements in a dense matrix.
6197    ///
6198    /// * This function requires temporary extra storage that is allocated internally.
6199    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6200    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This is the variant whose dense matrix `A` is read through `*const cuDoubleComplex`.
    /// `nnzPerRowCol` and `nnzTotalDevHostPtr` are the only `*mut` arguments, so they are presumably where the per-row/column counts and the total are written; `dirA` presumably selects rows versus columns — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6201    pub fn cusparseZnnz(
6202        handle: cusparseHandle_t,
6203        dirA: cusparseDirection_t,
6204        m: ::core::ffi::c_int,
6205        n: ::core::ffi::c_int,
6206        descrA: cusparseMatDescr_t,
6207        A: *const cuDoubleComplex,
6208        lda: ::core::ffi::c_int,
6209        nnzPerRowCol: *mut ::core::ffi::c_int,
6210        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
6211    ) -> cusparseStatus_t;
6212}
6213unsafe extern "C" {
6214    /// This function is the step one to convert from csr format to compressed csr format.
6215    ///
6216    /// Given a sparse matrix A and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix C.
    /// NOTE(review): the upstream sentence ended with "defined by" and the formula that followed is missing from these generated docs; presumably C keeps the entries of A whose magnitude exceeds the threshold — confirm against the cuSPARSE reference.
6217    ///
6218    /// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct.
6219    ///
6220    /// * This function requires temporary extra storage that is allocated internally.
6221    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6222    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// In this variant the matrix values are `f32` and the threshold `tol` is passed by value as `f32`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6223    pub fn cusparseSnnz_compress(
6224        handle: cusparseHandle_t,
6225        m: ::core::ffi::c_int,
6226        descr: cusparseMatDescr_t,
6227        csrSortedValA: *const f32,
6228        csrSortedRowPtrA: *const ::core::ffi::c_int,
6229        nnzPerRow: *mut ::core::ffi::c_int,
6230        nnzC: *mut ::core::ffi::c_int,
6231        tol: f32,
6232    ) -> cusparseStatus_t;
6233}
6234unsafe extern "C" {
6235    /// This function is the step one to convert from csr format to compressed csr format.
6236    ///
6237    /// Given a sparse matrix A and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix C.
    /// NOTE(review): the upstream sentence ended with "defined by" and the formula that followed is missing from these generated docs; presumably C keeps the entries of A whose magnitude exceeds the threshold — confirm against the cuSPARSE reference.
6238    ///
6239    /// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct.
6240    ///
6241    /// * This function requires temporary extra storage that is allocated internally.
6242    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6243    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// In this variant the matrix values are `f64` and the threshold `tol` is passed by value as `f64`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6244    pub fn cusparseDnnz_compress(
6245        handle: cusparseHandle_t,
6246        m: ::core::ffi::c_int,
6247        descr: cusparseMatDescr_t,
6248        csrSortedValA: *const f64,
6249        csrSortedRowPtrA: *const ::core::ffi::c_int,
6250        nnzPerRow: *mut ::core::ffi::c_int,
6251        nnzC: *mut ::core::ffi::c_int,
6252        tol: f64,
6253    ) -> cusparseStatus_t;
6254}
6255unsafe extern "C" {
6256    /// This function is the step one to convert from csr format to compressed csr format.
6257    ///
6258    /// Given a sparse matrix A and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix C.
    /// NOTE(review): the upstream sentence ended with "defined by" and the formula that followed is missing from these generated docs; presumably C keeps the entries of A whose magnitude exceeds the threshold — confirm against the cuSPARSE reference.
6259    ///
6260    /// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct.
6261    ///
6262    /// * This function requires temporary extra storage that is allocated internally.
6263    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6264    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// In this variant the matrix values are `cuComplex` and the threshold `tol` is passed by value as a `cuComplex`; the real-part convention described above applies to it.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6265    pub fn cusparseCnnz_compress(
6266        handle: cusparseHandle_t,
6267        m: ::core::ffi::c_int,
6268        descr: cusparseMatDescr_t,
6269        csrSortedValA: *const cuComplex,
6270        csrSortedRowPtrA: *const ::core::ffi::c_int,
6271        nnzPerRow: *mut ::core::ffi::c_int,
6272        nnzC: *mut ::core::ffi::c_int,
6273        tol: cuComplex,
6274    ) -> cusparseStatus_t;
6275}
6276unsafe extern "C" {
6277    /// This function is the step one to convert from csr format to compressed csr format.
6278    ///
6279    /// Given a sparse matrix A and a non-negative value threshold, the function returns `nnzPerRow` (the number of nonzero columns per row) and `nnzC` (the total number of nonzeros) of a sparse matrix C.
    /// NOTE(review): the upstream sentence ended with "defined by" and the formula that followed is missing from these generated docs; presumably C keeps the entries of A whose magnitude exceeds the threshold — confirm against the cuSPARSE reference.
6280    ///
6281    /// A key assumption for the cuComplex and cuDoubleComplex case is that this tolerance is given as the real part. For example `tol = 1e-8 + 0*i` and we extract cureal, that is the x component of this struct.
6282    ///
6283    /// * This function requires temporary extra storage that is allocated internally.
6284    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6285    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// In this variant the matrix values are `cuDoubleComplex` and the threshold `tol` is passed by value as a `cuDoubleComplex`; the real-part convention described above applies to it.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6286    pub fn cusparseZnnz_compress(
6287        handle: cusparseHandle_t,
6288        m: ::core::ffi::c_int,
6289        descr: cusparseMatDescr_t,
6290        csrSortedValA: *const cuDoubleComplex,
6291        csrSortedRowPtrA: *const ::core::ffi::c_int,
6292        nnzPerRow: *mut ::core::ffi::c_int,
6293        nnzC: *mut ::core::ffi::c_int,
6294        tol: cuDoubleComplex,
6295    ) -> cusparseStatus_t;
6296}
6297unsafe extern "C" {
    /// Raw binding for `cusparseScsr2csr_compress`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedColIndA`, `csrSortedRowPtrA`, `nnzA`) together with a per-row count array `nnzPerRow`, and writes a CSR matrix C through the three `*mut` arrays, using the threshold `tol`.
    /// Presumably this is the step that follows `cusparseSnnz_compress` — confirm against the cuSPARSE reference.
    ///
    /// Values are `f32` and `tol` is passed by value as `f32`.
    /// Note the argument order: the column-index array of A comes before its row-pointer array here.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6298    pub fn cusparseScsr2csr_compress(
6299        handle: cusparseHandle_t,
6300        m: ::core::ffi::c_int,
6301        n: ::core::ffi::c_int,
6302        descrA: cusparseMatDescr_t,
6303        csrSortedValA: *const f32,
6304        csrSortedColIndA: *const ::core::ffi::c_int,
6305        csrSortedRowPtrA: *const ::core::ffi::c_int,
6306        nnzA: ::core::ffi::c_int,
6307        nnzPerRow: *const ::core::ffi::c_int,
6308        csrSortedValC: *mut f32,
6309        csrSortedColIndC: *mut ::core::ffi::c_int,
6310        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6311        tol: f32,
6312    ) -> cusparseStatus_t;
6313}
6314unsafe extern "C" {
    /// Raw binding for `cusparseDcsr2csr_compress`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedColIndA`, `csrSortedRowPtrA`, `nnzA`) together with a per-row count array `nnzPerRow`, and writes a CSR matrix C through the three `*mut` arrays, using the threshold `tol`.
    /// Presumably this is the step that follows `cusparseDnnz_compress` — confirm against the cuSPARSE reference.
    ///
    /// Values are `f64` and `tol` is passed by value as `f64`.
    /// Note the argument order: the column-index array of A comes before its row-pointer array here.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6315    pub fn cusparseDcsr2csr_compress(
6316        handle: cusparseHandle_t,
6317        m: ::core::ffi::c_int,
6318        n: ::core::ffi::c_int,
6319        descrA: cusparseMatDescr_t,
6320        csrSortedValA: *const f64,
6321        csrSortedColIndA: *const ::core::ffi::c_int,
6322        csrSortedRowPtrA: *const ::core::ffi::c_int,
6323        nnzA: ::core::ffi::c_int,
6324        nnzPerRow: *const ::core::ffi::c_int,
6325        csrSortedValC: *mut f64,
6326        csrSortedColIndC: *mut ::core::ffi::c_int,
6327        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6328        tol: f64,
6329    ) -> cusparseStatus_t;
6330}
6331unsafe extern "C" {
    /// Raw binding for `cusparseCcsr2csr_compress`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedColIndA`, `csrSortedRowPtrA`, `nnzA`) together with a per-row count array `nnzPerRow`, and writes a CSR matrix C through the three `*mut` arrays, using the threshold `tol`.
    /// Presumably this is the step that follows `cusparseCnnz_compress` — confirm against the cuSPARSE reference.
    ///
    /// Values are `cuComplex` and `tol` is passed by value as a `cuComplex`.
    /// Note the argument order: the column-index array of A comes before its row-pointer array here.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6332    pub fn cusparseCcsr2csr_compress(
6333        handle: cusparseHandle_t,
6334        m: ::core::ffi::c_int,
6335        n: ::core::ffi::c_int,
6336        descrA: cusparseMatDescr_t,
6337        csrSortedValA: *const cuComplex,
6338        csrSortedColIndA: *const ::core::ffi::c_int,
6339        csrSortedRowPtrA: *const ::core::ffi::c_int,
6340        nnzA: ::core::ffi::c_int,
6341        nnzPerRow: *const ::core::ffi::c_int,
6342        csrSortedValC: *mut cuComplex,
6343        csrSortedColIndC: *mut ::core::ffi::c_int,
6344        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6345        tol: cuComplex,
6346    ) -> cusparseStatus_t;
6347}
6348unsafe extern "C" {
    /// Raw binding for `cusparseZcsr2csr_compress`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedColIndA`, `csrSortedRowPtrA`, `nnzA`) together with a per-row count array `nnzPerRow`, and writes a CSR matrix C through the three `*mut` arrays, using the threshold `tol`.
    /// Presumably this is the step that follows `cusparseZnnz_compress` — confirm against the cuSPARSE reference.
    ///
    /// Values are `cuDoubleComplex` and `tol` is passed by value as a `cuDoubleComplex`.
    /// Note the argument order: the column-index array of A comes before its row-pointer array here.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6349    pub fn cusparseZcsr2csr_compress(
6350        handle: cusparseHandle_t,
6351        m: ::core::ffi::c_int,
6352        n: ::core::ffi::c_int,
6353        descrA: cusparseMatDescr_t,
6354        csrSortedValA: *const cuDoubleComplex,
6355        csrSortedColIndA: *const ::core::ffi::c_int,
6356        csrSortedRowPtrA: *const ::core::ffi::c_int,
6357        nnzA: ::core::ffi::c_int,
6358        nnzPerRow: *const ::core::ffi::c_int,
6359        csrSortedValC: *mut cuDoubleComplex,
6360        csrSortedColIndC: *mut ::core::ffi::c_int,
6361        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6362        tol: cuDoubleComplex,
6363    ) -> cusparseStatus_t;
6364}
6365unsafe extern "C" {
6366    /// This function converts the array containing the uncompressed row indices (corresponding to COO format) into an array of compressed row pointers (corresponding to CSR format).
6367    ///
6368    /// It can also be used to convert the array containing the uncompressed column indices (corresponding to COO format) into an array of column pointers (corresponding to CSC format).
6369    ///
6370    /// * The routine requires no extra storage.
6371    /// * The routine supports asynchronous execution.
6372    /// * The routine supports CUDA graph capture.
    ///
    /// The input indices are read through `cooRowInd` and the result is written through `csrSortedRowPtr`; only integer index arrays are involved, so there is no per-value-type variant of this declaration.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so both pointers must be valid for whatever access the routine performs.
    /// NOTE(review): the required array lengths (presumably related to `nnz` and `m`) and the memory space of the arrays are not visible from this signature — confirm against the cuSPARSE reference.
6373    pub fn cusparseXcoo2csr(
6374        handle: cusparseHandle_t,
6375        cooRowInd: *const ::core::ffi::c_int,
6376        nnz: ::core::ffi::c_int,
6377        m: ::core::ffi::c_int,
6378        csrSortedRowPtr: *mut ::core::ffi::c_int,
6379        idxBase: cusparseIndexBase_t,
6380    ) -> cusparseStatus_t;
6381}
6382unsafe extern "C" {
6383    /// This function converts the array containing the compressed row pointers (corresponding to CSR format) into an array of uncompressed row indices (corresponding to COO format).
6384    ///
6385    /// It can also be used to convert the array containing the compressed column indices (corresponding to CSC format) into an array of uncompressed column indices (corresponding to COO format).
6386    ///
6387    /// * The routine requires no extra storage.
6388    /// * The routine supports asynchronous execution.
6389    /// * The routine supports CUDA graph capture.
    ///
    /// The input pointers are read through `csrSortedRowPtr` and the result is written through `cooRowInd`; only integer index arrays are involved, so there is no per-value-type variant of this declaration.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so both pointers must be valid for whatever access the routine performs.
    /// NOTE(review): the required array lengths (presumably related to `nnz` and `m`) and the memory space of the arrays are not visible from this signature — confirm against the cuSPARSE reference.
6390    pub fn cusparseXcsr2coo(
6391        handle: cusparseHandle_t,
6392        csrSortedRowPtr: *const ::core::ffi::c_int,
6393        nnz: ::core::ffi::c_int,
6394        m: ::core::ffi::c_int,
6395        cooRowInd: *mut ::core::ffi::c_int,
6396        idxBase: cusparseIndexBase_t,
6397    ) -> cusparseStatus_t;
6398}
6399unsafe extern "C" {
    /// Raw binding for `cusparseXcsr2bsrNnz`; no upstream description was attached to this declaration.
    ///
    /// The signature takes only the index structure of a CSR matrix A (`csrSortedRowPtrA`, `csrSortedColIndA`; no value array) plus a `blockDim`, and has two `*mut` arguments: `bsrSortedRowPtrC` and `nnzTotalDevHostPtr`.
    /// Presumably it computes the BSR row pointers and the block count needed before calling one of the `csr2bsr` conversions — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required array lengths and whether arrays must live in device or host memory are not visible from this signature — confirm against the cuSPARSE reference.
6400    pub fn cusparseXcsr2bsrNnz(
6401        handle: cusparseHandle_t,
6402        dirA: cusparseDirection_t,
6403        m: ::core::ffi::c_int,
6404        n: ::core::ffi::c_int,
6405        descrA: cusparseMatDescr_t,
6406        csrSortedRowPtrA: *const ::core::ffi::c_int,
6407        csrSortedColIndA: *const ::core::ffi::c_int,
6408        blockDim: ::core::ffi::c_int,
6409        descrC: cusparseMatDescr_t,
6410        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
6411        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
6412    ) -> cusparseStatus_t;
6413}
6414unsafe extern "C" {
    /// Raw binding for `cusparseScsr2bsr`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) and writes a BSR matrix C with block size `blockDim` through the three `*mut` arrays. Values are `f32`.
    /// Presumably the inverse direction of `cusparseSbsr2csr` — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6415    pub fn cusparseScsr2bsr(
6416        handle: cusparseHandle_t,
6417        dirA: cusparseDirection_t,
6418        m: ::core::ffi::c_int,
6419        n: ::core::ffi::c_int,
6420        descrA: cusparseMatDescr_t,
6421        csrSortedValA: *const f32,
6422        csrSortedRowPtrA: *const ::core::ffi::c_int,
6423        csrSortedColIndA: *const ::core::ffi::c_int,
6424        blockDim: ::core::ffi::c_int,
6425        descrC: cusparseMatDescr_t,
6426        bsrSortedValC: *mut f32,
6427        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
6428        bsrSortedColIndC: *mut ::core::ffi::c_int,
6429    ) -> cusparseStatus_t;
6430}
6431unsafe extern "C" {
    /// Raw binding for `cusparseDcsr2bsr`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) and writes a BSR matrix C with block size `blockDim` through the three `*mut` arrays. Values are `f64`.
    /// Presumably the inverse direction of `cusparseDbsr2csr` — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6432    pub fn cusparseDcsr2bsr(
6433        handle: cusparseHandle_t,
6434        dirA: cusparseDirection_t,
6435        m: ::core::ffi::c_int,
6436        n: ::core::ffi::c_int,
6437        descrA: cusparseMatDescr_t,
6438        csrSortedValA: *const f64,
6439        csrSortedRowPtrA: *const ::core::ffi::c_int,
6440        csrSortedColIndA: *const ::core::ffi::c_int,
6441        blockDim: ::core::ffi::c_int,
6442        descrC: cusparseMatDescr_t,
6443        bsrSortedValC: *mut f64,
6444        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
6445        bsrSortedColIndC: *mut ::core::ffi::c_int,
6446    ) -> cusparseStatus_t;
6447}
6448unsafe extern "C" {
    /// Raw binding for `cusparseCcsr2bsr`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) and writes a BSR matrix C with block size `blockDim` through the three `*mut` arrays. Values are `cuComplex`.
    /// Presumably the inverse direction of `cusparseCbsr2csr` — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6449    pub fn cusparseCcsr2bsr(
6450        handle: cusparseHandle_t,
6451        dirA: cusparseDirection_t,
6452        m: ::core::ffi::c_int,
6453        n: ::core::ffi::c_int,
6454        descrA: cusparseMatDescr_t,
6455        csrSortedValA: *const cuComplex,
6456        csrSortedRowPtrA: *const ::core::ffi::c_int,
6457        csrSortedColIndA: *const ::core::ffi::c_int,
6458        blockDim: ::core::ffi::c_int,
6459        descrC: cusparseMatDescr_t,
6460        bsrSortedValC: *mut cuComplex,
6461        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
6462        bsrSortedColIndC: *mut ::core::ffi::c_int,
6463    ) -> cusparseStatus_t;
6464}
6465unsafe extern "C" {
    /// Raw binding for `cusparseZcsr2bsr`; no upstream description was attached to this declaration.
    ///
    /// Judging by the signature, it reads a CSR matrix A (`csrSortedValA`, `csrSortedRowPtrA`, `csrSortedColIndA`) and writes a BSR matrix C with block size `blockDim` through the three `*mut` arrays. Values are `cuDoubleComplex`.
    /// Presumably the inverse direction of `cusparseZbsr2csr` — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6466    pub fn cusparseZcsr2bsr(
6467        handle: cusparseHandle_t,
6468        dirA: cusparseDirection_t,
6469        m: ::core::ffi::c_int,
6470        n: ::core::ffi::c_int,
6471        descrA: cusparseMatDescr_t,
6472        csrSortedValA: *const cuDoubleComplex,
6473        csrSortedRowPtrA: *const ::core::ffi::c_int,
6474        csrSortedColIndA: *const ::core::ffi::c_int,
6475        blockDim: ::core::ffi::c_int,
6476        descrC: cusparseMatDescr_t,
6477        bsrSortedValC: *mut cuDoubleComplex,
6478        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
6479        bsrSortedColIndC: *mut ::core::ffi::c_int,
6480    ) -> cusparseStatus_t;
6481}
6482unsafe extern "C" {
6483    /// This function converts a sparse matrix in BSR format (defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
    /// In this binding those arrays are the parameters `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA`, `csrSortedValC`, `csrSortedRowPtrC` and `csrSortedColIndC`. Values are `f32`.
6484    ///
6485    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6486    ///
6487    /// `csrRowPtrC` of `m+1` elements
6488    ///
6489    /// `csrValC` of `nnz` elements
6490    ///
6491    /// `csrColIndC` of `nnz` elements
6492    ///
6493    /// The general procedure is as follows:
    /// NOTE(review): whatever followed this sentence upstream is not present in these generated docs; the bullets below are general remarks, not the procedure.
6494    ///
6495    /// * The routine requires no extra storage
6496    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
6497    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs, including the output sizes listed above.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6498    pub fn cusparseSbsr2csr(
6499        handle: cusparseHandle_t,
6500        dirA: cusparseDirection_t,
6501        mb: ::core::ffi::c_int,
6502        nb: ::core::ffi::c_int,
6503        descrA: cusparseMatDescr_t,
6504        bsrSortedValA: *const f32,
6505        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6506        bsrSortedColIndA: *const ::core::ffi::c_int,
6507        blockDim: ::core::ffi::c_int,
6508        descrC: cusparseMatDescr_t,
6509        csrSortedValC: *mut f32,
6510        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6511        csrSortedColIndC: *mut ::core::ffi::c_int,
6512    ) -> cusparseStatus_t;
6513}
6514unsafe extern "C" {
6515    /// This function converts a sparse matrix in BSR format (defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
    /// In this binding those arrays are the parameters `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA`, `csrSortedValC`, `csrSortedRowPtrC` and `csrSortedColIndC`. Values are `f64`.
6516    ///
6517    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6518    ///
6519    /// `csrRowPtrC` of `m+1` elements
6520    ///
6521    /// `csrValC` of `nnz` elements
6522    ///
6523    /// `csrColIndC` of `nnz` elements
6524    ///
6525    /// The general procedure is as follows:
    /// NOTE(review): whatever followed this sentence upstream is not present in these generated docs; the bullets below are general remarks, not the procedure.
6526    ///
6527    /// * The routine requires no extra storage
6528    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
6529    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs, including the output sizes listed above.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6530    pub fn cusparseDbsr2csr(
6531        handle: cusparseHandle_t,
6532        dirA: cusparseDirection_t,
6533        mb: ::core::ffi::c_int,
6534        nb: ::core::ffi::c_int,
6535        descrA: cusparseMatDescr_t,
6536        bsrSortedValA: *const f64,
6537        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6538        bsrSortedColIndA: *const ::core::ffi::c_int,
6539        blockDim: ::core::ffi::c_int,
6540        descrC: cusparseMatDescr_t,
6541        csrSortedValC: *mut f64,
6542        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6543        csrSortedColIndC: *mut ::core::ffi::c_int,
6544    ) -> cusparseStatus_t;
6545}
6546unsafe extern "C" {
6547    /// This function converts a sparse matrix in BSR format (defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
    /// In this binding those arrays are the parameters `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA`, `csrSortedValC`, `csrSortedRowPtrC` and `csrSortedColIndC`. Values are `cuComplex`.
6548    ///
6549    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6550    ///
6551    /// `csrRowPtrC` of `m+1` elements
6552    ///
6553    /// `csrValC` of `nnz` elements
6554    ///
6555    /// `csrColIndC` of `nnz` elements
6556    ///
6557    /// The general procedure is as follows:
    /// NOTE(review): whatever followed this sentence upstream is not present in these generated docs; the bullets below are general remarks, not the procedure.
6558    ///
6559    /// * The routine requires no extra storage
6560    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
6561    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs, including the output sizes listed above.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6562    pub fn cusparseCbsr2csr(
6563        handle: cusparseHandle_t,
6564        dirA: cusparseDirection_t,
6565        mb: ::core::ffi::c_int,
6566        nb: ::core::ffi::c_int,
6567        descrA: cusparseMatDescr_t,
6568        bsrSortedValA: *const cuComplex,
6569        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6570        bsrSortedColIndA: *const ::core::ffi::c_int,
6571        blockDim: ::core::ffi::c_int,
6572        descrC: cusparseMatDescr_t,
6573        csrSortedValC: *mut cuComplex,
6574        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6575        csrSortedColIndC: *mut ::core::ffi::c_int,
6576    ) -> cusparseStatus_t;
6577}
6578unsafe extern "C" {
6579    /// This function converts a sparse matrix in BSR format (defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format (defined by the arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`).
    /// In this binding those arrays are the parameters `bsrSortedValA`, `bsrSortedRowPtrA`, `bsrSortedColIndA`, `csrSortedValC`, `csrSortedRowPtrC` and `csrSortedColIndC`. Values are `cuDoubleComplex`.
6580    ///
6581    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6582    ///
6583    /// `csrRowPtrC` of `m+1` elements
6584    ///
6585    /// `csrValC` of `nnz` elements
6586    ///
6587    /// `csrColIndC` of `nnz` elements
6588    ///
6589    /// The general procedure is as follows:
    /// NOTE(review): whatever followed this sentence upstream is not present in these generated docs; the bullets below are general remarks, not the procedure.
6590    ///
6591    /// * The routine requires no extra storage
6592    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available
6593    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs, including the output sizes listed above.
    /// NOTE(review): whether each array must live in device or host memory is not visible from this signature — confirm against the cuSPARSE reference.
6594    pub fn cusparseZbsr2csr(
6595        handle: cusparseHandle_t,
6596        dirA: cusparseDirection_t,
6597        mb: ::core::ffi::c_int,
6598        nb: ::core::ffi::c_int,
6599        descrA: cusparseMatDescr_t,
6600        bsrSortedValA: *const cuDoubleComplex,
6601        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6602        bsrSortedColIndA: *const ::core::ffi::c_int,
6603        blockDim: ::core::ffi::c_int,
6604        descrC: cusparseMatDescr_t,
6605        csrSortedValC: *mut cuDoubleComplex,
6606        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6607        csrSortedColIndC: *mut ::core::ffi::c_int,
6608    ) -> cusparseStatus_t;
6609}
6610unsafe extern "C" {
    /// Raw binding for `cusparseSgebsr2gebsc_bufferSize`; no upstream description was attached to this declaration.
    ///
    /// The documentation on `cusparseSgebsr2gebsc` states that `gebsr2gebsc_bufferSize()` must be called to determine the size of the buffer that routine requires.
    /// Here the result is reported through `pBufferSizeInBytes`, a pointer to a C `int`. Values are `f32`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6611    pub fn cusparseSgebsr2gebsc_bufferSize(
6612        handle: cusparseHandle_t,
6613        mb: ::core::ffi::c_int,
6614        nb: ::core::ffi::c_int,
6615        nnzb: ::core::ffi::c_int,
6616        bsrSortedVal: *const f32,
6617        bsrSortedRowPtr: *const ::core::ffi::c_int,
6618        bsrSortedColInd: *const ::core::ffi::c_int,
6619        rowBlockDim: ::core::ffi::c_int,
6620        colBlockDim: ::core::ffi::c_int,
6621        pBufferSizeInBytes: *mut ::core::ffi::c_int,
6622    ) -> cusparseStatus_t;
6623}
6624unsafe extern "C" {
    /// Raw binding for `cusparseDgebsr2gebsc_bufferSize`; no upstream description was attached to this declaration.
    ///
    /// The documentation on `cusparseDgebsr2gebsc` states that `gebsr2gebsc_bufferSize()` must be called to determine the size of the buffer that routine requires.
    /// Here the result is reported through `pBufferSizeInBytes`, a pointer to a C `int`. Values are `f64`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6625    pub fn cusparseDgebsr2gebsc_bufferSize(
6626        handle: cusparseHandle_t,
6627        mb: ::core::ffi::c_int,
6628        nb: ::core::ffi::c_int,
6629        nnzb: ::core::ffi::c_int,
6630        bsrSortedVal: *const f64,
6631        bsrSortedRowPtr: *const ::core::ffi::c_int,
6632        bsrSortedColInd: *const ::core::ffi::c_int,
6633        rowBlockDim: ::core::ffi::c_int,
6634        colBlockDim: ::core::ffi::c_int,
6635        pBufferSizeInBytes: *mut ::core::ffi::c_int,
6636    ) -> cusparseStatus_t;
6637}
6638unsafe extern "C" {
    /// Raw binding for `cusparseCgebsr2gebsc_bufferSize`; no upstream description was attached to this declaration.
    ///
    /// The documentation on `cusparseCgebsr2gebsc` states that `gebsr2gebsc_bufferSize()` must be called to determine the size of the buffer that routine requires.
    /// Here the result is reported through `pBufferSizeInBytes`, a pointer to a C `int`. Values are `cuComplex`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6639    pub fn cusparseCgebsr2gebsc_bufferSize(
6640        handle: cusparseHandle_t,
6641        mb: ::core::ffi::c_int,
6642        nb: ::core::ffi::c_int,
6643        nnzb: ::core::ffi::c_int,
6644        bsrSortedVal: *const cuComplex,
6645        bsrSortedRowPtr: *const ::core::ffi::c_int,
6646        bsrSortedColInd: *const ::core::ffi::c_int,
6647        rowBlockDim: ::core::ffi::c_int,
6648        colBlockDim: ::core::ffi::c_int,
6649        pBufferSizeInBytes: *mut ::core::ffi::c_int,
6650    ) -> cusparseStatus_t;
6651}
6652unsafe extern "C" {
    /// Raw binding for `cusparseZgebsr2gebsc_bufferSize`; no upstream description was attached to this declaration.
    ///
    /// The documentation on the sibling `gebsr2gebsc` declarations states that `gebsr2gebsc_bufferSize()` must be called to determine the size of the buffer that routine requires.
    /// Here the result is reported through `pBufferSizeInBytes`, a pointer to a C `int`. Values are `cuDoubleComplex`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6653    pub fn cusparseZgebsr2gebsc_bufferSize(
6654        handle: cusparseHandle_t,
6655        mb: ::core::ffi::c_int,
6656        nb: ::core::ffi::c_int,
6657        nnzb: ::core::ffi::c_int,
6658        bsrSortedVal: *const cuDoubleComplex,
6659        bsrSortedRowPtr: *const ::core::ffi::c_int,
6660        bsrSortedColInd: *const ::core::ffi::c_int,
6661        rowBlockDim: ::core::ffi::c_int,
6662        colBlockDim: ::core::ffi::c_int,
6663        pBufferSizeInBytes: *mut ::core::ffi::c_int,
6664    ) -> cusparseStatus_t;
6665}
6666unsafe extern "C" {
    /// Raw binding for `cusparseSgebsr2gebsc_bufferSizeExt`; no upstream description was attached to this declaration.
    ///
    /// The argument list matches `cusparseSgebsr2gebsc_bufferSize` except for the last parameter: the size is reported through `pBufferSize`, a `*mut size_t`, instead of a pointer to a C `int`.
    /// Presumably it serves the same purpose (sizing the work buffer of `cusparseSgebsr2gebsc`) — confirm against the cuSPARSE reference. Values are `f32`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6667    pub fn cusparseSgebsr2gebsc_bufferSizeExt(
6668        handle: cusparseHandle_t,
6669        mb: ::core::ffi::c_int,
6670        nb: ::core::ffi::c_int,
6671        nnzb: ::core::ffi::c_int,
6672        bsrSortedVal: *const f32,
6673        bsrSortedRowPtr: *const ::core::ffi::c_int,
6674        bsrSortedColInd: *const ::core::ffi::c_int,
6675        rowBlockDim: ::core::ffi::c_int,
6676        colBlockDim: ::core::ffi::c_int,
6677        pBufferSize: *mut size_t,
6678    ) -> cusparseStatus_t;
6679}
6680unsafe extern "C" {
    /// Raw binding for `cusparseDgebsr2gebsc_bufferSizeExt`; no upstream description was attached to this declaration.
    ///
    /// The argument list matches `cusparseDgebsr2gebsc_bufferSize` except for the last parameter: the size is reported through `pBufferSize`, a `*mut size_t`, instead of a pointer to a C `int`.
    /// Presumably it serves the same purpose (sizing the work buffer of `cusparseDgebsr2gebsc`) — confirm against the cuSPARSE reference. Values are `f64`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6681    pub fn cusparseDgebsr2gebsc_bufferSizeExt(
6682        handle: cusparseHandle_t,
6683        mb: ::core::ffi::c_int,
6684        nb: ::core::ffi::c_int,
6685        nnzb: ::core::ffi::c_int,
6686        bsrSortedVal: *const f64,
6687        bsrSortedRowPtr: *const ::core::ffi::c_int,
6688        bsrSortedColInd: *const ::core::ffi::c_int,
6689        rowBlockDim: ::core::ffi::c_int,
6690        colBlockDim: ::core::ffi::c_int,
6691        pBufferSize: *mut size_t,
6692    ) -> cusparseStatus_t;
6693}
6694unsafe extern "C" {
    /// Raw binding for `cusparseCgebsr2gebsc_bufferSizeExt`; no upstream description was attached to this declaration.
    ///
    /// The argument list matches `cusparseCgebsr2gebsc_bufferSize` except for the last parameter: the size is reported through `pBufferSize`, a `*mut size_t`, instead of a pointer to a C `int`.
    /// Presumably it serves the same purpose (sizing the work buffer of `cusparseCgebsr2gebsc`) — confirm against the cuSPARSE reference. Values are `cuComplex`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6695    pub fn cusparseCgebsr2gebsc_bufferSizeExt(
6696        handle: cusparseHandle_t,
6697        mb: ::core::ffi::c_int,
6698        nb: ::core::ffi::c_int,
6699        nnzb: ::core::ffi::c_int,
6700        bsrSortedVal: *const cuComplex,
6701        bsrSortedRowPtr: *const ::core::ffi::c_int,
6702        bsrSortedColInd: *const ::core::ffi::c_int,
6703        rowBlockDim: ::core::ffi::c_int,
6704        colBlockDim: ::core::ffi::c_int,
6705        pBufferSize: *mut size_t,
6706    ) -> cusparseStatus_t;
6707}
6708unsafe extern "C" {
    /// Raw binding for `cusparseZgebsr2gebsc_bufferSizeExt`; no upstream description was attached to this declaration.
    ///
    /// The argument list matches `cusparseZgebsr2gebsc_bufferSize` except for the last parameter: the size is reported through `pBufferSize`, a `*mut size_t`, instead of a pointer to a C `int`.
    /// Presumably it serves the same purpose (sizing the work buffer of the corresponding `gebsr2gebsc` routine) — confirm against the cuSPARSE reference. Values are `cuDoubleComplex`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs.
    /// NOTE(review): whether the input arrays are actually dereferenced by this size query, and their required memory space, are not visible from this signature — confirm against the cuSPARSE reference.
6709    pub fn cusparseZgebsr2gebsc_bufferSizeExt(
6710        handle: cusparseHandle_t,
6711        mb: ::core::ffi::c_int,
6712        nb: ::core::ffi::c_int,
6713        nnzb: ::core::ffi::c_int,
6714        bsrSortedVal: *const cuDoubleComplex,
6715        bsrSortedRowPtr: *const ::core::ffi::c_int,
6716        bsrSortedColInd: *const ::core::ffi::c_int,
6717        rowBlockDim: ::core::ffi::c_int,
6718        colBlockDim: ::core::ffi::c_int,
6719        pBufferSize: *mut size_t,
6720    ) -> cusparseStatus_t;
6721}
6722unsafe extern "C" {
6723    /// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
6724    ///
6725    /// This sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
6726    ///
6727    /// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
6728    ///
6729    /// * The routine requires no extra storage if `pBuffer != NULL`
6730    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
6731    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This is the `f32` variant: the input is read through `bsrSortedVal`, `bsrSortedRowPtr` and `bsrSortedColInd`, and the result is written through `bscVal`, `bscRowInd` and `bscColPtr`. The work buffer is passed as the untyped pointer `pBuffer`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs, and `pBuffer` must satisfy the size requirement described above.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6732    pub fn cusparseSgebsr2gebsc(
6733        handle: cusparseHandle_t,
6734        mb: ::core::ffi::c_int,
6735        nb: ::core::ffi::c_int,
6736        nnzb: ::core::ffi::c_int,
6737        bsrSortedVal: *const f32,
6738        bsrSortedRowPtr: *const ::core::ffi::c_int,
6739        bsrSortedColInd: *const ::core::ffi::c_int,
6740        rowBlockDim: ::core::ffi::c_int,
6741        colBlockDim: ::core::ffi::c_int,
6742        bscVal: *mut f32,
6743        bscRowInd: *mut ::core::ffi::c_int,
6744        bscColPtr: *mut ::core::ffi::c_int,
6745        copyValues: cusparseAction_t,
6746        idxBase: cusparseIndexBase_t,
6747        pBuffer: *mut ::core::ffi::c_void,
6748    ) -> cusparseStatus_t;
6749}
6750unsafe extern "C" {
6751    /// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
6752    ///
6753    /// This sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
6754    ///
6755    /// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
6756    ///
6757    /// * The routine requires no extra storage if `pBuffer != NULL`
6758    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
6759    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This is the `f64` variant: the input is read through `bsrSortedVal`, `bsrSortedRowPtr` and `bsrSortedColInd`, and the result is written through `bscVal`, `bscRowInd` and `bscColPtr`. The work buffer is passed as the untyped pointer `pBuffer`.
    ///
    /// # Safety
    ///
    /// Raw FFI declaration: the arguments are passed to the C library unchecked, so every pointer must be valid for whatever access the routine performs, and `pBuffer` must satisfy the size requirement described above.
    /// NOTE(review): the required sizes of the output arrays and whether arrays must live in device memory are not visible from this signature — confirm against the cuSPARSE reference.
6760    pub fn cusparseDgebsr2gebsc(
6761        handle: cusparseHandle_t,
6762        mb: ::core::ffi::c_int,
6763        nb: ::core::ffi::c_int,
6764        nnzb: ::core::ffi::c_int,
6765        bsrSortedVal: *const f64,
6766        bsrSortedRowPtr: *const ::core::ffi::c_int,
6767        bsrSortedColInd: *const ::core::ffi::c_int,
6768        rowBlockDim: ::core::ffi::c_int,
6769        colBlockDim: ::core::ffi::c_int,
6770        bscVal: *mut f64,
6771        bscRowInd: *mut ::core::ffi::c_int,
6772        bscColPtr: *mut ::core::ffi::c_int,
6773        copyValues: cusparseAction_t,
6774        idxBase: cusparseIndexBase_t,
6775        pBuffer: *mut ::core::ffi::c_void,
6776    ) -> cusparseStatus_t;
6777}
6778unsafe extern "C" {
6779    /// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
6780    ///
6781    /// The sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
6782    ///
6783    /// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
6784    ///
6785    /// * The routine requires no extra storage if `pBuffer != NULL`.
6786    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6787    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This binding is the variant whose value pointers (`bsrSortedVal`, `bscVal`) are typed `cuComplex`.
6788    pub fn cusparseCgebsr2gebsc(
6789        handle: cusparseHandle_t,
6790        mb: ::core::ffi::c_int,
6791        nb: ::core::ffi::c_int,
6792        nnzb: ::core::ffi::c_int,
6793        bsrSortedVal: *const cuComplex,
6794        bsrSortedRowPtr: *const ::core::ffi::c_int,
6795        bsrSortedColInd: *const ::core::ffi::c_int,
6796        rowBlockDim: ::core::ffi::c_int,
6797        colBlockDim: ::core::ffi::c_int,
6798        bscVal: *mut cuComplex,
6799        bscRowInd: *mut ::core::ffi::c_int,
6800        bscColPtr: *mut ::core::ffi::c_int,
6801        copyValues: cusparseAction_t,
6802        idxBase: cusparseIndexBase_t,
6803        pBuffer: *mut ::core::ffi::c_void,
6804    ) -> cusparseStatus_t;
6805}
6806unsafe extern "C" {
6807    /// This function can be seen as the same as `csr2csc()` when each block of size `rowBlockDim*colBlockDim` is regarded as a scalar.
6808    ///
6809    /// The sparsity pattern of the result matrix can also be seen as the transpose of the original sparse matrix, but the memory layout of a block does not change.
6810    ///
6811    /// The user must call `gebsr2gebsc_bufferSize()` to determine the size of the buffer required by `gebsr2gebsc()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsc()`.
6812    ///
6813    /// * The routine requires no extra storage if `pBuffer != NULL`.
6814    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
6815    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// This binding is the variant whose value pointers (`bsrSortedVal`, `bscVal`) are typed `cuDoubleComplex`.
6816    pub fn cusparseZgebsr2gebsc(
6817        handle: cusparseHandle_t,
6818        mb: ::core::ffi::c_int,
6819        nb: ::core::ffi::c_int,
6820        nnzb: ::core::ffi::c_int,
6821        bsrSortedVal: *const cuDoubleComplex,
6822        bsrSortedRowPtr: *const ::core::ffi::c_int,
6823        bsrSortedColInd: *const ::core::ffi::c_int,
6824        rowBlockDim: ::core::ffi::c_int,
6825        colBlockDim: ::core::ffi::c_int,
6826        bscVal: *mut cuDoubleComplex,
6827        bscRowInd: *mut ::core::ffi::c_int,
6828        bscColPtr: *mut ::core::ffi::c_int,
6829        copyValues: cusparseAction_t,
6830        idxBase: cusparseIndexBase_t,
6831        pBuffer: *mut ::core::ffi::c_void,
6832    ) -> cusparseStatus_t;
6833}
6834unsafe extern "C" {
6835    /// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format that is defined by arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`.
6836    ///
6837    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6838    ///
6839    /// `csrRowPtrC` of `m+1` elements
6840    ///
6841    /// `csrValC` of `nnz` elements
6842    ///
6843    /// `csrColIndC` of `nnz` elements
6844    ///
6845    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
6846    ///
6847    /// * The routine requires no extra storage.
6848    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
6849    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// NOTE(review): the text above refers to `blockDim`, but this binding takes separate `rowBlockDim` and `colBlockDim` parameters; presumably `m = mb*rowBlockDim`, `n = nb*colBlockDim`, and `nnz = nnzb*rowBlockDim*colBlockDim` — confirm against the cuSPARSE documentation.
    ///
    /// NOTE(review): this `X` binding has no value-array parameters (only the `*const` `bsrSortedRowPtrA`/`bsrSortedColIndA` and the `*mut` `csrSortedRowPtrC`/`csrSortedColIndC`), so the statements about `bsrValA`/`csrValC` presumably apply only to the `S`/`D`/`C`/`Z` variants — confirm.
6850    pub fn cusparseXgebsr2csr(
6851        handle: cusparseHandle_t,
6852        dirA: cusparseDirection_t,
6853        mb: ::core::ffi::c_int,
6854        nb: ::core::ffi::c_int,
6855        descrA: cusparseMatDescr_t,
6856        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6857        bsrSortedColIndA: *const ::core::ffi::c_int,
6858        rowBlockDim: ::core::ffi::c_int,
6859        colBlockDim: ::core::ffi::c_int,
6860        descrC: cusparseMatDescr_t,
6861        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6862        csrSortedColIndC: *mut ::core::ffi::c_int,
6863    ) -> cusparseStatus_t;
6864}
6865unsafe extern "C" {
6866    /// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format that is defined by arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`.
6867    ///
6868    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6869    ///
6870    /// `csrRowPtrC` of `m+1` elements
6871    ///
6872    /// `csrValC` of `nnz` elements
6873    ///
6874    /// `csrColIndC` of `nnz` elements
6875    ///
6876    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
6877    ///
6878    /// * The routine requires no extra storage.
6879    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
6880    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// NOTE(review): the text above refers to `blockDim`, but this binding takes separate `rowBlockDim` and `colBlockDim` parameters; presumably `m = mb*rowBlockDim`, `n = nb*colBlockDim`, and `nnz = nnzb*rowBlockDim*colBlockDim` — confirm against the cuSPARSE documentation.
    ///
    /// This binding is the variant whose value pointers (`bsrSortedValA`, `csrSortedValC`) are typed `f32`.
6881    pub fn cusparseSgebsr2csr(
6882        handle: cusparseHandle_t,
6883        dirA: cusparseDirection_t,
6884        mb: ::core::ffi::c_int,
6885        nb: ::core::ffi::c_int,
6886        descrA: cusparseMatDescr_t,
6887        bsrSortedValA: *const f32,
6888        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6889        bsrSortedColIndA: *const ::core::ffi::c_int,
6890        rowBlockDim: ::core::ffi::c_int,
6891        colBlockDim: ::core::ffi::c_int,
6892        descrC: cusparseMatDescr_t,
6893        csrSortedValC: *mut f32,
6894        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6895        csrSortedColIndC: *mut ::core::ffi::c_int,
6896    ) -> cusparseStatus_t;
6897}
6898unsafe extern "C" {
6899    /// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format that is defined by arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`.
6900    ///
6901    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6902    ///
6903    /// `csrRowPtrC` of `m+1` elements
6904    ///
6905    /// `csrValC` of `nnz` elements
6906    ///
6907    /// `csrColIndC` of `nnz` elements
6908    ///
6909    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
6910    ///
6911    /// * The routine requires no extra storage.
6912    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
6913    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// NOTE(review): the text above refers to `blockDim`, but this binding takes separate `rowBlockDim` and `colBlockDim` parameters; presumably `m = mb*rowBlockDim`, `n = nb*colBlockDim`, and `nnz = nnzb*rowBlockDim*colBlockDim` — confirm against the cuSPARSE documentation.
    ///
    /// This binding is the variant whose value pointers (`bsrSortedValA`, `csrSortedValC`) are typed `f64`.
6914    pub fn cusparseDgebsr2csr(
6915        handle: cusparseHandle_t,
6916        dirA: cusparseDirection_t,
6917        mb: ::core::ffi::c_int,
6918        nb: ::core::ffi::c_int,
6919        descrA: cusparseMatDescr_t,
6920        bsrSortedValA: *const f64,
6921        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6922        bsrSortedColIndA: *const ::core::ffi::c_int,
6923        rowBlockDim: ::core::ffi::c_int,
6924        colBlockDim: ::core::ffi::c_int,
6925        descrC: cusparseMatDescr_t,
6926        csrSortedValC: *mut f64,
6927        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6928        csrSortedColIndC: *mut ::core::ffi::c_int,
6929    ) -> cusparseStatus_t;
6930}
6931unsafe extern "C" {
6932    /// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format that is defined by arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`.
6933    ///
6934    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6935    ///
6936    /// `csrRowPtrC` of `m+1` elements
6937    ///
6938    /// `csrValC` of `nnz` elements
6939    ///
6940    /// `csrColIndC` of `nnz` elements
6941    ///
6942    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
6943    ///
6944    /// * The routine requires no extra storage.
6945    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
6946    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// NOTE(review): the text above refers to `blockDim`, but this binding takes separate `rowBlockDim` and `colBlockDim` parameters; presumably `m = mb*rowBlockDim`, `n = nb*colBlockDim`, and `nnz = nnzb*rowBlockDim*colBlockDim` — confirm against the cuSPARSE documentation.
    ///
    /// This binding is the variant whose value pointers (`bsrSortedValA`, `csrSortedValC`) are typed `cuComplex`.
6947    pub fn cusparseCgebsr2csr(
6948        handle: cusparseHandle_t,
6949        dirA: cusparseDirection_t,
6950        mb: ::core::ffi::c_int,
6951        nb: ::core::ffi::c_int,
6952        descrA: cusparseMatDescr_t,
6953        bsrSortedValA: *const cuComplex,
6954        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6955        bsrSortedColIndA: *const ::core::ffi::c_int,
6956        rowBlockDim: ::core::ffi::c_int,
6957        colBlockDim: ::core::ffi::c_int,
6958        descrC: cusparseMatDescr_t,
6959        csrSortedValC: *mut cuComplex,
6960        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6961        csrSortedColIndC: *mut ::core::ffi::c_int,
6962    ) -> cusparseStatus_t;
6963}
6964unsafe extern "C" {
6965    /// This function converts a sparse matrix in BSR format (that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA`) into a sparse matrix in CSR format that is defined by arrays `csrValC`, `csrRowPtrC`, and `csrColIndC`.
6966    ///
6967    /// Let `m(=mb*blockDim)` be the number of rows of `A` and `n(=nb*blockDim)` be the number of columns of `A`, then `A` and `C` are `m*n` sparse matrices. The BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks, whereas the sparse matrix `A` contains `nnz(=nnzb*blockDim*blockDim)` elements. The user must allocate enough space for arrays `csrRowPtrC`, `csrColIndC`, and `csrValC`. The requirements are as follows:
6968    ///
6969    /// `csrRowPtrC` of `m+1` elements
6970    ///
6971    /// `csrValC` of `nnz` elements
6972    ///
6973    /// `csrColIndC` of `nnz` elements
6974    ///
6975    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
6976    ///
6977    /// * The routine requires no extra storage.
6978    /// * The routine supports asynchronous execution if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
6979    /// * The routine supports CUDA graph capture if `blockDim != 1` or the Stream Ordered Memory Allocator is available.
    ///
    /// NOTE(review): the text above refers to `blockDim`, but this binding takes separate `rowBlockDim` and `colBlockDim` parameters; presumably `m = mb*rowBlockDim`, `n = nb*colBlockDim`, and `nnz = nnzb*rowBlockDim*colBlockDim` — confirm against the cuSPARSE documentation.
    ///
    /// This binding is the variant whose value pointers (`bsrSortedValA`, `csrSortedValC`) are typed `cuDoubleComplex`.
6980    pub fn cusparseZgebsr2csr(
6981        handle: cusparseHandle_t,
6982        dirA: cusparseDirection_t,
6983        mb: ::core::ffi::c_int,
6984        nb: ::core::ffi::c_int,
6985        descrA: cusparseMatDescr_t,
6986        bsrSortedValA: *const cuDoubleComplex,
6987        bsrSortedRowPtrA: *const ::core::ffi::c_int,
6988        bsrSortedColIndA: *const ::core::ffi::c_int,
6989        rowBlockDim: ::core::ffi::c_int,
6990        colBlockDim: ::core::ffi::c_int,
6991        descrC: cusparseMatDescr_t,
6992        csrSortedValC: *mut cuDoubleComplex,
6993        csrSortedRowPtrC: *mut ::core::ffi::c_int,
6994        csrSortedColIndC: *mut ::core::ffi::c_int,
6995    ) -> cusparseStatus_t;
6996}
6997unsafe extern "C" {
    /// Buffer-size query matching [`cusparseScsr2gebsr`] (same `f32` value type for `csrSortedValA`).
    ///
    /// The `csr2gebsr` documentation in this file states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
    ///
    /// NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int`), in bytes — confirm against the cuSPARSE documentation.
6998    pub fn cusparseScsr2gebsr_bufferSize(
6999        handle: cusparseHandle_t,
7000        dirA: cusparseDirection_t,
7001        m: ::core::ffi::c_int,
7002        n: ::core::ffi::c_int,
7003        descrA: cusparseMatDescr_t,
7004        csrSortedValA: *const f32,
7005        csrSortedRowPtrA: *const ::core::ffi::c_int,
7006        csrSortedColIndA: *const ::core::ffi::c_int,
7007        rowBlockDim: ::core::ffi::c_int,
7008        colBlockDim: ::core::ffi::c_int,
7009        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7010    ) -> cusparseStatus_t;
7011}
7012unsafe extern "C" {
    /// Buffer-size query matching [`cusparseDcsr2gebsr`] (same `f64` value type for `csrSortedValA`).
    ///
    /// The `csr2gebsr` documentation in this file states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
    ///
    /// NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int`), in bytes — confirm against the cuSPARSE documentation.
7013    pub fn cusparseDcsr2gebsr_bufferSize(
7014        handle: cusparseHandle_t,
7015        dirA: cusparseDirection_t,
7016        m: ::core::ffi::c_int,
7017        n: ::core::ffi::c_int,
7018        descrA: cusparseMatDescr_t,
7019        csrSortedValA: *const f64,
7020        csrSortedRowPtrA: *const ::core::ffi::c_int,
7021        csrSortedColIndA: *const ::core::ffi::c_int,
7022        rowBlockDim: ::core::ffi::c_int,
7023        colBlockDim: ::core::ffi::c_int,
7024        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7025    ) -> cusparseStatus_t;
7026}
7027unsafe extern "C" {
    /// Buffer-size query matching [`cusparseCcsr2gebsr`] (same `cuComplex` value type for `csrSortedValA`).
    ///
    /// The `csr2gebsr` documentation in this file states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
    ///
    /// NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int`), in bytes — confirm against the cuSPARSE documentation.
7028    pub fn cusparseCcsr2gebsr_bufferSize(
7029        handle: cusparseHandle_t,
7030        dirA: cusparseDirection_t,
7031        m: ::core::ffi::c_int,
7032        n: ::core::ffi::c_int,
7033        descrA: cusparseMatDescr_t,
7034        csrSortedValA: *const cuComplex,
7035        csrSortedRowPtrA: *const ::core::ffi::c_int,
7036        csrSortedColIndA: *const ::core::ffi::c_int,
7037        rowBlockDim: ::core::ffi::c_int,
7038        colBlockDim: ::core::ffi::c_int,
7039        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7040    ) -> cusparseStatus_t;
7041}
7042unsafe extern "C" {
    /// Buffer-size query matching [`cusparseZcsr2gebsr`] (same `cuDoubleComplex` value type for `csrSortedValA`).
    ///
    /// The `csr2gebsr` documentation in this file states that the user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
    ///
    /// NOTE(review): the size is presumably written through `pBufferSizeInBytes` (a `*mut c_int`), in bytes — confirm against the cuSPARSE documentation.
7043    pub fn cusparseZcsr2gebsr_bufferSize(
7044        handle: cusparseHandle_t,
7045        dirA: cusparseDirection_t,
7046        m: ::core::ffi::c_int,
7047        n: ::core::ffi::c_int,
7048        descrA: cusparseMatDescr_t,
7049        csrSortedValA: *const cuDoubleComplex,
7050        csrSortedRowPtrA: *const ::core::ffi::c_int,
7051        csrSortedColIndA: *const ::core::ffi::c_int,
7052        rowBlockDim: ::core::ffi::c_int,
7053        colBlockDim: ::core::ffi::c_int,
7054        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7055    ) -> cusparseStatus_t;
7056}
7057unsafe extern "C" {
    /// Takes the same parameters as [`cusparseScsr2gebsr_bufferSize`], except that the final `*mut` parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
    ///
    /// NOTE(review): presumably the same work-buffer size query for `csr2gebsr()`, reporting the size as a `size_t` — confirm against the cuSPARSE documentation.
7058    pub fn cusparseScsr2gebsr_bufferSizeExt(
7059        handle: cusparseHandle_t,
7060        dirA: cusparseDirection_t,
7061        m: ::core::ffi::c_int,
7062        n: ::core::ffi::c_int,
7063        descrA: cusparseMatDescr_t,
7064        csrSortedValA: *const f32,
7065        csrSortedRowPtrA: *const ::core::ffi::c_int,
7066        csrSortedColIndA: *const ::core::ffi::c_int,
7067        rowBlockDim: ::core::ffi::c_int,
7068        colBlockDim: ::core::ffi::c_int,
7069        pBufferSize: *mut size_t,
7070    ) -> cusparseStatus_t;
7071}
7072unsafe extern "C" {
    /// Takes the same parameters as [`cusparseDcsr2gebsr_bufferSize`], except that the final `*mut` parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
    ///
    /// NOTE(review): presumably the same work-buffer size query for `csr2gebsr()`, reporting the size as a `size_t` — confirm against the cuSPARSE documentation.
7073    pub fn cusparseDcsr2gebsr_bufferSizeExt(
7074        handle: cusparseHandle_t,
7075        dirA: cusparseDirection_t,
7076        m: ::core::ffi::c_int,
7077        n: ::core::ffi::c_int,
7078        descrA: cusparseMatDescr_t,
7079        csrSortedValA: *const f64,
7080        csrSortedRowPtrA: *const ::core::ffi::c_int,
7081        csrSortedColIndA: *const ::core::ffi::c_int,
7082        rowBlockDim: ::core::ffi::c_int,
7083        colBlockDim: ::core::ffi::c_int,
7084        pBufferSize: *mut size_t,
7085    ) -> cusparseStatus_t;
7086}
7087unsafe extern "C" {
    /// Takes the same parameters as [`cusparseCcsr2gebsr_bufferSize`], except that the final `*mut` parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
    ///
    /// NOTE(review): presumably the same work-buffer size query for `csr2gebsr()`, reporting the size as a `size_t` — confirm against the cuSPARSE documentation.
7088    pub fn cusparseCcsr2gebsr_bufferSizeExt(
7089        handle: cusparseHandle_t,
7090        dirA: cusparseDirection_t,
7091        m: ::core::ffi::c_int,
7092        n: ::core::ffi::c_int,
7093        descrA: cusparseMatDescr_t,
7094        csrSortedValA: *const cuComplex,
7095        csrSortedRowPtrA: *const ::core::ffi::c_int,
7096        csrSortedColIndA: *const ::core::ffi::c_int,
7097        rowBlockDim: ::core::ffi::c_int,
7098        colBlockDim: ::core::ffi::c_int,
7099        pBufferSize: *mut size_t,
7100    ) -> cusparseStatus_t;
7101}
7102unsafe extern "C" {
    /// Takes the same parameters as [`cusparseZcsr2gebsr_bufferSize`], except that the final `*mut` parameter is `pBufferSize: *mut size_t` instead of `pBufferSizeInBytes: *mut c_int`.
    ///
    /// NOTE(review): presumably the same work-buffer size query for `csr2gebsr()`, reporting the size as a `size_t` — confirm against the cuSPARSE documentation.
7103    pub fn cusparseZcsr2gebsr_bufferSizeExt(
7104        handle: cusparseHandle_t,
7105        dirA: cusparseDirection_t,
7106        m: ::core::ffi::c_int,
7107        n: ::core::ffi::c_int,
7108        descrA: cusparseMatDescr_t,
7109        csrSortedValA: *const cuDoubleComplex,
7110        csrSortedRowPtrA: *const ::core::ffi::c_int,
7111        csrSortedColIndA: *const ::core::ffi::c_int,
7112        rowBlockDim: ::core::ffi::c_int,
7113        colBlockDim: ::core::ffi::c_int,
7114        pBufferSize: *mut size_t,
7115    ) -> cusparseStatus_t;
7116}
7117unsafe extern "C" {
    /// Counting step of the CSR to general-BSR conversion that is completed by `cusparse[S|D|C|Z]csr2gebsr()`.
    ///
    /// As described in the `csr2gebsr` documentation in this file: the user allocates `bsrRowPtrC` of `mb+1` elements and calls this function to determine the number of nonzero block columns per block row; `nnzb` can then be taken from `*nnzTotalDevHostPtr` or computed as `bsrRowPtrC[mb]-bsrRowPtrC[0]`.
    ///
    /// That documentation also lists these properties for this routine:
    ///
    /// * The routine requires no extra storage.
    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// NOTE(review): `bsrRowPtrC` in that text presumably corresponds to the `bsrSortedRowPtrC` parameter here, and `pBuffer` is presumably the work buffer sized via `csr2gebsr_bufferSize()` — confirm against the cuSPARSE documentation.
7118    pub fn cusparseXcsr2gebsrNnz(
7119        handle: cusparseHandle_t,
7120        dirA: cusparseDirection_t,
7121        m: ::core::ffi::c_int,
7122        n: ::core::ffi::c_int,
7123        descrA: cusparseMatDescr_t,
7124        csrSortedRowPtrA: *const ::core::ffi::c_int,
7125        csrSortedColIndA: *const ::core::ffi::c_int,
7126        descrC: cusparseMatDescr_t,
7127        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7128        rowBlockDim: ::core::ffi::c_int,
7129        colBlockDim: ::core::ffi::c_int,
7130        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
7131        pBuffer: *mut ::core::ffi::c_void,
7132    ) -> cusparseStatus_t;
7133}
7134unsafe extern "C" {
7135    /// This function converts a sparse matrix `A` in CSR format (that is defined by arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (that is defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
7136    ///
7137    /// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
7138    ///
7139    /// The block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
7140    ///
7141    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC[mb]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally, function `cusparse[S|D|C|Z]csr2gebsr()` is called to complete the conversion.
7142    ///
7143    /// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
7144    ///
7145    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
7146    ///
7147    /// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
7148    ///
7149    /// * The routine requires no extra storage.
7150    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
7151    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
7152    ///
7153    /// The routine [`cusparseScsr2gebsr`] has the following properties:
7154    ///
7155    /// * The routine requires no extra storage if `pBuffer != NULL`.
7156    /// * The routine supports asynchronous execution.
7157    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the variant whose value pointers (`csrSortedValA`, `bsrSortedValC`) are typed `f32`.
7158    pub fn cusparseScsr2gebsr(
7159        handle: cusparseHandle_t,
7160        dirA: cusparseDirection_t,
7161        m: ::core::ffi::c_int,
7162        n: ::core::ffi::c_int,
7163        descrA: cusparseMatDescr_t,
7164        csrSortedValA: *const f32,
7165        csrSortedRowPtrA: *const ::core::ffi::c_int,
7166        csrSortedColIndA: *const ::core::ffi::c_int,
7167        descrC: cusparseMatDescr_t,
7168        bsrSortedValC: *mut f32,
7169        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7170        bsrSortedColIndC: *mut ::core::ffi::c_int,
7171        rowBlockDim: ::core::ffi::c_int,
7172        colBlockDim: ::core::ffi::c_int,
7173        pBuffer: *mut ::core::ffi::c_void,
7174    ) -> cusparseStatus_t;
7175}
7176unsafe extern "C" {
7177    /// This function converts a sparse matrix `A` in CSR format (that is defined by arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (that is defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
7178    ///
7179    /// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
7180    ///
7181    /// The block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
7182    ///
7183    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC[mb]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally, function `cusparse[S|D|C|Z]csr2gebsr()` is called to complete the conversion.
7184    ///
7185    /// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
7186    ///
7187    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
7188    ///
7189    /// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
7190    ///
7191    /// * The routine requires no extra storage.
7192    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
7193    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
7194    ///
7195    /// The routine [`cusparseDcsr2gebsr`] has the following properties:
7196    ///
7197    /// * The routine requires no extra storage if `pBuffer != NULL`.
7198    /// * The routine supports asynchronous execution.
7199    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the variant whose value pointers (`csrSortedValA`, `bsrSortedValC`) are typed `f64`.
7200    pub fn cusparseDcsr2gebsr(
7201        handle: cusparseHandle_t,
7202        dirA: cusparseDirection_t,
7203        m: ::core::ffi::c_int,
7204        n: ::core::ffi::c_int,
7205        descrA: cusparseMatDescr_t,
7206        csrSortedValA: *const f64,
7207        csrSortedRowPtrA: *const ::core::ffi::c_int,
7208        csrSortedColIndA: *const ::core::ffi::c_int,
7209        descrC: cusparseMatDescr_t,
7210        bsrSortedValC: *mut f64,
7211        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7212        bsrSortedColIndC: *mut ::core::ffi::c_int,
7213        rowBlockDim: ::core::ffi::c_int,
7214        colBlockDim: ::core::ffi::c_int,
7215        pBuffer: *mut ::core::ffi::c_void,
7216    ) -> cusparseStatus_t;
7217}
7218unsafe extern "C" {
7219    /// This function converts a sparse matrix `A` in CSR format (that is defined by arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (that is defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
7220    ///
7221    /// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
7222    ///
7223    /// The block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
7224    ///
7225    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC[mb]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally, function `cusparse[S|D|C|Z]csr2gebsr()` is called to complete the conversion.
7226    ///
7227    /// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
7228    ///
7229    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
7230    ///
7231    /// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
7232    ///
7233    /// * The routine requires no extra storage.
7234    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
7235    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
7236    ///
7237    /// The routine [`cusparseCcsr2gebsr`] has the following properties:
7238    ///
7239    /// * The routine requires no extra storage if `pBuffer != NULL`.
7240    /// * The routine supports asynchronous execution.
7241    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the variant whose value pointers (`csrSortedValA`, `bsrSortedValC`) are typed `cuComplex`.
7242    pub fn cusparseCcsr2gebsr(
7243        handle: cusparseHandle_t,
7244        dirA: cusparseDirection_t,
7245        m: ::core::ffi::c_int,
7246        n: ::core::ffi::c_int,
7247        descrA: cusparseMatDescr_t,
7248        csrSortedValA: *const cuComplex,
7249        csrSortedRowPtrA: *const ::core::ffi::c_int,
7250        csrSortedColIndA: *const ::core::ffi::c_int,
7251        descrC: cusparseMatDescr_t,
7252        bsrSortedValC: *mut cuComplex,
7253        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7254        bsrSortedColIndC: *mut ::core::ffi::c_int,
7255        rowBlockDim: ::core::ffi::c_int,
7256        colBlockDim: ::core::ffi::c_int,
7257        pBuffer: *mut ::core::ffi::c_void,
7258    ) -> cusparseStatus_t;
7259}
7260unsafe extern "C" {
7261    /// This function converts a sparse matrix `A` in CSR format (that is defined by arrays `csrValA`, `csrRowPtrA`, and `csrColIndA`) into a sparse matrix `C` in general BSR format (that is defined by the three arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`).
7262    ///
7263    /// The matrix `A` is an `m*n` sparse matrix and matrix `C` is a `(mb*rowBlockDim)*(nb*colBlockDim)` sparse matrix, where `mb(=(m+rowBlockDim-1)/rowBlockDim)` is the number of block rows of `C`, and `nb(=(n+colBlockDim-1)/colBlockDim)` is the number of block columns of `C`.
7264    ///
7265    /// The block of `C` is of size `rowBlockDim*colBlockDim`. If `m` is not a multiple of `rowBlockDim` or `n` is not a multiple of `colBlockDim`, zeros are filled in.
7266    ///
7267    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mb+1` elements and uses function [`cusparseXcsr2gebsrNnz`] to determine the number of nonzero block columns per block row. Second, the user gathers `nnzb` (number of nonzero block columns of matrix `C`) from either `(nnzb=*nnzTotalDevHostPtr)` or `(nnzb=bsrRowPtrC[mb]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzb*rowBlockDim*colBlockDim` elements and `bsrColIndC` of `nnzb` integers. Finally, function `cusparse[S|D|C|Z]csr2gebsr()` is called to complete the conversion.
7268    ///
7269    /// The user must obtain the size of the buffer required by `csr2gebsr()` by calling `csr2gebsr_bufferSize()`, allocate the buffer, and pass the buffer pointer to `csr2gebsr()`.
7270    ///
7271    /// The general procedure is as follows: (NOTE(review): no procedure listing follows in this generated comment — see the cuSPARSE documentation.)
7272    ///
7273    /// The routine [`cusparseXcsr2gebsrNnz`] has the following properties:
7274    ///
7275    /// * The routine requires no extra storage.
7276    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available.
7277    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
7278    ///
7279    /// The routine [`cusparseZcsr2gebsr`] has the following properties:
7280    ///
7281    /// * The routine requires no extra storage if `pBuffer != NULL`.
7282    /// * The routine supports asynchronous execution.
7283    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is the variant whose value pointers (`csrSortedValA`, `bsrSortedValC`) are typed `cuDoubleComplex`.
7284    pub fn cusparseZcsr2gebsr(
7285        handle: cusparseHandle_t,
7286        dirA: cusparseDirection_t,
7287        m: ::core::ffi::c_int,
7288        n: ::core::ffi::c_int,
7289        descrA: cusparseMatDescr_t,
7290        csrSortedValA: *const cuDoubleComplex,
7291        csrSortedRowPtrA: *const ::core::ffi::c_int,
7292        csrSortedColIndA: *const ::core::ffi::c_int,
7293        descrC: cusparseMatDescr_t,
7294        bsrSortedValC: *mut cuDoubleComplex,
7295        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7296        bsrSortedColIndC: *mut ::core::ffi::c_int,
7297        rowBlockDim: ::core::ffi::c_int,
7298        colBlockDim: ::core::ffi::c_int,
7299        pBuffer: *mut ::core::ffi::c_void,
7300    ) -> cusparseStatus_t;
7301}
7302unsafe extern "C" {
    /// Declaration of the external C function `cusparseSgebsr2gebsr_bufferSize`.
    ///
    /// Takes the BSR arrays of `A` (`bsrSortedValA` typed `f32`, plus `bsrSortedRowPtrA`/`bsrSortedColIndA`), two pairs of block dimensions (`rowBlockDimA`/`colBlockDimA` and `rowBlockDimC`/`colBlockDimC`), and a final `pBufferSizeInBytes: *mut c_int`; returns a `cusparseStatus_t`.
    ///
    /// NOTE(review): presumably the work-buffer size query for `cusparseSgebsr2gebsr`, writing the size in bytes through `pBufferSizeInBytes` (inferred from the names only) — confirm against the cuSPARSE documentation.
7303    pub fn cusparseSgebsr2gebsr_bufferSize(
7304        handle: cusparseHandle_t,
7305        dirA: cusparseDirection_t,
7306        mb: ::core::ffi::c_int,
7307        nb: ::core::ffi::c_int,
7308        nnzb: ::core::ffi::c_int,
7309        descrA: cusparseMatDescr_t,
7310        bsrSortedValA: *const f32,
7311        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7312        bsrSortedColIndA: *const ::core::ffi::c_int,
7313        rowBlockDimA: ::core::ffi::c_int,
7314        colBlockDimA: ::core::ffi::c_int,
7315        rowBlockDimC: ::core::ffi::c_int,
7316        colBlockDimC: ::core::ffi::c_int,
7317        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7318    ) -> cusparseStatus_t;
7319}
7320unsafe extern "C" {
    /// Declaration of the external C function `cusparseDgebsr2gebsr_bufferSize`.
    ///
    /// Takes the BSR arrays of `A` (`bsrSortedValA` typed `f64`, plus `bsrSortedRowPtrA`/`bsrSortedColIndA`), two pairs of block dimensions (`rowBlockDimA`/`colBlockDimA` and `rowBlockDimC`/`colBlockDimC`), and a final `pBufferSizeInBytes: *mut c_int`; returns a `cusparseStatus_t`.
    ///
    /// NOTE(review): presumably the work-buffer size query for `cusparseDgebsr2gebsr`, writing the size in bytes through `pBufferSizeInBytes` (inferred from the names only) — confirm against the cuSPARSE documentation.
7321    pub fn cusparseDgebsr2gebsr_bufferSize(
7322        handle: cusparseHandle_t,
7323        dirA: cusparseDirection_t,
7324        mb: ::core::ffi::c_int,
7325        nb: ::core::ffi::c_int,
7326        nnzb: ::core::ffi::c_int,
7327        descrA: cusparseMatDescr_t,
7328        bsrSortedValA: *const f64,
7329        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7330        bsrSortedColIndA: *const ::core::ffi::c_int,
7331        rowBlockDimA: ::core::ffi::c_int,
7332        colBlockDimA: ::core::ffi::c_int,
7333        rowBlockDimC: ::core::ffi::c_int,
7334        colBlockDimC: ::core::ffi::c_int,
7335        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7336    ) -> cusparseStatus_t;
7337}
7338unsafe extern "C" {
    /// Raw binding to the C function `cusparseCgebsr2gebsr_bufferSize` (`cuComplex` values).
    ///
    /// Buffer-size query for [`cusparseCgebsr2gebsr`], whose documentation requires calling
    /// `gebsr2gebsr_bufferSize()` before the conversion. The size is presumably written through
    /// `pBufferSizeInBytes` as a C `int` — NOTE(review): confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7339    pub fn cusparseCgebsr2gebsr_bufferSize(
7340        handle: cusparseHandle_t,
7341        dirA: cusparseDirection_t,
7342        mb: ::core::ffi::c_int,
7343        nb: ::core::ffi::c_int,
7344        nnzb: ::core::ffi::c_int,
7345        descrA: cusparseMatDescr_t,
7346        bsrSortedValA: *const cuComplex,
7347        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7348        bsrSortedColIndA: *const ::core::ffi::c_int,
7349        rowBlockDimA: ::core::ffi::c_int,
7350        colBlockDimA: ::core::ffi::c_int,
7351        rowBlockDimC: ::core::ffi::c_int,
7352        colBlockDimC: ::core::ffi::c_int,
7353        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7354    ) -> cusparseStatus_t;
7355}
7356unsafe extern "C" {
    /// Raw binding to the C function `cusparseZgebsr2gebsr_bufferSize` (`cuDoubleComplex` values).
    ///
    /// Buffer-size query for [`cusparseZgebsr2gebsr`], whose documentation requires calling
    /// `gebsr2gebsr_bufferSize()` before the conversion. The size is presumably written through
    /// `pBufferSizeInBytes` as a C `int` — NOTE(review): confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7357    pub fn cusparseZgebsr2gebsr_bufferSize(
7358        handle: cusparseHandle_t,
7359        dirA: cusparseDirection_t,
7360        mb: ::core::ffi::c_int,
7361        nb: ::core::ffi::c_int,
7362        nnzb: ::core::ffi::c_int,
7363        descrA: cusparseMatDescr_t,
7364        bsrSortedValA: *const cuDoubleComplex,
7365        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7366        bsrSortedColIndA: *const ::core::ffi::c_int,
7367        rowBlockDimA: ::core::ffi::c_int,
7368        colBlockDimA: ::core::ffi::c_int,
7369        rowBlockDimC: ::core::ffi::c_int,
7370        colBlockDimC: ::core::ffi::c_int,
7371        pBufferSizeInBytes: *mut ::core::ffi::c_int,
7372    ) -> cusparseStatus_t;
7373}
7374unsafe extern "C" {
    /// Raw binding to the C function `cusparseSgebsr2gebsr_bufferSizeExt` (`f32` values).
    ///
    /// Takes the same inputs as `cusparseSgebsr2gebsr_bufferSize`, but its out-parameter
    /// `pBufferSize` is a `size_t` pointer rather than a C `int` pointer.
    /// NOTE(review): presumably reports the same work-buffer size for [`cusparseSgebsr2gebsr`];
    /// confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7375    pub fn cusparseSgebsr2gebsr_bufferSizeExt(
7376        handle: cusparseHandle_t,
7377        dirA: cusparseDirection_t,
7378        mb: ::core::ffi::c_int,
7379        nb: ::core::ffi::c_int,
7380        nnzb: ::core::ffi::c_int,
7381        descrA: cusparseMatDescr_t,
7382        bsrSortedValA: *const f32,
7383        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7384        bsrSortedColIndA: *const ::core::ffi::c_int,
7385        rowBlockDimA: ::core::ffi::c_int,
7386        colBlockDimA: ::core::ffi::c_int,
7387        rowBlockDimC: ::core::ffi::c_int,
7388        colBlockDimC: ::core::ffi::c_int,
7389        pBufferSize: *mut size_t,
7390    ) -> cusparseStatus_t;
7391}
7392unsafe extern "C" {
    /// Raw binding to the C function `cusparseDgebsr2gebsr_bufferSizeExt` (`f64` values).
    ///
    /// Takes the same inputs as `cusparseDgebsr2gebsr_bufferSize`, but its out-parameter
    /// `pBufferSize` is a `size_t` pointer rather than a C `int` pointer.
    /// NOTE(review): presumably reports the same work-buffer size for [`cusparseDgebsr2gebsr`];
    /// confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7393    pub fn cusparseDgebsr2gebsr_bufferSizeExt(
7394        handle: cusparseHandle_t,
7395        dirA: cusparseDirection_t,
7396        mb: ::core::ffi::c_int,
7397        nb: ::core::ffi::c_int,
7398        nnzb: ::core::ffi::c_int,
7399        descrA: cusparseMatDescr_t,
7400        bsrSortedValA: *const f64,
7401        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7402        bsrSortedColIndA: *const ::core::ffi::c_int,
7403        rowBlockDimA: ::core::ffi::c_int,
7404        colBlockDimA: ::core::ffi::c_int,
7405        rowBlockDimC: ::core::ffi::c_int,
7406        colBlockDimC: ::core::ffi::c_int,
7407        pBufferSize: *mut size_t,
7408    ) -> cusparseStatus_t;
7409}
7410unsafe extern "C" {
    /// Raw binding to the C function `cusparseCgebsr2gebsr_bufferSizeExt` (`cuComplex` values).
    ///
    /// Takes the same inputs as `cusparseCgebsr2gebsr_bufferSize`, but its out-parameter
    /// `pBufferSize` is a `size_t` pointer rather than a C `int` pointer.
    /// NOTE(review): presumably reports the same work-buffer size for [`cusparseCgebsr2gebsr`];
    /// confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7411    pub fn cusparseCgebsr2gebsr_bufferSizeExt(
7412        handle: cusparseHandle_t,
7413        dirA: cusparseDirection_t,
7414        mb: ::core::ffi::c_int,
7415        nb: ::core::ffi::c_int,
7416        nnzb: ::core::ffi::c_int,
7417        descrA: cusparseMatDescr_t,
7418        bsrSortedValA: *const cuComplex,
7419        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7420        bsrSortedColIndA: *const ::core::ffi::c_int,
7421        rowBlockDimA: ::core::ffi::c_int,
7422        colBlockDimA: ::core::ffi::c_int,
7423        rowBlockDimC: ::core::ffi::c_int,
7424        colBlockDimC: ::core::ffi::c_int,
7425        pBufferSize: *mut size_t,
7426    ) -> cusparseStatus_t;
7427}
7428unsafe extern "C" {
    /// Raw binding to the C function `cusparseZgebsr2gebsr_bufferSizeExt` (`cuDoubleComplex` values).
    ///
    /// Takes the same inputs as `cusparseZgebsr2gebsr_bufferSize`, but its out-parameter
    /// `pBufferSize` is a `size_t` pointer rather than a C `int` pointer.
    /// NOTE(review): presumably reports the same work-buffer size for [`cusparseZgebsr2gebsr`];
    /// confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7429    pub fn cusparseZgebsr2gebsr_bufferSizeExt(
7430        handle: cusparseHandle_t,
7431        dirA: cusparseDirection_t,
7432        mb: ::core::ffi::c_int,
7433        nb: ::core::ffi::c_int,
7434        nnzb: ::core::ffi::c_int,
7435        descrA: cusparseMatDescr_t,
7436        bsrSortedValA: *const cuDoubleComplex,
7437        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7438        bsrSortedColIndA: *const ::core::ffi::c_int,
7439        rowBlockDimA: ::core::ffi::c_int,
7440        colBlockDimA: ::core::ffi::c_int,
7441        rowBlockDimC: ::core::ffi::c_int,
7442        colBlockDimC: ::core::ffi::c_int,
7443        pBufferSize: *mut size_t,
7444    ) -> cusparseStatus_t;
7445}
7446unsafe extern "C" {
    /// Raw binding to the C function `cusparseXgebsr2gebsrNnz`.
    ///
    /// According to the documentation on `cusparse[S|D|C|Z]gebsr2gebsr` in this file, this is the
    /// first step of the general-BSR to general-BSR conversion: the caller allocates `bsrRowPtrC`
    /// with `mc+1` elements and uses this function to determine the number of nonzero block
    /// columns per block row of matrix `C`. Afterwards `nnzc` can be obtained from either
    /// `*nnzTotalDevHostPtr` or `bsrRowPtrC[mc]-bsrRowPtrC[0]`.
    ///
    /// The signature has no value arrays, only the row-pointer and column-index arrays of `A`.
    /// NOTE(review): `bsrRowPtrC` in that description presumably corresponds to the
    /// `bsrSortedRowPtrC` parameter here — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7447    pub fn cusparseXgebsr2gebsrNnz(
7448        handle: cusparseHandle_t,
7449        dirA: cusparseDirection_t,
7450        mb: ::core::ffi::c_int,
7451        nb: ::core::ffi::c_int,
7452        nnzb: ::core::ffi::c_int,
7453        descrA: cusparseMatDescr_t,
7454        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7455        bsrSortedColIndA: *const ::core::ffi::c_int,
7456        rowBlockDimA: ::core::ffi::c_int,
7457        colBlockDimA: ::core::ffi::c_int,
7458        descrC: cusparseMatDescr_t,
7459        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7460        rowBlockDimC: ::core::ffi::c_int,
7461        colBlockDimC: ::core::ffi::c_int,
7462        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
7463        pBuffer: *mut ::core::ffi::c_void,
7464    ) -> cusparseStatus_t;
7465}
7466unsafe extern "C" {
7467    /// This function converts a sparse matrix in general BSR format that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` into a sparse matrix in another general BSR format that is defined by arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
7468    ///
7469    /// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]csr2gebsr()`.
7470    ///
7471    /// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]gebsr2csr()`.
7472    ///
7473    /// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks. The matrix `C` is also in general BSR format, with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
7474    ///
7475    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC[mc]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse[S|D|C|Z]gebsr2gebsr()` is called to complete the conversion.
7476    ///
7477    /// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
7478    ///
7479    /// Properties of the routine:
7480    ///
7481    /// * The routines require no extra storage if `pBuffer != NULL`
7482    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
7483    /// * The routines do **not** support CUDA graph capture.
    ///
    /// This is the `f32` variant. The `bsrVal*`, `bsrRowPtr*` and `bsrColInd*` arrays named above
    /// appear in this binding as the `bsrSortedVal*`, `bsrSortedRowPtr*` and `bsrSortedColInd*`
    /// parameters.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7484    pub fn cusparseSgebsr2gebsr(
7485        handle: cusparseHandle_t,
7486        dirA: cusparseDirection_t,
7487        mb: ::core::ffi::c_int,
7488        nb: ::core::ffi::c_int,
7489        nnzb: ::core::ffi::c_int,
7490        descrA: cusparseMatDescr_t,
7491        bsrSortedValA: *const f32,
7492        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7493        bsrSortedColIndA: *const ::core::ffi::c_int,
7494        rowBlockDimA: ::core::ffi::c_int,
7495        colBlockDimA: ::core::ffi::c_int,
7496        descrC: cusparseMatDescr_t,
7497        bsrSortedValC: *mut f32,
7498        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7499        bsrSortedColIndC: *mut ::core::ffi::c_int,
7500        rowBlockDimC: ::core::ffi::c_int,
7501        colBlockDimC: ::core::ffi::c_int,
7502        pBuffer: *mut ::core::ffi::c_void,
7503    ) -> cusparseStatus_t;
7504}
7505unsafe extern "C" {
7506    /// This function converts a sparse matrix in general BSR format that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` into a sparse matrix in another general BSR format that is defined by arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
7507    ///
7508    /// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]csr2gebsr()`.
7509    ///
7510    /// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]gebsr2csr()`.
7511    ///
7512    /// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks. The matrix `C` is also in general BSR format, with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
7513    ///
7514    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC[mc]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse[S|D|C|Z]gebsr2gebsr()` is called to complete the conversion.
7515    ///
7516    /// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
7517    ///
7518    /// Properties of the routine:
7519    ///
7520    /// * The routines require no extra storage if `pBuffer != NULL`
7521    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
7522    /// * The routines do **not** support CUDA graph capture.
    ///
    /// This is the `f64` variant. The `bsrVal*`, `bsrRowPtr*` and `bsrColInd*` arrays named above
    /// appear in this binding as the `bsrSortedVal*`, `bsrSortedRowPtr*` and `bsrSortedColInd*`
    /// parameters.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7523    pub fn cusparseDgebsr2gebsr(
7524        handle: cusparseHandle_t,
7525        dirA: cusparseDirection_t,
7526        mb: ::core::ffi::c_int,
7527        nb: ::core::ffi::c_int,
7528        nnzb: ::core::ffi::c_int,
7529        descrA: cusparseMatDescr_t,
7530        bsrSortedValA: *const f64,
7531        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7532        bsrSortedColIndA: *const ::core::ffi::c_int,
7533        rowBlockDimA: ::core::ffi::c_int,
7534        colBlockDimA: ::core::ffi::c_int,
7535        descrC: cusparseMatDescr_t,
7536        bsrSortedValC: *mut f64,
7537        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7538        bsrSortedColIndC: *mut ::core::ffi::c_int,
7539        rowBlockDimC: ::core::ffi::c_int,
7540        colBlockDimC: ::core::ffi::c_int,
7541        pBuffer: *mut ::core::ffi::c_void,
7542    ) -> cusparseStatus_t;
7543}
7544unsafe extern "C" {
7545    /// This function converts a sparse matrix in general BSR format that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` into a sparse matrix in another general BSR format that is defined by arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
7546    ///
7547    /// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]csr2gebsr()`.
7548    ///
7549    /// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]gebsr2csr()`.
7550    ///
7551    /// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks. The matrix `C` is also in general BSR format, with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
7552    ///
7553    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC[mc]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse[S|D|C|Z]gebsr2gebsr()` is called to complete the conversion.
7554    ///
7555    /// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
7556    ///
7557    /// Properties of the routine:
7558    ///
7559    /// * The routines require no extra storage if `pBuffer != NULL`
7560    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
7561    /// * The routines do **not** support CUDA graph capture.
    ///
    /// This is the `cuComplex` variant. The `bsrVal*`, `bsrRowPtr*` and `bsrColInd*` arrays named
    /// above appear in this binding as the `bsrSortedVal*`, `bsrSortedRowPtr*` and
    /// `bsrSortedColInd*` parameters.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7562    pub fn cusparseCgebsr2gebsr(
7563        handle: cusparseHandle_t,
7564        dirA: cusparseDirection_t,
7565        mb: ::core::ffi::c_int,
7566        nb: ::core::ffi::c_int,
7567        nnzb: ::core::ffi::c_int,
7568        descrA: cusparseMatDescr_t,
7569        bsrSortedValA: *const cuComplex,
7570        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7571        bsrSortedColIndA: *const ::core::ffi::c_int,
7572        rowBlockDimA: ::core::ffi::c_int,
7573        colBlockDimA: ::core::ffi::c_int,
7574        descrC: cusparseMatDescr_t,
7575        bsrSortedValC: *mut cuComplex,
7576        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7577        bsrSortedColIndC: *mut ::core::ffi::c_int,
7578        rowBlockDimC: ::core::ffi::c_int,
7579        colBlockDimC: ::core::ffi::c_int,
7580        pBuffer: *mut ::core::ffi::c_void,
7581    ) -> cusparseStatus_t;
7582}
7583unsafe extern "C" {
7584    /// This function converts a sparse matrix in general BSR format that is defined by the three arrays `bsrValA`, `bsrRowPtrA`, and `bsrColIndA` into a sparse matrix in another general BSR format that is defined by arrays `bsrValC`, `bsrRowPtrC`, and `bsrColIndC`.
7585    ///
7586    /// If `rowBlockDimA=1` and `colBlockDimA=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]csr2gebsr()`.
7587    ///
7588    /// If `rowBlockDimC=1` and `colBlockDimC=1`, `cusparse[S|D|C|Z]gebsr2gebsr()` is the same as `cusparse[S|D|C|Z]gebsr2csr()`.
7589    ///
7590    /// `A` is an `m*n` sparse matrix where `m(=mb*rowBlockDimA)` is the number of rows of `A`, and `n(=nb*colBlockDimA)` is the number of columns of `A`. The general BSR format of `A` contains `nnzb(=bsrRowPtrA[mb] - bsrRowPtrA[0])` nonzero blocks. The matrix `C` is also in general BSR format, with a different block size, `rowBlockDimC*colBlockDimC`. If `m` is not a multiple of `rowBlockDimC`, or `n` is not a multiple of `colBlockDimC`, zeros are filled in. The number of block rows of `C` is `mc(=(m+rowBlockDimC-1)/rowBlockDimC)`. The number of block columns of `C` is `nc(=(n+colBlockDimC-1)/colBlockDimC)`. The number of nonzero blocks of `C` is `nnzc`.
7591    ///
7592    /// The implementation adopts a two-step approach to do the conversion. First, the user allocates `bsrRowPtrC` of `mc+1` elements and uses function [`cusparseXgebsr2gebsrNnz`] to determine the number of nonzero block columns per block row of matrix `C`. Second, the user gathers `nnzc` (number of non-zero block columns of matrix `C`) from either `(nnzc=*nnzTotalDevHostPtr)` or `(nnzc=bsrRowPtrC[mc]-bsrRowPtrC[0])` and allocates `bsrValC` of `nnzc*rowBlockDimC*colBlockDimC` elements and `bsrColIndC` of `nnzc` integers. Finally the function `cusparse[S|D|C|Z]gebsr2gebsr()` is called to complete the conversion.
7593    ///
7594    /// The user must call `gebsr2gebsr_bufferSize()` to know the size of the buffer required by `gebsr2gebsr()`, allocate the buffer, and pass the buffer pointer to `gebsr2gebsr()`.
7595    ///
7596    /// Properties of the routine:
7597    ///
7598    /// * The routines require no extra storage if `pBuffer != NULL`
7599    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
7600    /// * The routines do **not** support CUDA graph capture.
    ///
    /// This is the `cuDoubleComplex` variant. The `bsrVal*`, `bsrRowPtr*` and `bsrColInd*` arrays
    /// named above appear in this binding as the `bsrSortedVal*`, `bsrSortedRowPtr*` and
    /// `bsrSortedColInd*` parameters.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7601    pub fn cusparseZgebsr2gebsr(
7602        handle: cusparseHandle_t,
7603        dirA: cusparseDirection_t,
7604        mb: ::core::ffi::c_int,
7605        nb: ::core::ffi::c_int,
7606        nnzb: ::core::ffi::c_int,
7607        descrA: cusparseMatDescr_t,
7608        bsrSortedValA: *const cuDoubleComplex,
7609        bsrSortedRowPtrA: *const ::core::ffi::c_int,
7610        bsrSortedColIndA: *const ::core::ffi::c_int,
7611        rowBlockDimA: ::core::ffi::c_int,
7612        colBlockDimA: ::core::ffi::c_int,
7613        descrC: cusparseMatDescr_t,
7614        bsrSortedValC: *mut cuDoubleComplex,
7615        bsrSortedRowPtrC: *mut ::core::ffi::c_int,
7616        bsrSortedColIndC: *mut ::core::ffi::c_int,
7617        rowBlockDimC: ::core::ffi::c_int,
7618        colBlockDimC: ::core::ffi::c_int,
7619        pBuffer: *mut ::core::ffi::c_void,
7620    ) -> cusparseStatus_t;
7621}
7622unsafe extern "C" {
7623    /// This function creates an identity map. The output parameter `p` represents this map as `p = 0:1:(n-1)`.
7624    ///
7625    /// This function is typically used with `coosort`, `csrsort`, `cscsort`.
7626    ///
7627    /// * The routine requires no extra storage.
7628    /// * The routine supports asynchronous execution.
7629    /// * The routine supports CUDA graph capture.
    ///
    /// This binding is marked `#[deprecated]`, so using it produces a deprecation warning.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and a pointer `p` that satisfies
    /// the requirements of the underlying C API.
7630    #[deprecated]
7631    pub fn cusparseCreateIdentityPermutation(
7632        handle: cusparseHandle_t,
7633        n: ::core::ffi::c_int,
7634        p: *mut ::core::ffi::c_int,
7635    ) -> cusparseStatus_t;
7636}
7637unsafe extern "C" {
    /// Raw binding to the C function `cusparseXcoosort_bufferSizeExt`.
    ///
    /// NOTE(review): by analogy with `csrsort_bufferSizeExt()` (described on [`cusparseXcsrsort`]),
    /// this presumably reports through `pBufferSizeInBytes` the work-buffer size needed by the
    /// `cusparseXcoosortBy*` routines — confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7638    pub fn cusparseXcoosort_bufferSizeExt(
7639        handle: cusparseHandle_t,
7640        m: ::core::ffi::c_int,
7641        n: ::core::ffi::c_int,
7642        nnz: ::core::ffi::c_int,
7643        cooRowsA: *const ::core::ffi::c_int,
7644        cooColsA: *const ::core::ffi::c_int,
7645        pBufferSizeInBytes: *mut size_t,
7646    ) -> cusparseStatus_t;
7647}
7648unsafe extern "C" {
    /// Raw binding to the C function `cusparseXcoosortByRow`.
    ///
    /// `cooRowsA`, `cooColsA` and `P` are all taken as mutable pointers, alongside a caller-provided
    /// `pBuffer`.
    /// NOTE(review): the name suggests an in-place sort of COO indices by row, with `P` acting as a
    /// permutation in the way described for [`cusparseXcsrsort`] — confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7649    pub fn cusparseXcoosortByRow(
7650        handle: cusparseHandle_t,
7651        m: ::core::ffi::c_int,
7652        n: ::core::ffi::c_int,
7653        nnz: ::core::ffi::c_int,
7654        cooRowsA: *mut ::core::ffi::c_int,
7655        cooColsA: *mut ::core::ffi::c_int,
7656        P: *mut ::core::ffi::c_int,
7657        pBuffer: *mut ::core::ffi::c_void,
7658    ) -> cusparseStatus_t;
7659}
7660unsafe extern "C" {
    /// Raw binding to the C function `cusparseXcoosortByColumn`.
    ///
    /// `cooRowsA`, `cooColsA` and `P` are all taken as mutable pointers, alongside a caller-provided
    /// `pBuffer`.
    /// NOTE(review): the name suggests an in-place sort of COO indices by column, with `P` acting as
    /// a permutation in the way described for [`cusparseXcsrsort`] — confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7661    pub fn cusparseXcoosortByColumn(
7662        handle: cusparseHandle_t,
7663        m: ::core::ffi::c_int,
7664        n: ::core::ffi::c_int,
7665        nnz: ::core::ffi::c_int,
7666        cooRowsA: *mut ::core::ffi::c_int,
7667        cooColsA: *mut ::core::ffi::c_int,
7668        P: *mut ::core::ffi::c_int,
7669        pBuffer: *mut ::core::ffi::c_void,
7670    ) -> cusparseStatus_t;
7671}
7672unsafe extern "C" {
    /// Raw binding to the C function `cusparseXcsrsort_bufferSizeExt`.
    ///
    /// Buffer-size query for [`cusparseXcsrsort`], whose documentation states that `csrsort()`
    /// requires the buffer size returned by `csrsort_bufferSizeExt()`. The only out-parameter in
    /// the signature is `pBufferSizeInBytes`.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7673    pub fn cusparseXcsrsort_bufferSizeExt(
7674        handle: cusparseHandle_t,
7675        m: ::core::ffi::c_int,
7676        n: ::core::ffi::c_int,
7677        nnz: ::core::ffi::c_int,
7678        csrRowPtrA: *const ::core::ffi::c_int,
7679        csrColIndA: *const ::core::ffi::c_int,
7680        pBufferSizeInBytes: *mut size_t,
7681    ) -> cusparseStatus_t;
7682}
7683unsafe extern "C" {
7684    /// This function sorts CSR format. The stable sorting is in-place.
7685    ///
7686    /// The matrix type is regarded as [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] implicitly. In other words, any symmetric property is ignored.
7687    ///
7688    /// This function `csrsort()` requires the buffer size returned by `csrsort_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
7689    ///
7690    /// The parameter `P` is both input and output. If the user wants to compute sorted `csrVal`, `P` must be set as 0:1:(nnz-1) before `csrsort()`, and after `csrsort()`, the new sorted value array satisfies `csrVal_sorted = csrVal(P)`.
7691    ///
7692    /// Properties of the routine:
7693    ///
7694    /// * The routine requires no extra storage if `pBuffer != NULL`
7695    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
7696    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API, including the `pBuffer` alignment stated above.
7697    pub fn cusparseXcsrsort(
7698        handle: cusparseHandle_t,
7699        m: ::core::ffi::c_int,
7700        n: ::core::ffi::c_int,
7701        nnz: ::core::ffi::c_int,
7702        descrA: cusparseMatDescr_t,
7703        csrRowPtrA: *const ::core::ffi::c_int,
7704        csrColIndA: *mut ::core::ffi::c_int,
7705        P: *mut ::core::ffi::c_int,
7706        pBuffer: *mut ::core::ffi::c_void,
7707    ) -> cusparseStatus_t;
7708}
7709unsafe extern "C" {
    /// Raw binding to the C function `cusparseXcscsort_bufferSizeExt`.
    ///
    /// Buffer-size query for [`cusparseXcscsort`], whose documentation states that `cscsort()`
    /// requires the buffer size returned by `cscsort_bufferSizeExt()`. The only out-parameter in
    /// the signature is `pBufferSizeInBytes`.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7710    pub fn cusparseXcscsort_bufferSizeExt(
7711        handle: cusparseHandle_t,
7712        m: ::core::ffi::c_int,
7713        n: ::core::ffi::c_int,
7714        nnz: ::core::ffi::c_int,
7715        cscColPtrA: *const ::core::ffi::c_int,
7716        cscRowIndA: *const ::core::ffi::c_int,
7717        pBufferSizeInBytes: *mut size_t,
7718    ) -> cusparseStatus_t;
7719}
7720unsafe extern "C" {
7721    /// This function sorts CSC format. The stable sorting is in-place.
7722    ///
7723    /// The matrix type is regarded as [`cusparseMatrixType_t::CUSPARSE_MATRIX_TYPE_GENERAL`] implicitly. In other words, any symmetric property is ignored.
7724    ///
7725    /// This function `cscsort()` requires the buffer size returned by `cscsort_bufferSizeExt()`. The address of `pBuffer` must be a multiple of 128 bytes. If not, [`cusparseStatus_t::CUSPARSE_STATUS_INVALID_VALUE`] is returned.
7726    ///
7727    /// The parameter `P` is both input and output. If the user wants to compute sorted `cscVal`, `P` must be set as 0:1:(nnz-1) before `cscsort()`, and after `cscsort()`, the new sorted value array satisfies `cscVal_sorted = cscVal(P)`.
7728    ///
7729    /// Properties of the routine:
7730    ///
7731    /// * The routine requires no extra storage if `pBuffer != NULL`
7732    /// * The routine supports asynchronous execution if the Stream Ordered Memory Allocator is available
7733    /// * The routine supports CUDA graph capture if the Stream Ordered Memory Allocator is available.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API, including the `pBuffer` alignment stated above.
7734    pub fn cusparseXcscsort(
7735        handle: cusparseHandle_t,
7736        m: ::core::ffi::c_int,
7737        n: ::core::ffi::c_int,
7738        nnz: ::core::ffi::c_int,
7739        descrA: cusparseMatDescr_t,
7740        cscColPtrA: *const ::core::ffi::c_int,
7741        cscRowIndA: *mut ::core::ffi::c_int,
7742        P: *mut ::core::ffi::c_int,
7743        pBuffer: *mut ::core::ffi::c_void,
7744    ) -> cusparseStatus_t;
7745}
7746unsafe extern "C" {
    /// Raw binding to the C function `cusparseScsru2csr_bufferSizeExt` (`f32` values).
    ///
    /// NOTE(review): this file does not describe the routine. Presumably it reports through
    /// `pBufferSizeInBytes` the work-buffer size for `cusparseScsru2csr` / `cusparseScsr2csru`
    /// — confirm against the cuSPARSE reference. Note that `csrVal` and `csrColInd` are declared
    /// `*mut` even in this query function.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7747    pub fn cusparseScsru2csr_bufferSizeExt(
7748        handle: cusparseHandle_t,
7749        m: ::core::ffi::c_int,
7750        n: ::core::ffi::c_int,
7751        nnz: ::core::ffi::c_int,
7752        csrVal: *mut f32,
7753        csrRowPtr: *const ::core::ffi::c_int,
7754        csrColInd: *mut ::core::ffi::c_int,
7755        info: csru2csrInfo_t,
7756        pBufferSizeInBytes: *mut size_t,
7757    ) -> cusparseStatus_t;
7758}
7759unsafe extern "C" {
    /// Raw binding to the C function `cusparseDcsru2csr_bufferSizeExt` (`f64` values).
    ///
    /// NOTE(review): this file does not describe the routine. Presumably it reports through
    /// `pBufferSizeInBytes` the work-buffer size for `cusparseDcsru2csr` / `cusparseDcsr2csru`
    /// — confirm against the cuSPARSE reference. Note that `csrVal` and `csrColInd` are declared
    /// `*mut` even in this query function.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7760    pub fn cusparseDcsru2csr_bufferSizeExt(
7761        handle: cusparseHandle_t,
7762        m: ::core::ffi::c_int,
7763        n: ::core::ffi::c_int,
7764        nnz: ::core::ffi::c_int,
7765        csrVal: *mut f64,
7766        csrRowPtr: *const ::core::ffi::c_int,
7767        csrColInd: *mut ::core::ffi::c_int,
7768        info: csru2csrInfo_t,
7769        pBufferSizeInBytes: *mut size_t,
7770    ) -> cusparseStatus_t;
7771}
7772unsafe extern "C" {
    /// Raw binding to the C function `cusparseCcsru2csr_bufferSizeExt` (`cuComplex` values).
    ///
    /// NOTE(review): this file does not describe the routine. Presumably it reports through
    /// `pBufferSizeInBytes` the work-buffer size for `cusparseCcsru2csr` / `cusparseCcsr2csru`
    /// — confirm against the cuSPARSE reference. Note that `csrVal` and `csrColInd` are declared
    /// `*mut` even in this query function.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7773    pub fn cusparseCcsru2csr_bufferSizeExt(
7774        handle: cusparseHandle_t,
7775        m: ::core::ffi::c_int,
7776        n: ::core::ffi::c_int,
7777        nnz: ::core::ffi::c_int,
7778        csrVal: *mut cuComplex,
7779        csrRowPtr: *const ::core::ffi::c_int,
7780        csrColInd: *mut ::core::ffi::c_int,
7781        info: csru2csrInfo_t,
7782        pBufferSizeInBytes: *mut size_t,
7783    ) -> cusparseStatus_t;
7784}
7785unsafe extern "C" {
    /// Raw binding to the C function `cusparseZcsru2csr_bufferSizeExt` (`cuDoubleComplex` values).
    ///
    /// NOTE(review): this file does not describe the routine. Presumably it reports through
    /// `pBufferSizeInBytes` the work-buffer size for `cusparseZcsru2csr` / `cusparseZcsr2csru`
    /// — confirm against the cuSPARSE reference. Note that `csrVal` and `csrColInd` are declared
    /// `*mut` even in this query function.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7786    pub fn cusparseZcsru2csr_bufferSizeExt(
7787        handle: cusparseHandle_t,
7788        m: ::core::ffi::c_int,
7789        n: ::core::ffi::c_int,
7790        nnz: ::core::ffi::c_int,
7791        csrVal: *mut cuDoubleComplex,
7792        csrRowPtr: *const ::core::ffi::c_int,
7793        csrColInd: *mut ::core::ffi::c_int,
7794        info: csru2csrInfo_t,
7795        pBufferSizeInBytes: *mut size_t,
7796    ) -> cusparseStatus_t;
7797}
7798unsafe extern "C" {
    /// Raw binding to the C function `cusparseScsru2csr` (`f32` values).
    ///
    /// `csrVal` and `csrColInd` are mutable while `csrRowPtr` is read-only, so the value and
    /// column-index arrays may be modified by the call.
    /// NOTE(review): the name suggests a conversion from unsorted CSR ("csru") to sorted CSR, using
    /// state kept in `info` — this file does not document it; confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7799    pub fn cusparseScsru2csr(
7800        handle: cusparseHandle_t,
7801        m: ::core::ffi::c_int,
7802        n: ::core::ffi::c_int,
7803        nnz: ::core::ffi::c_int,
7804        descrA: cusparseMatDescr_t,
7805        csrVal: *mut f32,
7806        csrRowPtr: *const ::core::ffi::c_int,
7807        csrColInd: *mut ::core::ffi::c_int,
7808        info: csru2csrInfo_t,
7809        pBuffer: *mut ::core::ffi::c_void,
7810    ) -> cusparseStatus_t;
7811}
7812unsafe extern "C" {
    /// Raw binding to the C function `cusparseDcsru2csr` (`f64` values).
    ///
    /// `csrVal` and `csrColInd` are mutable while `csrRowPtr` is read-only, so the value and
    /// column-index arrays may be modified by the call.
    /// NOTE(review): the name suggests a conversion from unsorted CSR ("csru") to sorted CSR, using
    /// state kept in `info` — this file does not document it; confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7813    pub fn cusparseDcsru2csr(
7814        handle: cusparseHandle_t,
7815        m: ::core::ffi::c_int,
7816        n: ::core::ffi::c_int,
7817        nnz: ::core::ffi::c_int,
7818        descrA: cusparseMatDescr_t,
7819        csrVal: *mut f64,
7820        csrRowPtr: *const ::core::ffi::c_int,
7821        csrColInd: *mut ::core::ffi::c_int,
7822        info: csru2csrInfo_t,
7823        pBuffer: *mut ::core::ffi::c_void,
7824    ) -> cusparseStatus_t;
7825}
7826unsafe extern "C" {
    /// Raw binding to the C function `cusparseCcsru2csr` (`cuComplex` values).
    ///
    /// `csrVal` and `csrColInd` are mutable while `csrRowPtr` is read-only, so the value and
    /// column-index arrays may be modified by the call.
    /// NOTE(review): the name suggests a conversion from unsorted CSR ("csru") to sorted CSR, using
    /// state kept in `info` — this file does not document it; confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7827    pub fn cusparseCcsru2csr(
7828        handle: cusparseHandle_t,
7829        m: ::core::ffi::c_int,
7830        n: ::core::ffi::c_int,
7831        nnz: ::core::ffi::c_int,
7832        descrA: cusparseMatDescr_t,
7833        csrVal: *mut cuComplex,
7834        csrRowPtr: *const ::core::ffi::c_int,
7835        csrColInd: *mut ::core::ffi::c_int,
7836        info: csru2csrInfo_t,
7837        pBuffer: *mut ::core::ffi::c_void,
7838    ) -> cusparseStatus_t;
7839}
7840unsafe extern "C" {
    /// Raw binding to the C function `cusparseZcsru2csr` (`cuDoubleComplex` values).
    ///
    /// `csrVal` and `csrColInd` are mutable while `csrRowPtr` is read-only, so the value and
    /// column-index arrays may be modified by the call.
    /// NOTE(review): the name suggests a conversion from unsorted CSR ("csru") to sorted CSR, using
    /// state kept in `info` — this file does not document it; confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7841    pub fn cusparseZcsru2csr(
7842        handle: cusparseHandle_t,
7843        m: ::core::ffi::c_int,
7844        n: ::core::ffi::c_int,
7845        nnz: ::core::ffi::c_int,
7846        descrA: cusparseMatDescr_t,
7847        csrVal: *mut cuDoubleComplex,
7848        csrRowPtr: *const ::core::ffi::c_int,
7849        csrColInd: *mut ::core::ffi::c_int,
7850        info: csru2csrInfo_t,
7851        pBuffer: *mut ::core::ffi::c_void,
7852    ) -> cusparseStatus_t;
7853}
7854unsafe extern "C" {
    /// Raw binding to the C function `cusparseScsr2csru` (`f32` values).
    ///
    /// Has the same parameter list as `cusparseScsru2csr`: mutable `csrVal` and `csrColInd`,
    /// read-only `csrRowPtr`, an `info` object and a caller-provided `pBuffer`.
    /// NOTE(review): the name suggests the reverse conversion, from sorted CSR back to the unsorted
    /// ("csru") ordering — this file does not document it; confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7855    pub fn cusparseScsr2csru(
7856        handle: cusparseHandle_t,
7857        m: ::core::ffi::c_int,
7858        n: ::core::ffi::c_int,
7859        nnz: ::core::ffi::c_int,
7860        descrA: cusparseMatDescr_t,
7861        csrVal: *mut f32,
7862        csrRowPtr: *const ::core::ffi::c_int,
7863        csrColInd: *mut ::core::ffi::c_int,
7864        info: csru2csrInfo_t,
7865        pBuffer: *mut ::core::ffi::c_void,
7866    ) -> cusparseStatus_t;
7867}
7868unsafe extern "C" {
    /// Raw binding to the C function `cusparseDcsr2csru` (`f64` values).
    ///
    /// Has the same parameter list as `cusparseDcsru2csr`: mutable `csrVal` and `csrColInd`,
    /// read-only `csrRowPtr`, an `info` object and a caller-provided `pBuffer`.
    /// NOTE(review): the name suggests the reverse conversion, from sorted CSR back to the unsorted
    /// ("csru") ordering — this file does not document it; confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7869    pub fn cusparseDcsr2csru(
7870        handle: cusparseHandle_t,
7871        m: ::core::ffi::c_int,
7872        n: ::core::ffi::c_int,
7873        nnz: ::core::ffi::c_int,
7874        descrA: cusparseMatDescr_t,
7875        csrVal: *mut f64,
7876        csrRowPtr: *const ::core::ffi::c_int,
7877        csrColInd: *mut ::core::ffi::c_int,
7878        info: csru2csrInfo_t,
7879        pBuffer: *mut ::core::ffi::c_void,
7880    ) -> cusparseStatus_t;
7881}
7882unsafe extern "C" {
    /// Raw binding to the C function `cusparseCcsr2csru` (`cuComplex` values).
    ///
    /// Has the same parameter list as `cusparseCcsru2csr`: mutable `csrVal` and `csrColInd`,
    /// read-only `csrRowPtr`, an `info` object and a caller-provided `pBuffer`.
    /// NOTE(review): the name suggests the reverse conversion, from sorted CSR back to the unsorted
    /// ("csru") ordering — this file does not document it; confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7883    pub fn cusparseCcsr2csru(
7884        handle: cusparseHandle_t,
7885        m: ::core::ffi::c_int,
7886        n: ::core::ffi::c_int,
7887        nnz: ::core::ffi::c_int,
7888        descrA: cusparseMatDescr_t,
7889        csrVal: *mut cuComplex,
7890        csrRowPtr: *const ::core::ffi::c_int,
7891        csrColInd: *mut ::core::ffi::c_int,
7892        info: csru2csrInfo_t,
7893        pBuffer: *mut ::core::ffi::c_void,
7894    ) -> cusparseStatus_t;
7895}
7896unsafe extern "C" {
    /// Raw binding to the C function `cusparseZcsr2csru` (`cuDoubleComplex` values).
    ///
    /// Has the same parameter list as `cusparseZcsru2csr`: mutable `csrVal` and `csrColInd`,
    /// read-only `csrRowPtr`, an `info` object and a caller-provided `pBuffer`.
    /// NOTE(review): the name suggests the reverse conversion, from sorted CSR back to the unsorted
    /// ("csru") ordering — this file does not document it; confirm against the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle`, a valid `info` object and pointers
    /// that satisfy the requirements of the underlying C API.
7897    pub fn cusparseZcsr2csru(
7898        handle: cusparseHandle_t,
7899        m: ::core::ffi::c_int,
7900        n: ::core::ffi::c_int,
7901        nnz: ::core::ffi::c_int,
7902        descrA: cusparseMatDescr_t,
7903        csrVal: *mut cuDoubleComplex,
7904        csrRowPtr: *const ::core::ffi::c_int,
7905        csrColInd: *mut ::core::ffi::c_int,
7906        info: csru2csrInfo_t,
7907        pBuffer: *mut ::core::ffi::c_void,
7908    ) -> cusparseStatus_t;
7909}
7910unsafe extern "C" {
    /// Raw binding to the C function `cusparseSpruneDense2csr_bufferSizeExt` (`f32` values).
    ///
    /// All array arguments, including the `csrSorted*C` output-matrix arrays, are read-only
    /// pointers here; the only out-parameter is `pBufferSizeInBytes`.
    /// NOTE(review): this file does not describe the routine. Presumably it reports the work-buffer
    /// size needed by `cusparseSpruneDense2csrNnz` / `cusparseSpruneDense2csr` — confirm against
    /// the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7911    pub fn cusparseSpruneDense2csr_bufferSizeExt(
7912        handle: cusparseHandle_t,
7913        m: ::core::ffi::c_int,
7914        n: ::core::ffi::c_int,
7915        A: *const f32,
7916        lda: ::core::ffi::c_int,
7917        threshold: *const f32,
7918        descrC: cusparseMatDescr_t,
7919        csrSortedValC: *const f32,
7920        csrSortedRowPtrC: *const ::core::ffi::c_int,
7921        csrSortedColIndC: *const ::core::ffi::c_int,
7922        pBufferSizeInBytes: *mut size_t,
7923    ) -> cusparseStatus_t;
7924}
7925unsafe extern "C" {
    /// Raw binding to the C function `cusparseDpruneDense2csr_bufferSizeExt` (`f64` values).
    ///
    /// All array arguments, including the `csrSorted*C` output-matrix arrays, are read-only
    /// pointers here; the only out-parameter is `pBufferSizeInBytes`.
    /// NOTE(review): this file does not describe the routine. Presumably it reports the work-buffer
    /// size needed by `cusparseDpruneDense2csrNnz` / `cusparseDpruneDense2csr` — confirm against
    /// the cuSPARSE reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7926    pub fn cusparseDpruneDense2csr_bufferSizeExt(
7927        handle: cusparseHandle_t,
7928        m: ::core::ffi::c_int,
7929        n: ::core::ffi::c_int,
7930        A: *const f64,
7931        lda: ::core::ffi::c_int,
7932        threshold: *const f64,
7933        descrC: cusparseMatDescr_t,
7934        csrSortedValC: *const f64,
7935        csrSortedRowPtrC: *const ::core::ffi::c_int,
7936        csrSortedColIndC: *const ::core::ffi::c_int,
7937        pBufferSizeInBytes: *mut size_t,
7938    ) -> cusparseStatus_t;
7939}
7940unsafe extern "C" {
    /// Raw binding to the C function `cusparseSpruneDense2csrNnz` (`f32` values).
    ///
    /// The writable arguments are `csrRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer`; `threshold` is
    /// passed by pointer. The row-pointer parameter is named `csrRowPtrC` here but
    /// `csrSortedRowPtrC` in `cusparseDpruneDense2csrNnz`; the names come from the generated header
    /// and do not affect the ABI.
    /// NOTE(review): this file does not describe the routine. Presumably it fills the CSR row
    /// pointers and the total nonzero count of the pruned matrix `C` — confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7941    pub fn cusparseSpruneDense2csrNnz(
7942        handle: cusparseHandle_t,
7943        m: ::core::ffi::c_int,
7944        n: ::core::ffi::c_int,
7945        A: *const f32,
7946        lda: ::core::ffi::c_int,
7947        threshold: *const f32,
7948        descrC: cusparseMatDescr_t,
7949        csrRowPtrC: *mut ::core::ffi::c_int,
7950        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
7951        pBuffer: *mut ::core::ffi::c_void,
7952    ) -> cusparseStatus_t;
7953}
7954unsafe extern "C" {
    /// Raw binding to the C function `cusparseDpruneDense2csrNnz` (`f64` values).
    ///
    /// The writable arguments are `csrSortedRowPtrC`, `nnzTotalDevHostPtr` and `pBuffer`;
    /// `threshold` is passed by pointer.
    /// NOTE(review): this file does not describe the routine. Presumably it fills the CSR row
    /// pointers and the total nonzero count of the pruned matrix `C` — confirm against the cuSPARSE
    /// reference.
    ///
    /// # Safety
    ///
    /// Foreign function: the caller must pass a valid `handle` and pointers that satisfy the
    /// requirements of the underlying C API.
7955    pub fn cusparseDpruneDense2csrNnz(
7956        handle: cusparseHandle_t,
7957        m: ::core::ffi::c_int,
7958        n: ::core::ffi::c_int,
7959        A: *const f64,
7960        lda: ::core::ffi::c_int,
7961        threshold: *const f64,
7962        descrC: cusparseMatDescr_t,
7963        csrSortedRowPtrC: *mut ::core::ffi::c_int,
7964        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
7965        pBuffer: *mut ::core::ffi::c_void,
7966    ) -> cusparseStatus_t;
7967}
7968unsafe extern "C" {
    /// Prunes the dense `f32` matrix `A` into the CSR matrix `C`.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneDense2csr()` documentation, entries whose magnitude
    /// exceeds `*threshold` are kept. `csrSortedRowPtrC` is only read here and is expected to
    /// come from [`cusparseSpruneDense2csrNnz`]; `csrSortedValC` and `csrSortedColIndC` are
    /// outputs. `pBuffer` is the workspace sized by [`cusparseSpruneDense2csr_bufferSizeExt`].
7969    pub fn cusparseSpruneDense2csr(
7970        handle: cusparseHandle_t,
7971        m: ::core::ffi::c_int,
7972        n: ::core::ffi::c_int,
7973        A: *const f32,
7974        lda: ::core::ffi::c_int,
7975        threshold: *const f32,
7976        descrC: cusparseMatDescr_t,
7977        csrSortedValC: *mut f32,
7978        csrSortedRowPtrC: *const ::core::ffi::c_int,
7979        csrSortedColIndC: *mut ::core::ffi::c_int,
7980        pBuffer: *mut ::core::ffi::c_void,
7981    ) -> cusparseStatus_t;
7982}
7983unsafe extern "C" {
    /// Prunes the dense `f64` matrix `A` into the CSR matrix `C`.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneDense2csr()` documentation, entries whose magnitude
    /// exceeds `*threshold` are kept. `csrSortedRowPtrC` is only read here and is expected to
    /// come from [`cusparseDpruneDense2csrNnz`]; `csrSortedValC` and `csrSortedColIndC` are
    /// outputs. `pBuffer` is the workspace sized by [`cusparseDpruneDense2csr_bufferSizeExt`].
7984    pub fn cusparseDpruneDense2csr(
7985        handle: cusparseHandle_t,
7986        m: ::core::ffi::c_int,
7987        n: ::core::ffi::c_int,
7988        A: *const f64,
7989        lda: ::core::ffi::c_int,
7990        threshold: *const f64,
7991        descrC: cusparseMatDescr_t,
7992        csrSortedValC: *mut f64,
7993        csrSortedRowPtrC: *const ::core::ffi::c_int,
7994        csrSortedColIndC: *mut ::core::ffi::c_int,
7995        pBuffer: *mut ::core::ffi::c_void,
7996    ) -> cusparseStatus_t;
7997}
7998unsafe extern "C" {
    /// Queries the workspace size needed by [`cusparseSpruneCsr2csr`] (single precision, `f32`).
    ///
    /// The required size, in bytes, is written through `pBufferSizeInBytes`; every other pointer
    /// argument is `*const` and therefore only read. Semantics follow the cuSPARSE
    /// `cusparse<t>pruneCsr2csr()` documentation.
7999    pub fn cusparseSpruneCsr2csr_bufferSizeExt(
8000        handle: cusparseHandle_t,
8001        m: ::core::ffi::c_int,
8002        n: ::core::ffi::c_int,
8003        nnzA: ::core::ffi::c_int,
8004        descrA: cusparseMatDescr_t,
8005        csrSortedValA: *const f32,
8006        csrSortedRowPtrA: *const ::core::ffi::c_int,
8007        csrSortedColIndA: *const ::core::ffi::c_int,
8008        threshold: *const f32,
8009        descrC: cusparseMatDescr_t,
8010        csrSortedValC: *const f32,
8011        csrSortedRowPtrC: *const ::core::ffi::c_int,
8012        csrSortedColIndC: *const ::core::ffi::c_int,
8013        pBufferSizeInBytes: *mut size_t,
8014    ) -> cusparseStatus_t;
8015}
8016unsafe extern "C" {
    /// Queries the workspace size needed by [`cusparseDpruneCsr2csr`] (double precision, `f64`).
    ///
    /// The required size, in bytes, is written through `pBufferSizeInBytes`; every other pointer
    /// argument is `*const` and therefore only read. Semantics follow the cuSPARSE
    /// `cusparse<t>pruneCsr2csr()` documentation.
8017    pub fn cusparseDpruneCsr2csr_bufferSizeExt(
8018        handle: cusparseHandle_t,
8019        m: ::core::ffi::c_int,
8020        n: ::core::ffi::c_int,
8021        nnzA: ::core::ffi::c_int,
8022        descrA: cusparseMatDescr_t,
8023        csrSortedValA: *const f64,
8024        csrSortedRowPtrA: *const ::core::ffi::c_int,
8025        csrSortedColIndA: *const ::core::ffi::c_int,
8026        threshold: *const f64,
8027        descrC: cusparseMatDescr_t,
8028        csrSortedValC: *const f64,
8029        csrSortedRowPtrC: *const ::core::ffi::c_int,
8030        csrSortedColIndC: *const ::core::ffi::c_int,
8031        pBufferSizeInBytes: *mut size_t,
8032    ) -> cusparseStatus_t;
8033}
8034unsafe extern "C" {
    /// Counting step of pruning the `f32` CSR matrix `A` into the CSR matrix `C`.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csr()` documentation, this fills the row-pointer
    /// array `csrSortedRowPtrC` and writes the total number of surviving nonzeros to
    /// `nnzTotalDevHostPtr`. `pBuffer` is the workspace sized by
    /// [`cusparseSpruneCsr2csr_bufferSizeExt`].
8035    pub fn cusparseSpruneCsr2csrNnz(
8036        handle: cusparseHandle_t,
8037        m: ::core::ffi::c_int,
8038        n: ::core::ffi::c_int,
8039        nnzA: ::core::ffi::c_int,
8040        descrA: cusparseMatDescr_t,
8041        csrSortedValA: *const f32,
8042        csrSortedRowPtrA: *const ::core::ffi::c_int,
8043        csrSortedColIndA: *const ::core::ffi::c_int,
8044        threshold: *const f32,
8045        descrC: cusparseMatDescr_t,
8046        csrSortedRowPtrC: *mut ::core::ffi::c_int,
8047        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
8048        pBuffer: *mut ::core::ffi::c_void,
8049    ) -> cusparseStatus_t;
8050}
8051unsafe extern "C" {
    /// Counting step of pruning the `f64` CSR matrix `A` into the CSR matrix `C`.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csr()` documentation, this fills the row-pointer
    /// array `csrSortedRowPtrC` and writes the total number of surviving nonzeros to
    /// `nnzTotalDevHostPtr`. `pBuffer` is the workspace sized by
    /// [`cusparseDpruneCsr2csr_bufferSizeExt`].
8052    pub fn cusparseDpruneCsr2csrNnz(
8053        handle: cusparseHandle_t,
8054        m: ::core::ffi::c_int,
8055        n: ::core::ffi::c_int,
8056        nnzA: ::core::ffi::c_int,
8057        descrA: cusparseMatDescr_t,
8058        csrSortedValA: *const f64,
8059        csrSortedRowPtrA: *const ::core::ffi::c_int,
8060        csrSortedColIndA: *const ::core::ffi::c_int,
8061        threshold: *const f64,
8062        descrC: cusparseMatDescr_t,
8063        csrSortedRowPtrC: *mut ::core::ffi::c_int,
8064        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
8065        pBuffer: *mut ::core::ffi::c_void,
8066    ) -> cusparseStatus_t;
8067}
8068unsafe extern "C" {
    /// Prunes the `f32` CSR matrix `A` into the CSR matrix `C`.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csr()` documentation, entries whose magnitude
    /// exceeds `*threshold` are kept. `csrSortedRowPtrC` is only read here and is expected to
    /// come from [`cusparseSpruneCsr2csrNnz`]; `csrSortedValC` and `csrSortedColIndC` are
    /// outputs. `pBuffer` is the workspace sized by [`cusparseSpruneCsr2csr_bufferSizeExt`].
8069    pub fn cusparseSpruneCsr2csr(
8070        handle: cusparseHandle_t,
8071        m: ::core::ffi::c_int,
8072        n: ::core::ffi::c_int,
8073        nnzA: ::core::ffi::c_int,
8074        descrA: cusparseMatDescr_t,
8075        csrSortedValA: *const f32,
8076        csrSortedRowPtrA: *const ::core::ffi::c_int,
8077        csrSortedColIndA: *const ::core::ffi::c_int,
8078        threshold: *const f32,
8079        descrC: cusparseMatDescr_t,
8080        csrSortedValC: *mut f32,
8081        csrSortedRowPtrC: *const ::core::ffi::c_int,
8082        csrSortedColIndC: *mut ::core::ffi::c_int,
8083        pBuffer: *mut ::core::ffi::c_void,
8084    ) -> cusparseStatus_t;
8085}
8086unsafe extern "C" {
    /// Prunes the `f64` CSR matrix `A` into the CSR matrix `C`.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csr()` documentation, entries whose magnitude
    /// exceeds `*threshold` are kept. `csrSortedRowPtrC` is only read here and is expected to
    /// come from [`cusparseDpruneCsr2csrNnz`]; `csrSortedValC` and `csrSortedColIndC` are
    /// outputs. `pBuffer` is the workspace sized by [`cusparseDpruneCsr2csr_bufferSizeExt`].
8087    pub fn cusparseDpruneCsr2csr(
8088        handle: cusparseHandle_t,
8089        m: ::core::ffi::c_int,
8090        n: ::core::ffi::c_int,
8091        nnzA: ::core::ffi::c_int,
8092        descrA: cusparseMatDescr_t,
8093        csrSortedValA: *const f64,
8094        csrSortedRowPtrA: *const ::core::ffi::c_int,
8095        csrSortedColIndA: *const ::core::ffi::c_int,
8096        threshold: *const f64,
8097        descrC: cusparseMatDescr_t,
8098        csrSortedValC: *mut f64,
8099        csrSortedRowPtrC: *const ::core::ffi::c_int,
8100        csrSortedColIndC: *mut ::core::ffi::c_int,
8101        pBuffer: *mut ::core::ffi::c_void,
8102    ) -> cusparseStatus_t;
8103}
8104unsafe extern "C" {
    /// Queries the workspace size needed by [`cusparseSpruneDense2csrByPercentage`]
    /// (single precision, `f32`).
    ///
    /// The required size, in bytes, is written through `pBufferSizeInBytes`. Unlike the
    /// threshold-based variant, `percentage` is passed by value and an `info` handle is required.
    /// Semantics follow the cuSPARSE `cusparse<t>pruneDense2csrByPercentage()` documentation.
8105    pub fn cusparseSpruneDense2csrByPercentage_bufferSizeExt(
8106        handle: cusparseHandle_t,
8107        m: ::core::ffi::c_int,
8108        n: ::core::ffi::c_int,
8109        A: *const f32,
8110        lda: ::core::ffi::c_int,
8111        percentage: f32,
8112        descrC: cusparseMatDescr_t,
8113        csrSortedValC: *const f32,
8114        csrSortedRowPtrC: *const ::core::ffi::c_int,
8115        csrSortedColIndC: *const ::core::ffi::c_int,
8116        info: pruneInfo_t,
8117        pBufferSizeInBytes: *mut size_t,
8118    ) -> cusparseStatus_t;
8119}
8120unsafe extern "C" {
    /// Queries the workspace size needed by [`cusparseDpruneDense2csrByPercentage`]
    /// (double precision, `f64`).
    ///
    /// The required size, in bytes, is written through `pBufferSizeInBytes`. Note that
    /// `percentage` is an `f32` passed by value even though the matrix data is `f64`.
    /// Semantics follow the cuSPARSE `cusparse<t>pruneDense2csrByPercentage()` documentation.
8121    pub fn cusparseDpruneDense2csrByPercentage_bufferSizeExt(
8122        handle: cusparseHandle_t,
8123        m: ::core::ffi::c_int,
8124        n: ::core::ffi::c_int,
8125        A: *const f64,
8126        lda: ::core::ffi::c_int,
8127        percentage: f32,
8128        descrC: cusparseMatDescr_t,
8129        csrSortedValC: *const f64,
8130        csrSortedRowPtrC: *const ::core::ffi::c_int,
8131        csrSortedColIndC: *const ::core::ffi::c_int,
8132        info: pruneInfo_t,
8133        pBufferSizeInBytes: *mut size_t,
8134    ) -> cusparseStatus_t;
8135}
8136unsafe extern "C" {
    /// Counting step of pruning the dense `f32` matrix `A` to CSR by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneDense2csrByPercentage()` documentation, this fills the
    /// row-pointer array `csrRowPtrC` and writes the total number of surviving nonzeros to
    /// `nnzTotalDevHostPtr`. `pBuffer` is the workspace sized by
    /// [`cusparseSpruneDense2csrByPercentage_bufferSizeExt`].
8137    pub fn cusparseSpruneDense2csrNnzByPercentage(
8138        handle: cusparseHandle_t,
8139        m: ::core::ffi::c_int,
8140        n: ::core::ffi::c_int,
8141        A: *const f32,
8142        lda: ::core::ffi::c_int,
8143        percentage: f32,
8144        descrC: cusparseMatDescr_t,
8145        csrRowPtrC: *mut ::core::ffi::c_int,
8146        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
8147        info: pruneInfo_t,
8148        pBuffer: *mut ::core::ffi::c_void,
8149    ) -> cusparseStatus_t;
8150}
8151unsafe extern "C" {
    /// Counting step of pruning the dense `f64` matrix `A` to CSR by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneDense2csrByPercentage()` documentation, this fills the
    /// row-pointer array `csrRowPtrC` and writes the total number of surviving nonzeros to
    /// `nnzTotalDevHostPtr`. `pBuffer` is the workspace sized by
    /// [`cusparseDpruneDense2csrByPercentage_bufferSizeExt`]. `percentage` is an `f32` even in
    /// this double-precision variant.
8152    pub fn cusparseDpruneDense2csrNnzByPercentage(
8153        handle: cusparseHandle_t,
8154        m: ::core::ffi::c_int,
8155        n: ::core::ffi::c_int,
8156        A: *const f64,
8157        lda: ::core::ffi::c_int,
8158        percentage: f32,
8159        descrC: cusparseMatDescr_t,
8160        csrRowPtrC: *mut ::core::ffi::c_int,
8161        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
8162        info: pruneInfo_t,
8163        pBuffer: *mut ::core::ffi::c_void,
8164    ) -> cusparseStatus_t;
8165}
8166unsafe extern "C" {
    /// Prunes the dense `f32` matrix `A` into the CSR matrix `C` by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneDense2csrByPercentage()` documentation, the pruning
    /// threshold is derived from `percentage` (documented range 0 to 100). `csrSortedRowPtrC` is
    /// only read here and is expected to come from [`cusparseSpruneDense2csrNnzByPercentage`];
    /// `csrSortedValC` and `csrSortedColIndC` are outputs. `pBuffer` is the workspace sized by
    /// [`cusparseSpruneDense2csrByPercentage_bufferSizeExt`].
8167    pub fn cusparseSpruneDense2csrByPercentage(
8168        handle: cusparseHandle_t,
8169        m: ::core::ffi::c_int,
8170        n: ::core::ffi::c_int,
8171        A: *const f32,
8172        lda: ::core::ffi::c_int,
8173        percentage: f32,
8174        descrC: cusparseMatDescr_t,
8175        csrSortedValC: *mut f32,
8176        csrSortedRowPtrC: *const ::core::ffi::c_int,
8177        csrSortedColIndC: *mut ::core::ffi::c_int,
8178        info: pruneInfo_t,
8179        pBuffer: *mut ::core::ffi::c_void,
8180    ) -> cusparseStatus_t;
8181}
8182unsafe extern "C" {
    /// Prunes the dense `f64` matrix `A` into the CSR matrix `C` by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneDense2csrByPercentage()` documentation, the pruning
    /// threshold is derived from `percentage` (documented range 0 to 100; an `f32` even in this
    /// double-precision variant). `csrSortedRowPtrC` is only read here and is expected to come
    /// from [`cusparseDpruneDense2csrNnzByPercentage`]; `csrSortedValC` and `csrSortedColIndC`
    /// are outputs. `pBuffer` is the workspace sized by
    /// [`cusparseDpruneDense2csrByPercentage_bufferSizeExt`].
8183    pub fn cusparseDpruneDense2csrByPercentage(
8184        handle: cusparseHandle_t,
8185        m: ::core::ffi::c_int,
8186        n: ::core::ffi::c_int,
8187        A: *const f64,
8188        lda: ::core::ffi::c_int,
8189        percentage: f32,
8190        descrC: cusparseMatDescr_t,
8191        csrSortedValC: *mut f64,
8192        csrSortedRowPtrC: *const ::core::ffi::c_int,
8193        csrSortedColIndC: *mut ::core::ffi::c_int,
8194        info: pruneInfo_t,
8195        pBuffer: *mut ::core::ffi::c_void,
8196    ) -> cusparseStatus_t;
8197}
8198unsafe extern "C" {
    /// Queries the workspace size needed by [`cusparseSpruneCsr2csrByPercentage`]
    /// (single precision, `f32`).
    ///
    /// The required size, in bytes, is written through `pBufferSizeInBytes`; every other pointer
    /// argument is `*const` and therefore only read. Semantics follow the cuSPARSE
    /// `cusparse<t>pruneCsr2csrByPercentage()` documentation.
8199    pub fn cusparseSpruneCsr2csrByPercentage_bufferSizeExt(
8200        handle: cusparseHandle_t,
8201        m: ::core::ffi::c_int,
8202        n: ::core::ffi::c_int,
8203        nnzA: ::core::ffi::c_int,
8204        descrA: cusparseMatDescr_t,
8205        csrSortedValA: *const f32,
8206        csrSortedRowPtrA: *const ::core::ffi::c_int,
8207        csrSortedColIndA: *const ::core::ffi::c_int,
8208        percentage: f32,
8209        descrC: cusparseMatDescr_t,
8210        csrSortedValC: *const f32,
8211        csrSortedRowPtrC: *const ::core::ffi::c_int,
8212        csrSortedColIndC: *const ::core::ffi::c_int,
8213        info: pruneInfo_t,
8214        pBufferSizeInBytes: *mut size_t,
8215    ) -> cusparseStatus_t;
8216}
8217unsafe extern "C" {
    /// Queries the workspace size needed by [`cusparseDpruneCsr2csrByPercentage`]
    /// (double precision, `f64`).
    ///
    /// The required size, in bytes, is written through `pBufferSizeInBytes`; every other pointer
    /// argument is `*const` and therefore only read. `percentage` is an `f32` even in this
    /// double-precision variant. Semantics follow the cuSPARSE
    /// `cusparse<t>pruneCsr2csrByPercentage()` documentation.
8218    pub fn cusparseDpruneCsr2csrByPercentage_bufferSizeExt(
8219        handle: cusparseHandle_t,
8220        m: ::core::ffi::c_int,
8221        n: ::core::ffi::c_int,
8222        nnzA: ::core::ffi::c_int,
8223        descrA: cusparseMatDescr_t,
8224        csrSortedValA: *const f64,
8225        csrSortedRowPtrA: *const ::core::ffi::c_int,
8226        csrSortedColIndA: *const ::core::ffi::c_int,
8227        percentage: f32,
8228        descrC: cusparseMatDescr_t,
8229        csrSortedValC: *const f64,
8230        csrSortedRowPtrC: *const ::core::ffi::c_int,
8231        csrSortedColIndC: *const ::core::ffi::c_int,
8232        info: pruneInfo_t,
8233        pBufferSizeInBytes: *mut size_t,
8234    ) -> cusparseStatus_t;
8235}
8236unsafe extern "C" {
    /// Counting step of pruning the `f32` CSR matrix `A` into the CSR matrix `C` by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csrByPercentage()` documentation, this fills the
    /// row-pointer array `csrSortedRowPtrC` and writes the total number of surviving nonzeros to
    /// `nnzTotalDevHostPtr`. `pBuffer` is the workspace sized by
    /// [`cusparseSpruneCsr2csrByPercentage_bufferSizeExt`].
8237    pub fn cusparseSpruneCsr2csrNnzByPercentage(
8238        handle: cusparseHandle_t,
8239        m: ::core::ffi::c_int,
8240        n: ::core::ffi::c_int,
8241        nnzA: ::core::ffi::c_int,
8242        descrA: cusparseMatDescr_t,
8243        csrSortedValA: *const f32,
8244        csrSortedRowPtrA: *const ::core::ffi::c_int,
8245        csrSortedColIndA: *const ::core::ffi::c_int,
8246        percentage: f32,
8247        descrC: cusparseMatDescr_t,
8248        csrSortedRowPtrC: *mut ::core::ffi::c_int,
8249        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
8250        info: pruneInfo_t,
8251        pBuffer: *mut ::core::ffi::c_void,
8252    ) -> cusparseStatus_t;
8253}
8254unsafe extern "C" {
    /// Counting step of pruning the `f64` CSR matrix `A` into the CSR matrix `C` by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csrByPercentage()` documentation, this fills the
    /// row-pointer array `csrSortedRowPtrC` and writes the total number of surviving nonzeros to
    /// `nnzTotalDevHostPtr`. `pBuffer` is the workspace sized by
    /// [`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`]. `percentage` is an `f32` even in
    /// this double-precision variant.
8255    pub fn cusparseDpruneCsr2csrNnzByPercentage(
8256        handle: cusparseHandle_t,
8257        m: ::core::ffi::c_int,
8258        n: ::core::ffi::c_int,
8259        nnzA: ::core::ffi::c_int,
8260        descrA: cusparseMatDescr_t,
8261        csrSortedValA: *const f64,
8262        csrSortedRowPtrA: *const ::core::ffi::c_int,
8263        csrSortedColIndA: *const ::core::ffi::c_int,
8264        percentage: f32,
8265        descrC: cusparseMatDescr_t,
8266        csrSortedRowPtrC: *mut ::core::ffi::c_int,
8267        nnzTotalDevHostPtr: *mut ::core::ffi::c_int,
8268        info: pruneInfo_t,
8269        pBuffer: *mut ::core::ffi::c_void,
8270    ) -> cusparseStatus_t;
8271}
8272unsafe extern "C" {
    /// Prunes the `f32` CSR matrix `A` into the CSR matrix `C` by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csrByPercentage()` documentation, the pruning
    /// threshold is derived from `percentage` (documented range 0 to 100). `csrSortedRowPtrC` is
    /// only read here and is expected to come from [`cusparseSpruneCsr2csrNnzByPercentage`];
    /// `csrSortedValC` and `csrSortedColIndC` are outputs. `pBuffer` is the workspace sized by
    /// [`cusparseSpruneCsr2csrByPercentage_bufferSizeExt`].
8273    pub fn cusparseSpruneCsr2csrByPercentage(
8274        handle: cusparseHandle_t,
8275        m: ::core::ffi::c_int,
8276        n: ::core::ffi::c_int,
8277        nnzA: ::core::ffi::c_int,
8278        descrA: cusparseMatDescr_t,
8279        csrSortedValA: *const f32,
8280        csrSortedRowPtrA: *const ::core::ffi::c_int,
8281        csrSortedColIndA: *const ::core::ffi::c_int,
8282        percentage: f32,
8283        descrC: cusparseMatDescr_t,
8284        csrSortedValC: *mut f32,
8285        csrSortedRowPtrC: *const ::core::ffi::c_int,
8286        csrSortedColIndC: *mut ::core::ffi::c_int,
8287        info: pruneInfo_t,
8288        pBuffer: *mut ::core::ffi::c_void,
8289    ) -> cusparseStatus_t;
8290}
8291unsafe extern "C" {
    /// Prunes the `f64` CSR matrix `A` into the CSR matrix `C` by percentage.
    ///
    /// Per the cuSPARSE `cusparse<t>pruneCsr2csrByPercentage()` documentation, the pruning
    /// threshold is derived from `percentage` (documented range 0 to 100; an `f32` even in this
    /// double-precision variant). `csrSortedRowPtrC` is only read here and is expected to come
    /// from [`cusparseDpruneCsr2csrNnzByPercentage`]; `csrSortedValC` and `csrSortedColIndC`
    /// are outputs. `pBuffer` is the workspace sized by
    /// [`cusparseDpruneCsr2csrByPercentage_bufferSizeExt`].
8292    pub fn cusparseDpruneCsr2csrByPercentage(
8293        handle: cusparseHandle_t,
8294        m: ::core::ffi::c_int,
8295        n: ::core::ffi::c_int,
8296        nnzA: ::core::ffi::c_int,
8297        descrA: cusparseMatDescr_t,
8298        csrSortedValA: *const f64,
8299        csrSortedRowPtrA: *const ::core::ffi::c_int,
8300        csrSortedColIndA: *const ::core::ffi::c_int,
8301        percentage: f32,
8302        descrC: cusparseMatDescr_t,
8303        csrSortedValC: *mut f64,
8304        csrSortedRowPtrC: *const ::core::ffi::c_int,
8305        csrSortedColIndC: *mut ::core::ffi::c_int,
8306        info: pruneInfo_t,
8307        pBuffer: *mut ::core::ffi::c_void,
8308    ) -> cusparseStatus_t;
8309}
8310impl cusparseCsr2CscAlg_t {
    /// Alias for [`cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT`].
    ///
    /// Exposed as an associated constant rather than a variant because a Rust enum cannot have
    /// two variants with the same discriminant.
8311    pub const CUSPARSE_CSR2CSC_ALG1: cusparseCsr2CscAlg_t = cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT;
8312}
/// Algorithm selector passed as the `alg` argument of [`cusparseCsr2cscEx2`] and
/// [`cusparseCsr2cscEx2_bufferSize`].
8313#[repr(u32)]
8314#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
8315pub enum cusparseCsr2CscAlg_t {
    /// Default algorithm.
8316    CUSPARSE_CSR2CSC_ALG_DEFAULT = 1,
8317}
8318unsafe extern "C" {
8319    /// This function converts a sparse matrix in CSR format (that is defined by the three arrays `csrVal`, `csrRowPtr`, and `csrColInd`) into a sparse matrix in CSC format (that is defined by arrays `cscVal`, `cscRowInd`, and `cscColPtr`). The resulting matrix can also be seen as the transpose of the original sparse matrix. Notice that this routine can also be used to convert a matrix in CSC format into a matrix in CSR format.
8320    ///
8321    /// The routine requires extra storage proportional to the number of nonzero values `nnz`. It provides in output always the same matrix.
8322    ///
8323    /// It is executed asynchronously with respect to the host, and it may return control to the application on the host before the result is ready.
8324    ///
8325    /// The function [`cusparseCsr2cscEx2_bufferSize`] returns the size of the workspace needed by [`cusparseCsr2cscEx2`]. User needs to allocate a buffer of this size and give that buffer to [`cusparseCsr2cscEx2`] as an argument.
8326    ///
8327    /// If `nnz == 0`, then `csrColInd`, `csrVal`, `cscVal`, and `cscRowInd` could have `NULL` value. In this case, `cscColPtr` is set to `idxBase` for all values.
8328    ///
8329    /// If `m == 0` or `n == 0`, the pointers are not checked and the routine returns [`cusparseStatus_t::CUSPARSE_STATUS_SUCCESS`].
8330    ///
8331    /// [`cusparseCsr2cscEx2`] supports the following data types:
8332    ///
8333    /// | `X`/`Y` |
8334    /// | --- |
8335    /// | [`cudaDataType_t::CUDA_R_8I`] |
8336    /// | [`cudaDataType_t::CUDA_R_16F`] |
8337    /// | [`cudaDataType_t::CUDA_R_16BF`] |
8338    /// | [`cudaDataType_t::CUDA_R_32F`] |
8339    /// | [`cudaDataType_t::CUDA_R_64F`] |
8340    /// | [`cudaDataType_t::CUDA_C_16F`] \[DEPRECATED\] |
8341    /// | [`cudaDataType_t::CUDA_C_16BF`] \[DEPRECATED\] |
8342    /// | [`cudaDataType_t::CUDA_C_32F`] |
8343    /// | [`cudaDataType_t::CUDA_C_64F`] |
8344    ///
8345    /// [`cusparseCsr2cscEx2`] supports the following algorithms ([`cusparseCsr2CscAlg_t`]):
8346    ///
8347    /// | Algorithm | Notes |
8348    /// | --- | --- |
8349    /// | [`cusparseCsr2CscAlg_t::CUSPARSE_CSR2CSC_ALG_DEFAULT`], `CUSPARSE_CSR2CSC_ALG1` | Default algorithm |
8350    ///
8351    /// | Action | Notes |
8352    /// | --- | --- |
8353    /// | [`cusparseAction_t::CUSPARSE_ACTION_SYMBOLIC`] | Compute the “structure” of the CSC output matrix (offset, row indices) |
8354    /// | [`cusparseAction_t::CUSPARSE_ACTION_NUMERIC`] | Compute the “structure” of the CSC output matrix and copy the values |
8355    ///
8356    /// [`cusparseCsr2cscEx2`] has the following properties:
8357    ///
8358    /// * The routine requires no extra storage
8359    /// * The routine supports asynchronous execution
8360    ///
8361    /// [`cusparseCsr2cscEx2`] supports the following optimizations:
8362    ///
8363    /// * CUDA graph capture
8364    /// * Hardware Memory Compression.
8365    ///
8366    /// # Parameters
8367    ///
8368    /// - `handle`: Handle to the cuSPARSE library context.
8369    /// - `m`: Number of rows of the CSR input matrix; number of columns of the CSC output matrix.
8370    /// - `n`: Number of columns of the CSR input matrix; number of rows of the CSC output matrix.
8371    /// - `nnz`: Number of nonzero elements of the CSR and CSC matrices.
8372    /// - `csrVal`: Value array of size `nnz` of the CSR matrix; of same type as `valType`.
8373    /// - `csrRowPtr`: Integer array of size `m + 1` that contains the CSR row offsets.
8374    /// - `csrColInd`: Integer array of size `nnz` that contains the CSR column indices.
8375    /// - `cscVal`: Value array of size `nnz` of the CSC matrix; of same type as `valType`.
8376    /// - `cscColPtr`: Integer array of size `n + 1` that contains the CSC column offsets.
8377    /// - `cscRowInd`: Integer array of size `nnz` that contains the CSC row indices.
8378    /// - `valType`: Value type for both CSR and CSC matrices.
8379    /// - `copyValues`: [`cusparseAction_t::CUSPARSE_ACTION_SYMBOLIC`] or [`cusparseAction_t::CUSPARSE_ACTION_NUMERIC`].
8380    /// - `idxBase`: Index base [`cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ZERO`] or [`cusparseIndexBase_t::CUSPARSE_INDEX_BASE_ONE`].
8381    /// - `alg`: Algorithm implementation. See [`cusparseCsr2CscAlg_t`] for possible values.
8382    /// - `buffer`: Pointer to workspace buffer.
8383    pub fn cusparseCsr2cscEx2(
8384        handle: cusparseHandle_t,
8385        m: ::core::ffi::c_int,
8386        n: ::core::ffi::c_int,
8387        nnz: ::core::ffi::c_int,
8388        csrVal: *const ::core::ffi::c_void,
8389        csrRowPtr: *const ::core::ffi::c_int,
8390        csrColInd: *const ::core::ffi::c_int,
8391        cscVal: *mut ::core::ffi::c_void,
8392        cscColPtr: *mut ::core::ffi::c_int,
8393        cscRowInd: *mut ::core::ffi::c_int,
8394        valType: cudaDataType,
8395        copyValues: cusparseAction_t,
8396        idxBase: cusparseIndexBase_t,
8397        alg: cusparseCsr2CscAlg_t,
8398        buffer: *mut ::core::ffi::c_void,
8399    ) -> cusparseStatus_t;
8400}
8401unsafe extern "C" {
    /// Returns, through `bufferSize`, the size of the workspace needed by [`cusparseCsr2cscEx2`].
    ///
    /// The parameter list matches [`cusparseCsr2cscEx2`] except that the final `buffer` pointer
    /// is replaced by the `bufferSize` out-parameter. The caller allocates a buffer of the
    /// reported size and passes it to [`cusparseCsr2cscEx2`].
8402    pub fn cusparseCsr2cscEx2_bufferSize(
8403        handle: cusparseHandle_t,
8404        m: ::core::ffi::c_int,
8405        n: ::core::ffi::c_int,
8406        nnz: ::core::ffi::c_int,
8407        csrVal: *const ::core::ffi::c_void,
8408        csrRowPtr: *const ::core::ffi::c_int,
8409        csrColInd: *const ::core::ffi::c_int,
8410        cscVal: *mut ::core::ffi::c_void,
8411        cscColPtr: *mut ::core::ffi::c_int,
8412        cscRowInd: *mut ::core::ffi::c_int,
8413        valType: cudaDataType,
8414        copyValues: cusparseAction_t,
8415        idxBase: cusparseIndexBase_t,
8416        alg: cusparseCsr2CscAlg_t,
8417        bufferSize: *mut size_t,
8418    ) -> cusparseStatus_t;
8419}
8420/// This type indicates the format of the sparse matrix.
8421/// See cuSPARSE Storage Formats for their description.
8422#[repr(u32)]
8423#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
8424pub enum cusparseFormat_t {
8425    /// The matrix is stored in Compressed Sparse Row (CSR) format.
8426    CUSPARSE_FORMAT_CSR = 1,
8427    /// The matrix is stored in Compressed Sparse Column (CSC) format.
8428    CUSPARSE_FORMAT_CSC = 2,
8429    /// The matrix is stored in Coordinate (COO) format organized in *Structure of Arrays (SoA)* layout.
8430    CUSPARSE_FORMAT_COO = 3,
8431    /// The matrix is stored in Blocked-Ellpack (Blocked-ELL) format.
8432    CUSPARSE_FORMAT_BLOCKED_ELL = 5,
8433    /// The matrix is stored in Block Sparse Row (BSR) format.
8434    CUSPARSE_FORMAT_BSR = 6,
    /// The matrix is stored in Sliced-Ellpack (Sliced-ELL) format.
8435    CUSPARSE_FORMAT_SLICED_ELLPACK = 7,
8436}
8437/// This type indicates the memory layout of a dense matrix.
8438#[repr(u32)]
8439#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
8440pub enum cusparseOrder_t {
8441    /// The matrix is stored in column-major order.
8442    CUSPARSE_ORDER_COL = 1,
8443    /// The matrix is stored in row-major order.
8444    CUSPARSE_ORDER_ROW = 2,
8445}
8446/// This type indicates the index type for representing the sparse matrix indices.
8447#[repr(u32)]
8448#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
8449pub enum cusparseIndexType_t {
    /// 16-bit unsigned integer [0, 2^16 - 1].
8450    CUSPARSE_INDEX_16U = 1,
8451    /// 32-bit signed integer [0, 2^31 - 1].
8452    CUSPARSE_INDEX_32I = 2,
8453    /// 64-bit signed integer [0, 2^63 - 1].
8454    CUSPARSE_INDEX_64I = 3,
8455}
/// Opaque stand-in for the C-side sparse vector descriptor.
///
/// The only field is a private zero-length array, so the type exposes no data; it is used
/// through the [`cusparseSpVecDescr_t`] and [`cusparseConstSpVecDescr_t`] pointer aliases.
8456#[repr(C)]
8457#[derive(Debug, Copy, Clone)]
8458pub struct cusparseSpVecDescr {
8459    _unused: [u8; 0],
8460}
/// Opaque stand-in for the C-side dense vector descriptor.
///
/// The only field is a private zero-length array, so the type exposes no data; it is used
/// through the [`cusparseDnVecDescr_t`] and [`cusparseConstDnVecDescr_t`] pointer aliases.
8461#[repr(C)]
8462#[derive(Debug, Copy, Clone)]
8463pub struct cusparseDnVecDescr {
8464    _unused: [u8; 0],
8465}
/// Opaque stand-in for the C-side sparse matrix descriptor.
///
/// The only field is a private zero-length array, so the type exposes no data; it is used
/// through the [`cusparseSpMatDescr_t`] and [`cusparseConstSpMatDescr_t`] pointer aliases.
8466#[repr(C)]
8467#[derive(Debug, Copy, Clone)]
8468pub struct cusparseSpMatDescr {
8469    _unused: [u8; 0],
8470}
/// Opaque stand-in for the C-side dense matrix descriptor.
///
/// The only field is a private zero-length array, so the type exposes no data; it is used
/// through the [`cusparseDnMatDescr_t`] and [`cusparseConstDnMatDescr_t`] pointer aliases.
8471#[repr(C)]
8472#[derive(Debug, Copy, Clone)]
8473pub struct cusparseDnMatDescr {
8474    _unused: [u8; 0],
8475}
/// Mutable handle to a sparse vector descriptor.
8476pub type cusparseSpVecDescr_t = *mut cusparseSpVecDescr;
/// Mutable handle to a dense vector descriptor.
8477pub type cusparseDnVecDescr_t = *mut cusparseDnVecDescr;
/// Mutable handle to a sparse matrix descriptor.
8478pub type cusparseSpMatDescr_t = *mut cusparseSpMatDescr;
/// Mutable handle to a dense matrix descriptor.
8479pub type cusparseDnMatDescr_t = *mut cusparseDnMatDescr;
/// Read-only handle to a sparse vector descriptor; a [`cusparseSpVecDescr_t`] coerces to it.
8480pub type cusparseConstSpVecDescr_t = *const cusparseSpVecDescr;
/// Read-only handle to a dense vector descriptor; a [`cusparseDnVecDescr_t`] coerces to it.
8481pub type cusparseConstDnVecDescr_t = *const cusparseDnVecDescr;
/// Read-only handle to a sparse matrix descriptor; a [`cusparseSpMatDescr_t`] coerces to it.
8482pub type cusparseConstSpMatDescr_t = *const cusparseSpMatDescr;
/// Read-only handle to a dense matrix descriptor; a [`cusparseDnMatDescr_t`] coerces to it.
8483pub type cusparseConstDnMatDescr_t = *const cusparseDnMatDescr;
8484unsafe extern "C" {
8485    /// This function initializes the sparse vector descriptor `spVecDescr`.
8486    ///
8487    /// [`cusparseCreateSpVec`] has the following constraints:
8488    ///
8489    /// * `indices` and `values` must be aligned to the size of the datatypes specified by `idxType` and `valueType`, respectively. See [`cudaDataType_t`] for the description of the datatypes.
8490    ///
8491    /// # Parameters
8492    ///
8493    /// - `spVecDescr`: Sparse vector descriptor.
8494    /// - `size`: Size of the sparse vector.
8495    /// - `nnz`: Number of non-zero entries of the sparse vector.
8496    /// - `indices`: Indices of the sparse vector. Array with `nnz` elements.
8497    /// - `values`: Values of the sparse vector. Array with `nnz` elements.
8498    /// - `idxType`: Enumerator specifying the data type of `indices`.
8499    /// - `idxBase`: Enumerator specifying the index base of `indices`.
8500    /// - `valueType`: Enumerator specifying the datatype of `values`.
8501    pub fn cusparseCreateSpVec(
8502        spVecDescr: *mut cusparseSpVecDescr_t,
8503        size: i64,
8504        nnz: i64,
8505        indices: *mut ::core::ffi::c_void,
8506        values: *mut ::core::ffi::c_void,
8507        idxType: cusparseIndexType_t,
8508        idxBase: cusparseIndexBase_t,
8509        valueType: cudaDataType,
8510    ) -> cusparseStatus_t;
8511}
8512unsafe extern "C" {
    /// Read-only counterpart of [`cusparseCreateSpVec`].
    ///
    /// Takes the same parameters, but `indices` and `values` are `*const` and the descriptor is
    /// returned through `spVecDescr` as a [`cusparseConstSpVecDescr_t`].
8513    pub fn cusparseCreateConstSpVec(
8514        spVecDescr: *mut cusparseConstSpVecDescr_t,
8515        size: i64,
8516        nnz: i64,
8517        indices: *const ::core::ffi::c_void,
8518        values: *const ::core::ffi::c_void,
8519        idxType: cusparseIndexType_t,
8520        idxBase: cusparseIndexBase_t,
8521        valueType: cudaDataType,
8522    ) -> cusparseStatus_t;
8523}
8524unsafe extern "C" {
8525    /// This function releases the host memory allocated for the sparse vector descriptor `spVecDescr`.
8526    ///
    /// The parameter is the `*const` alias, so a [`cusparseSpVecDescr_t`] can be passed as well
    /// (a `*mut` pointer coerces to `*const`).
    ///
8527    /// # Parameters
8528    ///
8529    /// - `spVecDescr`: Sparse vector descriptor.
8530    pub fn cusparseDestroySpVec(
8531        spVecDescr: cusparseConstSpVecDescr_t,
8532    ) -> cusparseStatus_t;
8533}
8534unsafe extern "C" {
8535    /// This function returns the fields of the sparse vector descriptor `spVecDescr`.
8536    ///
    /// Every argument after `spVecDescr` is an out-parameter the field value is written through.
    ///
8537    /// # Parameters
8538    ///
8539    /// - `spVecDescr`: Sparse vector descriptor.
8540    /// - `size`: Size of the sparse vector.
8541    /// - `nnz`: Number of non-zero entries of the sparse vector.
8542    /// - `indices`: Indices of the sparse vector. Array with `nnz` elements.
8543    /// - `values`: Values of the sparse vector. Array with `nnz` elements.
8544    /// - `idxType`: Enumerator specifying the data type of `indices`.
8545    /// - `idxBase`: Enumerator specifying the index base of `indices`.
8546    /// - `valueType`: Enumerator specifying the datatype of `values`.
8547    pub fn cusparseSpVecGet(
8548        spVecDescr: cusparseSpVecDescr_t,
8549        size: *mut i64,
8550        nnz: *mut i64,
8551        indices: *mut *mut ::core::ffi::c_void,
8552        values: *mut *mut ::core::ffi::c_void,
8553        idxType: *mut cusparseIndexType_t,
8554        idxBase: *mut cusparseIndexBase_t,
8555        valueType: *mut cudaDataType,
8556    ) -> cusparseStatus_t;
8557}
8558unsafe extern "C" {
    /// Read-only counterpart of [`cusparseSpVecGet`].
    ///
    /// Takes a [`cusparseConstSpVecDescr_t`] and returns `indices` and `values` as `*const`
    /// pointers; the remaining out-parameters are the same.
8559    pub fn cusparseConstSpVecGet(
8560        spVecDescr: cusparseConstSpVecDescr_t,
8561        size: *mut i64,
8562        nnz: *mut i64,
8563        indices: *mut *const ::core::ffi::c_void,
8564        values: *mut *const ::core::ffi::c_void,
8565        idxType: *mut cusparseIndexType_t,
8566        idxBase: *mut cusparseIndexBase_t,
8567        valueType: *mut cudaDataType,
8568    ) -> cusparseStatus_t;
8569}
8570unsafe extern "C" {
8571    /// This function returns the `idxBase` field of the sparse vector descriptor `spVecDescr`.
8572    ///
8573    /// # Parameters
8574    ///
8575    /// - `spVecDescr`: Sparse vector descriptor.
8576    /// - `idxBase`: Enumerator specifying the index base of `indices`.
8577    pub fn cusparseSpVecGetIndexBase(
8578        spVecDescr: cusparseConstSpVecDescr_t,
8579        idxBase: *mut cusparseIndexBase_t,
8580    ) -> cusparseStatus_t;
8581}
8582unsafe extern "C" {
8583    /// This function returns the `values` field of the sparse vector descriptor `spVecDescr`.
8584    ///
    /// `values` is an out-parameter: the stored pointer is written through it.
    ///
8585    /// # Parameters
8586    ///
8587    /// - `spVecDescr`: Sparse vector descriptor.
8588    /// - `values`: Values of the sparse vector. Array with `nnz` elements.
8589    pub fn cusparseSpVecGetValues(
8590        spVecDescr: cusparseSpVecDescr_t,
8591        values: *mut *mut ::core::ffi::c_void,
8592    ) -> cusparseStatus_t;
8593}
8594unsafe extern "C" {
    /// Read-only counterpart of [`cusparseSpVecGetValues`].
    ///
    /// Takes a [`cusparseConstSpVecDescr_t`] and returns the `values` pointer as `*const`.
8595    pub fn cusparseConstSpVecGetValues(
8596        spVecDescr: cusparseConstSpVecDescr_t,
8597        values: *mut *const ::core::ffi::c_void,
8598    ) -> cusparseStatus_t;
8599}
8600unsafe extern "C" {
8601    /// This function sets the `values` field of the sparse vector descriptor `spVecDescr`.
8602    ///
8603    /// [`cusparseSpVecSetValues`] has the following constraints:
8604    ///
8605    /// * `values` must be aligned to the size of the datatype specified in `spVecDescr`. See [`cudaDataType_t`] for the description of the datatypes.
8606    ///
8607    /// # Parameters
8608    ///
8609    /// - `spVecDescr`: Sparse vector descriptor.
8610    /// - `values`: Values of the sparse vector. Array with `nnz` elements.
8611    pub fn cusparseSpVecSetValues(
8612        spVecDescr: cusparseSpVecDescr_t,
8613        values: *mut ::core::ffi::c_void,
8614    ) -> cusparseStatus_t;
8615}
8616unsafe extern "C" {
8617    /// This function initializes the dense vector descriptor `dnVecDescr`.
8618    ///
8619    /// [`cusparseCreateDnVec`] has the following constraints:
8620    ///
8621    /// * `values` must be aligned to the size of the datatype specified by `valueType`. Refer to [`cudaDataType_t`] for the description of the datatypes.
8622    ///
8623    /// # Parameters
8624    ///
8625    /// - `dnVecDescr`: Dense vector descriptor.
8626    /// - `size`: Size of the dense vector.
8627    /// - `values`: Values of the dense vector. Array with `size` elements.
8628    /// - `valueType`: Enumerator specifying the datatype of `values`.
8629    pub fn cusparseCreateDnVec(
8630        dnVecDescr: *mut cusparseDnVecDescr_t,
8631        size: i64,
8632        values: *mut ::core::ffi::c_void,
8633        valueType: cudaDataType,
8634    ) -> cusparseStatus_t;
8635}
8636unsafe extern "C" {
    /// Read-only counterpart of [`cusparseCreateDnVec`].
    ///
    /// Takes the same parameters, but `values` is `*const` and the descriptor is returned
    /// through `dnVecDescr` as a [`cusparseConstDnVecDescr_t`].
8637    pub fn cusparseCreateConstDnVec(
8638        dnVecDescr: *mut cusparseConstDnVecDescr_t,
8639        size: i64,
8640        values: *const ::core::ffi::c_void,
8641        valueType: cudaDataType,
8642    ) -> cusparseStatus_t;
8643}
8644unsafe extern "C" {
8645    /// This function releases the host memory allocated for the dense vector descriptor `dnVecDescr`.
8646    ///
    /// The parameter is the `*const` alias, so a [`cusparseDnVecDescr_t`] can be passed as well
    /// (a `*mut` pointer coerces to `*const`).
    ///
8647    /// # Parameters
8648    ///
8649    /// - `dnVecDescr`: Dense vector descriptor.
8650    pub fn cusparseDestroyDnVec(
8651        dnVecDescr: cusparseConstDnVecDescr_t,
8652    ) -> cusparseStatus_t;
8653}
8654unsafe extern "C" {
8655    /// This function returns the fields of the dense vector descriptor `dnVecDescr`.
8656    ///
    /// Every argument after `dnVecDescr` is an out-parameter the field value is written through.
    ///
8657    /// # Parameters
8658    ///
8659    /// - `dnVecDescr`: Dense vector descriptor.
8660    /// - `size`: Size of the dense vector.
8661    /// - `values`: Values of the dense vector. Array with `size` elements.
8662    /// - `valueType`: Enumerator specifying the datatype of `values`.
8663    pub fn cusparseDnVecGet(
8664        dnVecDescr: cusparseDnVecDescr_t,
8665        size: *mut i64,
8666        values: *mut *mut ::core::ffi::c_void,
8667        valueType: *mut cudaDataType,
8668    ) -> cusparseStatus_t;
8669}
8670unsafe extern "C" {
    /// Read-only counterpart of [`cusparseDnVecGet`].
    ///
    /// Takes a [`cusparseConstDnVecDescr_t`] and returns `values` as a `*const` pointer; the
    /// remaining out-parameters are the same.
8671    pub fn cusparseConstDnVecGet(
8672        dnVecDescr: cusparseConstDnVecDescr_t,
8673        size: *mut i64,
8674        values: *mut *const ::core::ffi::c_void,
8675        valueType: *mut cudaDataType,
8676    ) -> cusparseStatus_t;
8677}
8678unsafe extern "C" {
8679    /// This function returns the `values` field of the dense vector descriptor `dnVecDescr`.
8680    ///
    /// `values` is an out-parameter: the stored pointer is written through it.
    ///
8681    /// # Parameters
8682    ///
8683    /// - `dnVecDescr`: Dense vector descriptor.
8684    /// - `values`: Values of the dense vector.
8685    pub fn cusparseDnVecGetValues(
8686        dnVecDescr: cusparseDnVecDescr_t,
8687        values: *mut *mut ::core::ffi::c_void,
8688    ) -> cusparseStatus_t;
8689}
8690unsafe extern "C" {
    /// Read-only counterpart of [`cusparseDnVecGetValues`].
    ///
    /// Takes a [`cusparseConstDnVecDescr_t`] and returns the `values` pointer as `*const`.
8691    pub fn cusparseConstDnVecGetValues(
8692        dnVecDescr: cusparseConstDnVecDescr_t,
8693        values: *mut *const ::core::ffi::c_void,
8694    ) -> cusparseStatus_t;
8695}
8696unsafe extern "C" {
8697    /// This function sets the `values` field of the dense vector descriptor `dnVecDescr`.
8698    ///
8699    /// [`cusparseDnVecSetValues`] has the following constraints:
8700    ///
8701    /// * `values` must be aligned to the size of the datatype specified in `dnVecDescr`. Refer to [`cudaDataType_t`] for the description of the datatypes.
8702    ///
8703    /// # Parameters
8704    ///
8705    /// - `dnVecDescr`: Dense vector descriptor.
8706    /// - `values`: Values of the dense vector. Array with `size` elements.
8707    pub fn cusparseDnVecSetValues(
8708        dnVecDescr: cusparseDnVecDescr_t,
8709        values: *mut ::core::ffi::c_void,
8710    ) -> cusparseStatus_t;
8711}
8712unsafe extern "C" {
8713    /// This function releases the host memory allocated for the sparse matrix descriptor `spMatDescr`.
8714    ///
    /// The parameter is the `*const` alias, so a [`cusparseSpMatDescr_t`] can be passed as well
    /// (a `*mut` pointer coerces to `*const`).
    ///
8715    /// # Parameters
8716    ///
8717    /// - `spMatDescr`: Sparse matrix descriptor.
8718    pub fn cusparseDestroySpMat(
8719        spMatDescr: cusparseConstSpMatDescr_t,
8720    ) -> cusparseStatus_t;
8721}
8722unsafe extern "C" {
8723    /// This function returns the `format` field of the sparse matrix descriptor `spMatDescr`.
8724    ///
8725    /// # Parameters
8726    ///
8727    /// - `spMatDescr`: Sparse matrix descriptor.
8728    /// - `format`: Storage format of the sparse matrix. Passed as a pointer to a [`cusparseFormat_t`].
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8729    pub fn cusparseSpMatGetFormat(
8730        spMatDescr: cusparseConstSpMatDescr_t,
8731        format: *mut cusparseFormat_t,
8732    ) -> cusparseStatus_t;
8733}
8734unsafe extern "C" {
8735    /// This function returns the `idxBase` field of the sparse matrix descriptor `spMatDescr`.
8736    ///
8737    /// # Parameters
8738    ///
8739    /// - `spMatDescr`: Sparse matrix descriptor.
8740    /// - `idxBase`: Index base of the sparse matrix. Passed as a pointer to a [`cusparseIndexBase_t`].
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8741    pub fn cusparseSpMatGetIndexBase(
8742        spMatDescr: cusparseConstSpMatDescr_t,
8743        idxBase: *mut cusparseIndexBase_t,
8744    ) -> cusparseStatus_t;
8745}
8746unsafe extern "C" {
8747    /// This function returns the `values` field of the sparse matrix descriptor `spMatDescr`.
8748    ///
8749    /// # Parameters
8750    ///
8751    /// - `spMatDescr`: Sparse matrix descriptor.
8752    /// - `values`: Values of the sparse matrix. Array with `nnz` elements. Passed as a pointer-to-pointer.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8753    pub fn cusparseSpMatGetValues(
8754        spMatDescr: cusparseSpMatDescr_t,
8755        values: *mut *mut ::core::ffi::c_void,
8756    ) -> cusparseStatus_t;
8757}
8758unsafe extern "C" {
    /// Const-descriptor variant, presumably of [`cusparseSpMatGetValues`]: `spMatDescr` is a
    /// [`cusparseConstSpMatDescr_t`] and `values` is written as a `*const` pointer.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8759    pub fn cusparseConstSpMatGetValues(
8760        spMatDescr: cusparseConstSpMatDescr_t,
8761        values: *mut *const ::core::ffi::c_void,
8762    ) -> cusparseStatus_t;
8763}
8764unsafe extern "C" {
8765    /// This function sets the `values` field of the sparse matrix descriptor `spMatDescr`.
8766    ///
8767    /// [`cusparseSpMatSetValues`] has the following constraints:
8768    ///
8769    /// * `values` must be aligned to the size of its corresponding datatype specified in `spMatDescr`. See [`cudaDataType`] for the description of the datatypes.
8770    ///
8771    /// # Parameters
8772    ///
8773    /// - `spMatDescr`: Sparse matrix descriptor.
8774    /// - `values`: Values of the sparse matrix. Array with `nnz` elements.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8775    pub fn cusparseSpMatSetValues(
8776        spMatDescr: cusparseSpMatDescr_t,
8777        values: *mut ::core::ffi::c_void,
8778    ) -> cusparseStatus_t;
8779}
8780unsafe extern "C" {
8781    /// This function returns the sizes of the sparse matrix `spMatDescr`.
8782    ///
8783    /// # Parameters
8784    ///
8785    /// - `spMatDescr`: Sparse matrix descriptor.
8786    /// - `rows`: Number of rows of the sparse matrix. Passed as a pointer to `i64`.
8787    /// - `cols`: Number of columns of the sparse matrix. Passed as a pointer to `i64`.
8788    /// - `nnz`: Number of non-zero entries of the sparse matrix. Passed as a pointer to `i64`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8789    pub fn cusparseSpMatGetSize(
8790        spMatDescr: cusparseConstSpMatDescr_t,
8791        rows: *mut i64,
8792        cols: *mut i64,
8793        nnz: *mut i64,
8794    ) -> cusparseStatus_t;
8795}
8796unsafe extern "C" {
8797    /// This function returns the `batchCount` field of the sparse matrix descriptor `spMatDescr`.
8798    ///
8799    /// # Parameters
8800    ///
8801    /// - `spMatDescr`: Sparse matrix descriptor.
8802    /// - `batchCount`: Number of batches of the sparse matrix. Passed as a pointer to a C `int`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8803    pub fn cusparseSpMatGetStridedBatch(
8804        spMatDescr: cusparseConstSpMatDescr_t,
8805        batchCount: *mut ::core::ffi::c_int,
8806    ) -> cusparseStatus_t;
8807}
8808unsafe extern "C" {
8809    /// This function sets the `batchCount` and the `batchStride` fields of the sparse matrix descriptor `spMatDescr`.
8810    ///
8811    /// # Parameters
8812    ///
8813    /// - `spMatDescr`: Sparse matrix descriptor.
8814    /// - `batchCount`: Number of batches of the sparse matrix.
8815    /// - `batchStride`: Address offset between consecutive batches.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8816    pub fn cusparseCooSetStridedBatch(
8817        spMatDescr: cusparseSpMatDescr_t,
8818        batchCount: ::core::ffi::c_int,
8819        batchStride: i64,
8820    ) -> cusparseStatus_t;
8821}
8822unsafe extern "C" {
8823    /// This function sets the `batchCount` and the `batchStride` fields of the sparse matrix descriptor `spMatDescr`.
8824    ///
    /// NOTE(review): the signature has no single `batchStride` argument; the strides are
    /// supplied as `offsetsBatchStride` and `columnsValuesBatchStride`.
    ///
8825    /// # Parameters
8826    ///
8827    /// - `spMatDescr`: Sparse matrix descriptor.
8828    /// - `batchCount`: Number of batches of the sparse matrix.
8829    /// - `offsetsBatchStride`: Address offset between consecutive batches for the row offset array.
8830    /// - `columnsValuesBatchStride`: Address offset between consecutive batches for the column and value arrays.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8831    pub fn cusparseCsrSetStridedBatch(
8832        spMatDescr: cusparseSpMatDescr_t,
8833        batchCount: ::core::ffi::c_int,
8834        offsetsBatchStride: i64,
8835        columnsValuesBatchStride: i64,
8836    ) -> cusparseStatus_t;
8837}
8838unsafe extern "C" {
8839    /// This function sets the `batchCount` and the `batchStride` fields of the sparse matrix descriptor `spMatDescr`.
8840    ///
    /// NOTE(review): the signature has no single `batchStride` argument; the strides are
    /// supplied as `offsetsBatchStride`, `columnsBatchStride` and `ValuesBatchStride`.
    ///
8841    /// # Parameters
8842    ///
8843    /// - `spMatDescr`: Sparse matrix descriptor.
8844    /// - `batchCount`: Number of batches of the sparse matrix.
8845    /// - `offsetsBatchStride`: Address offset between consecutive batches for the row offset array.
8846    /// - `columnsBatchStride`: Address offset between consecutive batches for the column array.
    /// - `ValuesBatchStride`: Not described upstream; presumably the address offset between
    ///   consecutive batches for the value array — TODO confirm against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8847    pub fn cusparseBsrSetStridedBatch(
8848        spMatDescr: cusparseSpMatDescr_t,
8849        batchCount: ::core::ffi::c_int,
8850        offsetsBatchStride: i64,
8851        columnsBatchStride: i64,
8852        ValuesBatchStride: i64,
8853    ) -> cusparseStatus_t;
8854}
/// Attribute selector passed to [`cusparseSpMatGetAttribute`] and
/// [`cusparseSpMatSetAttribute`]. Represented as a `u32`.
8855#[repr(u32)]
8856#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
8857pub enum cusparseSpMatAttribute_t {
    /// Indicates if the lower or upper part of a matrix is stored in sparse storage
    /// (values are [`cusparseFillMode_t`] variants).
8858    CUSPARSE_SPMAT_FILL_MODE = 0,
    /// Indicates if the matrix diagonal entries are unity
    /// (values are [`cusparseDiagType_t`] variants).
8859    CUSPARSE_SPMAT_DIAG_TYPE = 1,
8860}
8861unsafe extern "C" {
8862    /// The function gets the attributes of the sparse matrix descriptor `spMatDescr`.
8863    ///
8864    /// | Attribute | Meaning | Possible Values |
8865    /// | --- | --- | --- |
8866    /// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_FILL_MODE`] | Indicates if the lower or upper part of a matrix is stored in sparse storage | [`cusparseFillMode_t::CUSPARSE_FILL_MODE_LOWER`] [`cusparseFillMode_t::CUSPARSE_FILL_MODE_UPPER`] |
8867    /// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_DIAG_TYPE`] | Indicates if the matrix diagonal entries are unity | [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_NON_UNIT`] [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] |
8868    ///
8869    /// # Parameters
8870    ///
8871    /// - `spMatDescr`: Sparse matrix descriptor.
8872    /// - `attribute`: Attribute enumerator.
8873    /// - `data`: Attribute value. Passed as an untyped `*mut c_void`.
8874    /// - `dataSize`: Size of the attribute in bytes for safety.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8875    pub fn cusparseSpMatGetAttribute(
8876        spMatDescr: cusparseConstSpMatDescr_t,
8877        attribute: cusparseSpMatAttribute_t,
8878        data: *mut ::core::ffi::c_void,
8879        dataSize: size_t,
8880    ) -> cusparseStatus_t;
8881}
8882unsafe extern "C" {
8883    /// The function sets the attributes of the sparse matrix descriptor `spMatDescr`.
8884    ///
8885    /// | Attribute | Meaning | Possible Values |
8886    /// | --- | --- | --- |
8887    /// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_FILL_MODE`] | Indicates if the lower or upper part of a matrix is stored in sparse storage | [`cusparseFillMode_t::CUSPARSE_FILL_MODE_LOWER`] [`cusparseFillMode_t::CUSPARSE_FILL_MODE_UPPER`] |
8888    /// | [`cusparseSpMatAttribute_t::CUSPARSE_SPMAT_DIAG_TYPE`] | Indicates if the matrix diagonal entries are unity | [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_NON_UNIT`] [`cusparseDiagType_t::CUSPARSE_DIAG_TYPE_UNIT`] |
8889    ///
8890    /// # Parameters
8891    ///
8892    /// - `spMatDescr`: Sparse matrix descriptor.
8893    /// - `attribute`: Attribute enumerator.
8894    /// - `data`: Attribute value. Passed as an untyped `*mut c_void`.
8895    /// - `dataSize`: Size of the attribute in bytes for safety.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8896    pub fn cusparseSpMatSetAttribute(
8897        spMatDescr: cusparseSpMatDescr_t,
8898        attribute: cusparseSpMatAttribute_t,
8899        data: *mut ::core::ffi::c_void,
8900        dataSize: size_t,
8901    ) -> cusparseStatus_t;
8902}
8903unsafe extern "C" {
8904    /// This function initializes the sparse matrix descriptor `spMatDescr` in the CSR format.
8905    ///
8906    /// [`cusparseCreateCsr`] has the following constraints:
8907    ///
8908    /// * `csrRowOffsets`, `csrColInd`, and `csrValues` must be aligned to the size of the datatypes specified by `csrRowOffsetsType`, `csrColIndType`, and `valueType`, respectively. See [`cudaDataType`] for the description of the datatypes.
8909    ///
8910    /// # Parameters
8911    ///
8912    /// - `spMatDescr`: Sparse matrix descriptor.
8913    /// - `rows`: Number of rows of the sparse matrix.
8914    /// - `cols`: Number of columns of the sparse matrix.
8915    /// - `nnz`: Number of non-zero entries of the sparse matrix.
8916    /// - `csrRowOffsets`: Row offsets of the sparse matrix. Array with `rows + 1` elements.
8917    /// - `csrColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
8918    /// - `csrValues`: Values of the sparse matrix. Array with `nnz` elements.
8919    /// - `csrRowOffsetsType`: Data type of `csrRowOffsets`.
8920    /// - `csrColIndType`: Data type of `csrColInd`.
8921    /// - `idxBase`: Index base of `csrRowOffsets` and `csrColInd`.
8922    /// - `valueType`: Datatype of `csrValues`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8923    pub fn cusparseCreateCsr(
8924        spMatDescr: *mut cusparseSpMatDescr_t,
8925        rows: i64,
8926        cols: i64,
8927        nnz: i64,
8928        csrRowOffsets: *mut ::core::ffi::c_void,
8929        csrColInd: *mut ::core::ffi::c_void,
8930        csrValues: *mut ::core::ffi::c_void,
8931        csrRowOffsetsType: cusparseIndexType_t,
8932        csrColIndType: cusparseIndexType_t,
8933        idxBase: cusparseIndexBase_t,
8934        valueType: cudaDataType,
8935    ) -> cusparseStatus_t;
8936}
8937unsafe extern "C" {
    /// Const variant, presumably of [`cusparseCreateCsr`]: the descriptor out-parameter is a
    /// `*mut` [`cusparseConstSpMatDescr_t`] and the three arrays are taken as `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8938    pub fn cusparseCreateConstCsr(
8939        spMatDescr: *mut cusparseConstSpMatDescr_t,
8940        rows: i64,
8941        cols: i64,
8942        nnz: i64,
8943        csrRowOffsets: *const ::core::ffi::c_void,
8944        csrColInd: *const ::core::ffi::c_void,
8945        csrValues: *const ::core::ffi::c_void,
8946        csrRowOffsetsType: cusparseIndexType_t,
8947        csrColIndType: cusparseIndexType_t,
8948        idxBase: cusparseIndexBase_t,
8949        valueType: cudaDataType,
8950    ) -> cusparseStatus_t;
8951}
8952unsafe extern "C" {
8953    /// This function initializes the sparse matrix descriptor `spMatDescr` in the CSC format.
8954    ///
8955    /// [`cusparseCreateCsc`] has the following constraints:
8956    ///
8957    /// * `cscColOffsets`, `cscRowInd`, and `cscValues` must be aligned to the size of the datatypes specified by `cscColOffsetsType`, `cscRowIndType`, and `valueType`, respectively. See [`cudaDataType`] for the description of the datatypes.
8958    ///
8959    /// # Parameters
8960    ///
8961    /// - `spMatDescr`: Sparse matrix descriptor.
8962    /// - `rows`: Number of rows of the sparse matrix.
8963    /// - `cols`: Number of columns of the sparse matrix.
8964    /// - `nnz`: Number of non-zero entries of the sparse matrix.
8965    /// - `cscColOffsets`: Column offsets of the sparse matrix. Array with `cols + 1` elements.
8966    /// - `cscRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
8967    /// - `cscValues`: Values of the sparse matrix. Array with `nnz` elements.
8968    /// - `cscColOffsetsType`: Data type of `cscColOffsets`.
8969    /// - `cscRowIndType`: Data type of `cscRowInd`.
8970    /// - `idxBase`: Index base of `cscColOffsets` and `cscRowInd`.
8971    /// - `valueType`: Datatype of `cscValues`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8972    pub fn cusparseCreateCsc(
8973        spMatDescr: *mut cusparseSpMatDescr_t,
8974        rows: i64,
8975        cols: i64,
8976        nnz: i64,
8977        cscColOffsets: *mut ::core::ffi::c_void,
8978        cscRowInd: *mut ::core::ffi::c_void,
8979        cscValues: *mut ::core::ffi::c_void,
8980        cscColOffsetsType: cusparseIndexType_t,
8981        cscRowIndType: cusparseIndexType_t,
8982        idxBase: cusparseIndexBase_t,
8983        valueType: cudaDataType,
8984    ) -> cusparseStatus_t;
8985}
8986unsafe extern "C" {
    /// Const variant, presumably of [`cusparseCreateCsc`]: the descriptor out-parameter is a
    /// `*mut` [`cusparseConstSpMatDescr_t`] and the three arrays are taken as `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
8987    pub fn cusparseCreateConstCsc(
8988        spMatDescr: *mut cusparseConstSpMatDescr_t,
8989        rows: i64,
8990        cols: i64,
8991        nnz: i64,
8992        cscColOffsets: *const ::core::ffi::c_void,
8993        cscRowInd: *const ::core::ffi::c_void,
8994        cscValues: *const ::core::ffi::c_void,
8995        cscColOffsetsType: cusparseIndexType_t,
8996        cscRowIndType: cusparseIndexType_t,
8997        idxBase: cusparseIndexBase_t,
8998        valueType: cudaDataType,
8999    ) -> cusparseStatus_t;
9000}
9001unsafe extern "C" {
9002    /// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in CSR format.
9003    ///
    /// Every field is passed as a `*mut` pointer; the three arrays are pointer-to-pointer.
    ///
9004    /// # Parameters
9005    ///
9006    /// - `spMatDescr`: Sparse matrix descriptor.
9007    /// - `rows`: Number of rows of the sparse matrix.
9008    /// - `cols`: Number of columns of the sparse matrix.
9009    /// - `nnz`: Number of non-zero entries of the sparse matrix.
9010    /// - `csrRowOffsets`: Row offsets of the sparse matrix. Array with `rows + 1` elements.
9011    /// - `csrColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
9012    /// - `csrValues`: Values of the sparse matrix. Array with `nnz` elements.
9013    /// - `csrRowOffsetsType`: Data type of `csrRowOffsets`.
9014    /// - `csrColIndType`: Data type of `csrColInd`.
9015    /// - `idxBase`: Index base of `csrRowOffsets` and `csrColInd`.
9016    /// - `valueType`: Datatype of `csrValues`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9017    pub fn cusparseCsrGet(
9018        spMatDescr: cusparseSpMatDescr_t,
9019        rows: *mut i64,
9020        cols: *mut i64,
9021        nnz: *mut i64,
9022        csrRowOffsets: *mut *mut ::core::ffi::c_void,
9023        csrColInd: *mut *mut ::core::ffi::c_void,
9024        csrValues: *mut *mut ::core::ffi::c_void,
9025        csrRowOffsetsType: *mut cusparseIndexType_t,
9026        csrColIndType: *mut cusparseIndexType_t,
9027        idxBase: *mut cusparseIndexBase_t,
9028        valueType: *mut cudaDataType,
9029    ) -> cusparseStatus_t;
9030}
9031unsafe extern "C" {
    /// Const-descriptor variant, presumably of [`cusparseCsrGet`]: `spMatDescr` is a
    /// [`cusparseConstSpMatDescr_t`] and the three arrays are written as `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9032    pub fn cusparseConstCsrGet(
9033        spMatDescr: cusparseConstSpMatDescr_t,
9034        rows: *mut i64,
9035        cols: *mut i64,
9036        nnz: *mut i64,
9037        csrRowOffsets: *mut *const ::core::ffi::c_void,
9038        csrColInd: *mut *const ::core::ffi::c_void,
9039        csrValues: *mut *const ::core::ffi::c_void,
9040        csrRowOffsetsType: *mut cusparseIndexType_t,
9041        csrColIndType: *mut cusparseIndexType_t,
9042        idxBase: *mut cusparseIndexBase_t,
9043        valueType: *mut cudaDataType,
9044    ) -> cusparseStatus_t;
9045}
9046unsafe extern "C" {
9047    /// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in CSC format.
9048    ///
    /// Every field is passed as a `*mut` pointer; the three arrays are pointer-to-pointer.
    ///
9049    /// # Parameters
9050    ///
9051    /// - `spMatDescr`: Sparse matrix descriptor.
9052    /// - `rows`: Number of rows of the sparse matrix.
9053    /// - `cols`: Number of columns of the sparse matrix.
9054    /// - `nnz`: Number of non-zero entries of the sparse matrix.
9055    /// - `cscColOffsets`: Column offsets of the sparse matrix. Array with `cols + 1` elements.
9056    /// - `cscRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
9057    /// - `cscValues`: Values of the sparse matrix. Array with `nnz` elements.
9058    /// - `cscColOffsetsType`: Data type of `cscColOffsets`.
9059    /// - `cscRowIndType`: Data type of `cscRowInd`.
9060    /// - `idxBase`: Index base of `cscColOffsets` and `cscRowInd`.
9061    /// - `valueType`: Datatype of `cscValues`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9062    pub fn cusparseCscGet(
9063        spMatDescr: cusparseSpMatDescr_t,
9064        rows: *mut i64,
9065        cols: *mut i64,
9066        nnz: *mut i64,
9067        cscColOffsets: *mut *mut ::core::ffi::c_void,
9068        cscRowInd: *mut *mut ::core::ffi::c_void,
9069        cscValues: *mut *mut ::core::ffi::c_void,
9070        cscColOffsetsType: *mut cusparseIndexType_t,
9071        cscRowIndType: *mut cusparseIndexType_t,
9072        idxBase: *mut cusparseIndexBase_t,
9073        valueType: *mut cudaDataType,
9074    ) -> cusparseStatus_t;
9075}
9076unsafe extern "C" {
    /// Const-descriptor variant, presumably of [`cusparseCscGet`]: `spMatDescr` is a
    /// [`cusparseConstSpMatDescr_t`] and the three arrays are written as `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9077    pub fn cusparseConstCscGet(
9078        spMatDescr: cusparseConstSpMatDescr_t,
9079        rows: *mut i64,
9080        cols: *mut i64,
9081        nnz: *mut i64,
9082        cscColOffsets: *mut *const ::core::ffi::c_void,
9083        cscRowInd: *mut *const ::core::ffi::c_void,
9084        cscValues: *mut *const ::core::ffi::c_void,
9085        cscColOffsetsType: *mut cusparseIndexType_t,
9086        cscRowIndType: *mut cusparseIndexType_t,
9087        idxBase: *mut cusparseIndexBase_t,
9088        valueType: *mut cudaDataType,
9089    ) -> cusparseStatus_t;
9090}
9091unsafe extern "C" {
9092    /// This function sets the pointers of the sparse matrix descriptor `spMatDescr`.
9093    ///
9094    /// [`cusparseCsrSetPointers`] has the following constraints:
9095    ///
9096    /// * `csrRowOffsets`, `csrColInd`, and `csrValues` must be aligned to the size of their corresponding datatypes specified in `spMatDescr`. See [`cudaDataType`] for the description of the datatypes.
9097    ///
9098    /// # Parameters
9099    ///
9100    /// - `spMatDescr`: Sparse matrix descriptor.
9101    /// - `csrRowOffsets`: Row offsets of the sparse matrix. Array with `rows + 1` elements.
9102    /// - `csrColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
9103    /// - `csrValues`: Values of the sparse matrix. Array with `nnz` elements.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9104    pub fn cusparseCsrSetPointers(
9105        spMatDescr: cusparseSpMatDescr_t,
9106        csrRowOffsets: *mut ::core::ffi::c_void,
9107        csrColInd: *mut ::core::ffi::c_void,
9108        csrValues: *mut ::core::ffi::c_void,
9109    ) -> cusparseStatus_t;
9110}
9111unsafe extern "C" {
9112    /// This function sets the pointers of the sparse matrix descriptor `spMatDescr`.
9113    ///
9114    /// [`cusparseCscSetPointers`] has the following constraints:
9115    ///
9116    /// * `cscColOffsets`, `cscRowInd`, and `cscValues` must be aligned to the size of their corresponding datatypes specified in `spMatDescr`. See [`cudaDataType`] for the description of the datatypes.
9117    ///
9118    /// # Parameters
9119    ///
9120    /// - `spMatDescr`: Sparse matrix descriptor.
9121    /// - `cscColOffsets`: Column offsets of the sparse matrix. Array with `cols + 1` elements.
9122    /// - `cscRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
9123    /// - `cscValues`: Values of the sparse matrix. Array with `nnz` elements.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9124    pub fn cusparseCscSetPointers(
9125        spMatDescr: cusparseSpMatDescr_t,
9126        cscColOffsets: *mut ::core::ffi::c_void,
9127        cscRowInd: *mut ::core::ffi::c_void,
9128        cscValues: *mut ::core::ffi::c_void,
9129    ) -> cusparseStatus_t;
9130}
9131unsafe extern "C" {
9132    /// This function initializes the sparse matrix descriptor `spMatDescr` for the Block Compressed Row (BSR) format.
9133    ///
9134    /// [`cusparseCreateBsr`] has the following constraints:
9135    ///
9136    /// * `bsrRowOffsets`, `bsrColInd`, and `bsrValues` must be aligned to the size of the datatypes specified by `bsrRowOffsetsType`, `bsrColIndType`, and `valueType`, respectively. See [`cudaDataType`] for the description of the datatypes.
9137    ///
9138    /// # Parameters
9139    ///
9140    /// - `spMatDescr`: Sparse matrix descriptor.
9141    /// - `brows`: Number of block rows of the sparse matrix.
9142    /// - `bcols`: Number of block columns of the sparse matrix.
9143    /// - `bnnz`: Number of blocks of the sparse matrix.
9144    /// - `rowBlockSize`: Number of rows of each block.
9145    /// - `colBlockSize`: Number of columns of each block.
9146    /// - `bsrRowOffsets`: Block row offsets of the sparse matrix. Array of size `brows + 1`.
9147    /// - `bsrColInd`: Block column indices of the sparse matrix. Array of size `bnnz`.
9148    /// - `bsrValues`: Values of the sparse matrix. Array of size `bnnz * rowBlockSize * colBlockSize`.
9149    /// - `bsrRowOffsetsType`: Data type of `bsrRowOffsets`.
9150    /// - `bsrColIndType`: Data type of `bsrColInd`.
9151    /// - `idxBase`: Base index of `bsrRowOffsets` and `bsrColInd`.
9152    /// - `valueType`: Datatype of `bsrValues`.
9153    /// - `order`: Enumerator specifying the memory layout of values in each block.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9154    pub fn cusparseCreateBsr(
9155        spMatDescr: *mut cusparseSpMatDescr_t,
9156        brows: i64,
9157        bcols: i64,
9158        bnnz: i64,
9159        rowBlockSize: i64,
9160        colBlockSize: i64,
9161        bsrRowOffsets: *mut ::core::ffi::c_void,
9162        bsrColInd: *mut ::core::ffi::c_void,
9163        bsrValues: *mut ::core::ffi::c_void,
9164        bsrRowOffsetsType: cusparseIndexType_t,
9165        bsrColIndType: cusparseIndexType_t,
9166        idxBase: cusparseIndexBase_t,
9167        valueType: cudaDataType,
9168        order: cusparseOrder_t,
9169    ) -> cusparseStatus_t;
9170}
9171unsafe extern "C" {
    /// Const variant, presumably of [`cusparseCreateBsr`]: the descriptor out-parameter is a
    /// `*mut` [`cusparseConstSpMatDescr_t`] and the three arrays are taken as `*const` pointers.
    ///
    /// NOTE(review): the block dimensions are named `rowBlockDim`/`colBlockDim` here but
    /// `rowBlockSize`/`colBlockSize` in [`cusparseCreateBsr`]; presumably they mean the same
    /// thing. This declaration carries no upstream documentation — confirm against the
    /// cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9172    pub fn cusparseCreateConstBsr(
9173        spMatDescr: *mut cusparseConstSpMatDescr_t,
9174        brows: i64,
9175        bcols: i64,
9176        bnnz: i64,
9177        rowBlockDim: i64,
9178        colBlockDim: i64,
9179        bsrRowOffsets: *const ::core::ffi::c_void,
9180        bsrColInd: *const ::core::ffi::c_void,
9181        bsrValues: *const ::core::ffi::c_void,
9182        bsrRowOffsetsType: cusparseIndexType_t,
9183        bsrColIndType: cusparseIndexType_t,
9184        idxBase: cusparseIndexBase_t,
9185        valueType: cudaDataType,
9186        order: cusparseOrder_t,
9187    ) -> cusparseStatus_t;
9188}
9189unsafe extern "C" {
9190    /// This function initializes the sparse matrix descriptor `spMatDescr` in the COO format (Structure of Arrays layout).
9191    ///
9192    /// [`cusparseCreateCoo`] has the following constraints:
9193    ///
9194    /// * `cooRowInd`, `cooColInd`, and `cooValues` must be aligned to the size of the datatypes specified by `cooIdxType`, `cooIdxType`, and `valueType`, respectively. See [`cudaDataType`] for the description of the datatypes.
9195    ///
9196    /// # Parameters
9197    ///
9198    /// - `spMatDescr`: Sparse matrix descriptor.
9199    /// - `rows`: Number of rows of the sparse matrix.
9200    /// - `cols`: Number of columns of the sparse matrix.
9201    /// - `nnz`: Number of non-zero entries of the sparse matrix.
9202    /// - `cooRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
9203    /// - `cooColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
9204    /// - `cooValues`: Values of the sparse matrix. Array with `nnz` elements.
9205    /// - `cooIdxType`: Data type of `cooRowInd` and `cooColInd`.
9206    /// - `idxBase`: Index base of `cooRowInd` and `cooColInd`.
9207    /// - `valueType`: Datatype of `cooValues`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9208    pub fn cusparseCreateCoo(
9209        spMatDescr: *mut cusparseSpMatDescr_t,
9210        rows: i64,
9211        cols: i64,
9212        nnz: i64,
9213        cooRowInd: *mut ::core::ffi::c_void,
9214        cooColInd: *mut ::core::ffi::c_void,
9215        cooValues: *mut ::core::ffi::c_void,
9216        cooIdxType: cusparseIndexType_t,
9217        idxBase: cusparseIndexBase_t,
9218        valueType: cudaDataType,
9219    ) -> cusparseStatus_t;
9220}
9221unsafe extern "C" {
    /// Const variant, presumably of [`cusparseCreateCoo`]: the descriptor out-parameter is a
    /// `*mut` [`cusparseConstSpMatDescr_t`] and the three arrays are taken as `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9222    pub fn cusparseCreateConstCoo(
9223        spMatDescr: *mut cusparseConstSpMatDescr_t,
9224        rows: i64,
9225        cols: i64,
9226        nnz: i64,
9227        cooRowInd: *const ::core::ffi::c_void,
9228        cooColInd: *const ::core::ffi::c_void,
9229        cooValues: *const ::core::ffi::c_void,
9230        cooIdxType: cusparseIndexType_t,
9231        idxBase: cusparseIndexBase_t,
9232        valueType: cudaDataType,
9233    ) -> cusparseStatus_t;
9234}
9235unsafe extern "C" {
9236    /// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in COO format (Array of Structures layout).
9237    ///
    /// NOTE(review): [`cusparseCreateCoo`] describes its COO layout as "Structure of Arrays",
    /// and this getter likewise exposes separate `cooRowInd`/`cooColInd` pointers; the
    /// "Array of Structures" wording above is kept from upstream and may be inaccurate —
    /// confirm against the cuSPARSE API reference.
    ///
9238    /// # Parameters
9239    ///
9240    /// - `spMatDescr`: Sparse matrix descriptor.
9241    /// - `rows`: Number of rows of the sparse matrix.
9242    /// - `cols`: Number of columns of the sparse matrix.
9243    /// - `nnz`: Number of non-zero entries of the sparse matrix.
9244    /// - `cooRowInd`: Row indices of the sparse matrix. Array with `nnz` elements.
9245    /// - `cooColInd`: Column indices of the sparse matrix. Array with `nnz` elements.
9246    /// - `cooValues`: Values of the sparse matrix. Array with `nnz` elements.
    /// - `idxType`: Not described upstream; presumably the data type of `cooRowInd` and
    ///   `cooColInd` — TODO confirm.
9247    /// - `idxBase`: Index base of `cooRowInd` and `cooColInd`.
9248    /// - `valueType`: Datatype of `cooValues`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9249    pub fn cusparseCooGet(
9250        spMatDescr: cusparseSpMatDescr_t,
9251        rows: *mut i64,
9252        cols: *mut i64,
9253        nnz: *mut i64,
9254        cooRowInd: *mut *mut ::core::ffi::c_void,
9255        cooColInd: *mut *mut ::core::ffi::c_void,
9256        cooValues: *mut *mut ::core::ffi::c_void,
9257        idxType: *mut cusparseIndexType_t,
9258        idxBase: *mut cusparseIndexBase_t,
9259        valueType: *mut cudaDataType,
9260    ) -> cusparseStatus_t;
9261}
9262unsafe extern "C" {
    /// Const-descriptor variant, presumably of [`cusparseCooGet`]: `spMatDescr` is a
    /// [`cusparseConstSpMatDescr_t`] and the three arrays are written as `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9263    pub fn cusparseConstCooGet(
9264        spMatDescr: cusparseConstSpMatDescr_t,
9265        rows: *mut i64,
9266        cols: *mut i64,
9267        nnz: *mut i64,
9268        cooRowInd: *mut *const ::core::ffi::c_void,
9269        cooColInd: *mut *const ::core::ffi::c_void,
9270        cooValues: *mut *const ::core::ffi::c_void,
9271        idxType: *mut cusparseIndexType_t,
9272        idxBase: *mut cusparseIndexBase_t,
9273        valueType: *mut cudaDataType,
9274    ) -> cusparseStatus_t;
9275}
9276unsafe extern "C" {
9277    /// This function sets the pointers of the sparse matrix descriptor `spMatDescr`.
9278    ///
9279    /// [`cusparseCooSetPointers`] has the following constraints:
9280    ///
9281    /// * `cooRows`, `cooColumns`, and `cooValues` must be aligned to the size of their corresponding datatypes specified in `spMatDescr`. See [`cudaDataType`] for the description of the datatypes.
9282    ///
9283    /// # Parameters
9284    ///
9285    /// - `spMatDescr`: Sparse matrix descriptor.
9286    /// - `cooRows`: Row indices of the sparse matrix. Array with `nnz` elements.
9287    /// - `cooColumns`: Column indices of the sparse matrix. Array with `nnz` elements.
9288    /// - `cooValues`: Values of the sparse matrix. Array with `nnz` elements.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9289    pub fn cusparseCooSetPointers(
9290        spMatDescr: cusparseSpMatDescr_t,
9291        cooRows: *mut ::core::ffi::c_void,
9292        cooColumns: *mut ::core::ffi::c_void,
9293        cooValues: *mut ::core::ffi::c_void,
9294    ) -> cusparseStatus_t;
9295}
9296unsafe extern "C" {
9297    /// This function initializes the sparse matrix descriptor `spMatDescr` for the Blocked-Ellpack (ELL) format.
9298    ///
9299    /// Blocked-ELL Column indices (`ellColInd`) are in the range `[0, cols / ellBlockSize -1]`. The array can contain `-1` values for indicating empty blocks.
9300    ///
9301    /// # Parameters
9302    ///
9303    /// - `spMatDescr`: Sparse matrix descriptor.
9304    /// - `rows`: Number of rows of the sparse matrix.
9305    /// - `cols`: Number of columns of the sparse matrix.
9306    /// - `ellBlockSize`: Size of the ELL-Block.
9307    /// - `ellCols`: Actual number of columns of the Blocked-Ellpack format (`ellValue` columns).
9308    /// - `ellColInd`: Blocked-ELL Column indices. Array with `[ellCols / ellBlockSize][rows / ellBlockSize]` elements.
9309    /// - `ellValue`: Values of the sparse matrix. Array with `rows * ellCols` elements.
9310    /// - `ellIdxType`: Data type of `ellColInd`.
9311    /// - `idxBase`: Index base of `ellColInd`.
9312    /// - `valueType`: Data type of `ellValue`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9313    pub fn cusparseCreateBlockedEll(
9314        spMatDescr: *mut cusparseSpMatDescr_t,
9315        rows: i64,
9316        cols: i64,
9317        ellBlockSize: i64,
9318        ellCols: i64,
9319        ellColInd: *mut ::core::ffi::c_void,
9320        ellValue: *mut ::core::ffi::c_void,
9321        ellIdxType: cusparseIndexType_t,
9322        idxBase: cusparseIndexBase_t,
9323        valueType: cudaDataType,
9324    ) -> cusparseStatus_t;
9325}
9326unsafe extern "C" {
    /// Const variant, presumably of [`cusparseCreateBlockedEll`]: the descriptor out-parameter
    /// is a `*mut` [`cusparseConstSpMatDescr_t`] and `ellColInd`/`ellValue` are taken as
    /// `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9327    pub fn cusparseCreateConstBlockedEll(
9328        spMatDescr: *mut cusparseConstSpMatDescr_t,
9329        rows: i64,
9330        cols: i64,
9331        ellBlockSize: i64,
9332        ellCols: i64,
9333        ellColInd: *const ::core::ffi::c_void,
9334        ellValue: *const ::core::ffi::c_void,
9335        ellIdxType: cusparseIndexType_t,
9336        idxBase: cusparseIndexBase_t,
9337        valueType: cudaDataType,
9338    ) -> cusparseStatus_t;
9339}
9340unsafe extern "C" {
9341    /// This function returns the fields of the sparse matrix descriptor `spMatDescr` stored in Blocked-Ellpack (ELL) format.
9342    ///
    /// NOTE(review): the `ellColInd` extent below uses `cols / ellBlockSize`, whereas
    /// [`cusparseCreateBlockedEll`] documents `ellCols / ellBlockSize`; one of the two is
    /// presumably an upstream typo — confirm against the cuSPARSE API reference.
    ///
9343    /// # Parameters
9344    ///
9345    /// - `spMatDescr`: Sparse matrix descriptor.
9346    /// - `rows`: Number of rows of the sparse matrix.
9347    /// - `cols`: Number of columns of the sparse matrix.
9348    /// - `ellBlockSize`: Size of the ELL-Block.
9349    /// - `ellCols`: Actual number of columns of the Blocked-Ellpack format.
9350    /// - `ellColInd`: Column indices for the ELL-Block. Array with `[cols / ellBlockSize][rows / ellBlockSize]` elements.
9351    /// - `ellValue`: Values of the sparse matrix. Array with `rows * ellCols` elements.
9352    /// - `ellIdxType`: Data type of `ellColInd`.
9353    /// - `idxBase`: Index base of `ellColInd`.
9354    /// - `valueType`: Datatype of `ellValue`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9355    pub fn cusparseBlockedEllGet(
9356        spMatDescr: cusparseSpMatDescr_t,
9357        rows: *mut i64,
9358        cols: *mut i64,
9359        ellBlockSize: *mut i64,
9360        ellCols: *mut i64,
9361        ellColInd: *mut *mut ::core::ffi::c_void,
9362        ellValue: *mut *mut ::core::ffi::c_void,
9363        ellIdxType: *mut cusparseIndexType_t,
9364        idxBase: *mut cusparseIndexBase_t,
9365        valueType: *mut cudaDataType,
9366    ) -> cusparseStatus_t;
9367}
9368unsafe extern "C" {
    /// Const-descriptor variant, presumably of [`cusparseBlockedEllGet`]: `spMatDescr` is a
    /// [`cusparseConstSpMatDescr_t`] and `ellColInd`/`ellValue` are written as `*const` pointers.
    ///
    /// NOTE(review): this declaration carries no upstream documentation; confirm the
    /// parameter semantics against the cuSPARSE API reference.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9369    pub fn cusparseConstBlockedEllGet(
9370        spMatDescr: cusparseConstSpMatDescr_t,
9371        rows: *mut i64,
9372        cols: *mut i64,
9373        ellBlockSize: *mut i64,
9374        ellCols: *mut i64,
9375        ellColInd: *mut *const ::core::ffi::c_void,
9376        ellValue: *mut *const ::core::ffi::c_void,
9377        ellIdxType: *mut cusparseIndexType_t,
9378        idxBase: *mut cusparseIndexBase_t,
9379        valueType: *mut cudaDataType,
9380    ) -> cusparseStatus_t;
9381}
9382unsafe extern "C" {
9383    /// This function initializes the sparse matrix descriptor `spMatDescr` for the Sliced Ellpack (SELL) format.
9384    ///
9385    /// [`cusparseCreateSlicedEll`] has the following constraints:
9386    ///
9387    /// * `sellSliceOffsets`, `sellColInd`, and `sellValues` must be aligned to the size of the datatypes specified by `sellSliceOffsetsType`, `sellColIndType`, and `valueType`, respectively. See [`cudaDataType`] for the description of the datatypes.
9388    ///
9389    /// # Parameters
9390    ///
9391    /// - `spMatDescr`: Sparse matrix descriptor.
9392    /// - `rows`: Number of rows of the sparse matrix.
9393    /// - `cols`: Number of columns of the sparse matrix.
9394    /// - `nnz`: Number of nonzero elements in the sparse matrix.
9395    /// - `sellValuesSize`: Total number of elements in `sellValues` array (nonzero and padding).
9396    /// - `sliceSize`: The number of rows per slice.
9397    /// - `sellSliceOffsets`: Slice offsets of the sparse matrix. Array of size $\left \lceil{\frac{rows}{sliceSize}}\right \rceil + 1$.
9398    /// - `sellColInd`: Column indices of the sparse matrix. Array of size `sellValuesSize`.
9399    /// - `sellValues`: Values of the sparse matrix. Array of size `sellValuesSize`.
9400    /// - `sellSliceOffsetsType`: Data type of `sellSliceOffsets`.
9401    /// - `sellColIndType`: Data type of `sellColInd`.
9402    /// - `idxBase`: Index base of `sellColInd`.
9403    /// - `valueType`: Data type of `sellValues`.
    ///
    /// # Returns
    ///
    /// A [`cusparseStatus_t`] status value.
9404    pub fn cusparseCreateSlicedEll(
9405        spMatDescr: *mut cusparseSpMatDescr_t,
9406        rows: i64,
9407        cols: i64,
9408        nnz: i64,
9409        sellValuesSize: i64,
9410        sliceSize: i64,
9411        sellSliceOffsets: *mut ::core::ffi::c_void,
9412        sellColInd: *mut ::core::ffi::c_void,
9413        sellValues: *mut ::core::ffi::c_void,
9414        sellSliceOffsetsType: cusparseIndexType_t,
9415        sellColIndType: cusparseIndexType_t,
9416        idxBase: cusparseIndexBase_t,
9417        valueType: cudaDataType,
9418    ) -> cusparseStatus_t;
9419}
9420unsafe extern "C" {
    /// Const-descriptor counterpart of [`cusparseCreateSlicedEll`].
    ///
    /// The signature differs from [`cusparseCreateSlicedEll`] only in that the descriptor written
    /// through `spMatDescr` is a `cusparseConstSpMatDescr_t` and that `sellSliceOffsets`,
    /// `sellColInd`, and `sellValues` are taken as `*const` pointers.
    ///
    /// # Parameters
    ///
    /// See [`cusparseCreateSlicedEll`] for the meaning of each parameter; the names and the
    /// remaining types are identical. The alignment constraints listed there presumably apply
    /// here as well (NOTE(review): confirm against the cuSPARSE documentation).
9421    pub fn cusparseCreateConstSlicedEll(
9422        spMatDescr: *mut cusparseConstSpMatDescr_t,
9423        rows: i64,
9424        cols: i64,
9425        nnz: i64,
9426        sellValuesSize: i64,
9427        sliceSize: i64,
9428        sellSliceOffsets: *const ::core::ffi::c_void,
9429        sellColInd: *const ::core::ffi::c_void,
9430        sellValues: *const ::core::ffi::c_void,
9431        sellSliceOffsetsType: cusparseIndexType_t,
9432        sellColIndType: cusparseIndexType_t,
9433        idxBase: cusparseIndexBase_t,
9434        valueType: cudaDataType,
9435    ) -> cusparseStatus_t;
9436}
9437unsafe extern "C" {
9438    /// The function initializes the dense matrix descriptor `dnMatDescr`.
9439    ///
9440    /// [`cusparseCreateDnMat`] has the following constraints:
9441    ///
9442    /// * `values` must be aligned to the size of the datatype specified by `valueType`. See [`cudaDataType_t`] for the description of the datatypes.
9443    ///
9444    /// # Parameters
9445    ///
9446    /// - `dnMatDescr`: Dense matrix descriptor. Passed by pointer; the initialized descriptor is stored through it.
9447    /// - `rows`: Number of rows of the dense matrix.
9448    /// - `cols`: Number of columns of the dense matrix.
9449    /// - `ld`: Leading dimension of the dense matrix.
9450    /// - `values`: Values of the dense matrix. Array with `size` elements.
9451    /// - `valueType`: Enumerator specifying the datatype of `values`.
9452    /// - `order`: Enumerator specifying the memory layout of the dense matrix.
9453    pub fn cusparseCreateDnMat(
9454        dnMatDescr: *mut cusparseDnMatDescr_t,
9455        rows: i64,
9456        cols: i64,
9457        ld: i64,
9458        values: *mut ::core::ffi::c_void,
9459        valueType: cudaDataType,
9460        order: cusparseOrder_t,
9461    ) -> cusparseStatus_t;
9462}
9463unsafe extern "C" {
    /// Const-descriptor counterpart of [`cusparseCreateDnMat`].
    ///
    /// The signature differs from [`cusparseCreateDnMat`] only in that the descriptor written
    /// through `dnMatDescr` is a `cusparseConstDnMatDescr_t` and that `values` is taken as a
    /// `*const` pointer.
    ///
    /// # Parameters
    ///
    /// See [`cusparseCreateDnMat`] for the meaning of each parameter; the names and the remaining
    /// types are identical. The alignment constraint on `values` listed there presumably applies
    /// here as well (NOTE(review): confirm against the cuSPARSE documentation).
9464    pub fn cusparseCreateConstDnMat(
9465        dnMatDescr: *mut cusparseConstDnMatDescr_t,
9466        rows: i64,
9467        cols: i64,
9468        ld: i64,
9469        values: *const ::core::ffi::c_void,
9470        valueType: cudaDataType,
9471        order: cusparseOrder_t,
9472    ) -> cusparseStatus_t;
9473}
9474unsafe extern "C" {
9475    /// This function releases the host memory allocated for the dense matrix descriptor `dnMatDescr`.
9476    ///
    /// The parameter is typed as the const handle `cusparseConstDnMatDescr_t`.
    /// NOTE(review): the descriptor presumably must not be used after this call — confirm
    /// against the cuSPARSE documentation.
    ///
9477    /// # Parameters
9478    ///
9479    /// - `dnMatDescr`: Dense matrix descriptor to release.
9480    pub fn cusparseDestroyDnMat(
9481        dnMatDescr: cusparseConstDnMatDescr_t,
9482    ) -> cusparseStatus_t;
9483}
9484unsafe extern "C" {
9485    /// This function returns the fields of the dense matrix descriptor `dnMatDescr`.
9486    ///
    /// Every argument after `dnMatDescr` is a pointer through which the corresponding field is
    /// returned.
    ///
9487    /// # Parameters
9488    ///
9489    /// - `dnMatDescr`: Dense matrix descriptor.
9490    /// - `rows`: Number of rows of the dense matrix.
9491    /// - `cols`: Number of columns of the dense matrix.
9492    /// - `ld`: Leading dimension of the dense matrix.
9493    /// - `values`: Values of the dense matrix. Array with `ld * cols` elements.
    /// - `type_`: Enumerator specifying the datatype of `values` (the counterpart of `valueType` in [`cusparseCreateDnMat`]).
9494    /// - `order`: Enumerator specifying the memory layout of the dense matrix.
9495    pub fn cusparseDnMatGet(
9496        dnMatDescr: cusparseDnMatDescr_t,
9497        rows: *mut i64,
9498        cols: *mut i64,
9499        ld: *mut i64,
9500        values: *mut *mut ::core::ffi::c_void,
9501        type_: *mut cudaDataType,
9502        order: *mut cusparseOrder_t,
9503    ) -> cusparseStatus_t;
9504}
9505unsafe extern "C" {
    /// Const-descriptor counterpart of [`cusparseDnMatGet`].
    ///
    /// The signature differs from [`cusparseDnMatGet`] only in that the descriptor is a
    /// `cusparseConstDnMatDescr_t` and that `values` receives a `*const` pointer.
    ///
    /// # Parameters
    ///
    /// See [`cusparseDnMatGet`] for the meaning of each parameter; the names and the remaining
    /// types are identical.
9506    pub fn cusparseConstDnMatGet(
9507        dnMatDescr: cusparseConstDnMatDescr_t,
9508        rows: *mut i64,
9509        cols: *mut i64,
9510        ld: *mut i64,
9511        values: *mut *const ::core::ffi::c_void,
9512        type_: *mut cudaDataType,
9513        order: *mut cusparseOrder_t,
9514    ) -> cusparseStatus_t;
9515}
9516unsafe extern "C" {
9517    /// This function returns the `values` field of the dense matrix descriptor `dnMatDescr`.
9518    ///
9519    /// # Parameters
9520    ///
9521    /// - `dnMatDescr`: Dense matrix descriptor.
9522    /// - `values`: Values of the dense matrix. Array with `ld * cols` elements. Passed as a pointer-to-pointer; the `values` pointer is returned through it.
9523    pub fn cusparseDnMatGetValues(
9524        dnMatDescr: cusparseDnMatDescr_t,
9525        values: *mut *mut ::core::ffi::c_void,
9526    ) -> cusparseStatus_t;
9527}
9528unsafe extern "C" {
    /// Const-descriptor counterpart of [`cusparseDnMatGetValues`].
    ///
    /// Takes a `cusparseConstDnMatDescr_t` and returns the `values` pointer as `*const`.
    ///
    /// # Parameters
    ///
    /// - `dnMatDescr`: Dense matrix descriptor (const handle).
    /// - `values`: Pointer-to-pointer through which the `values` pointer is returned.
9529    pub fn cusparseConstDnMatGetValues(
9530        dnMatDescr: cusparseConstDnMatDescr_t,
9531        values: *mut *const ::core::ffi::c_void,
9532    ) -> cusparseStatus_t;
9533}
9534unsafe extern "C" {
9535    /// This function sets the `values` field of the dense matrix descriptor `dnMatDescr`.
9536    ///
9537    /// [`cusparseDnMatSetValues`] has the following constraints:
9538    ///
9539    /// * `values` must be aligned to the size of the datatype specified in `dnMatDescr`. See [`cudaDataType_t`] for the description of the datatypes.
9540    ///
9541    /// # Parameters
9542    ///
9543    /// - `dnMatDescr`: Dense matrix descriptor.
9544    /// - `values`: Values of the dense matrix. Array with `ld * cols` elements.
9545    pub fn cusparseDnMatSetValues(
9546        dnMatDescr: cusparseDnMatDescr_t,
9547        values: *mut ::core::ffi::c_void,
9548    ) -> cusparseStatus_t;
9549}
9550unsafe extern "C" {
9551    /// The function sets the number of batches and the batch stride of the dense matrix descriptor `dnMatDescr`.
9552    ///
    /// The values can be read back with [`cusparseDnMatGetStridedBatch`].
    ///
9553    /// # Parameters
9554    ///
9555    /// - `dnMatDescr`: Dense matrix descriptor.
9556    /// - `batchCount`: Number of batches of the dense matrix.
9557    /// - `batchStride`: Address offset between a matrix and the next one in the batch. `batchStride ≥ ld * cols` if the matrix uses column-major layout, `batchStride ≥ ld * rows` otherwise.
9558    pub fn cusparseDnMatSetStridedBatch(
9559        dnMatDescr: cusparseDnMatDescr_t,
9560        batchCount: ::core::ffi::c_int,
9561        batchStride: i64,
9562    ) -> cusparseStatus_t;
9563}
9564unsafe extern "C" {
9565    /// The function returns the number of batches and the batch stride of the dense matrix descriptor `dnMatDescr`.
9566    ///
    /// Both values are returned through the `batchCount` and `batchStride` pointers. The
    /// descriptor is typed as the const handle `cusparseConstDnMatDescr_t`.
    ///
9567    /// # Parameters
9568    ///
9569    /// - `dnMatDescr`: Dense matrix descriptor.
9570    /// - `batchCount`: Number of batches of the dense matrix.
9571    /// - `batchStride`: Address offset between a matrix and the next one in the batch.
9572    pub fn cusparseDnMatGetStridedBatch(
9573        dnMatDescr: cusparseConstDnMatDescr_t,
9574        batchCount: *mut ::core::ffi::c_int,
9575        batchStride: *mut i64,
9576    ) -> cusparseStatus_t;
9577}
9578unsafe extern "C" {
9579    /// The function computes the sum of a sparse vector `vecX` and a dense vector `vecY`.
9580    ///
9581    /// In other words,
9582    ///
9583    /// ```text
9584    /// for i=0 to n-1
9585    /// Y[i] = beta * Y[i]
9586    /// for i=0 to nnz-1
9587    /// Y[X_indices[i]] += alpha * X_values[i]
9588    /// ```
9589    ///
9590    /// [`cusparseAxpby`] supports the following index type for representing the sparse vector `vecX`:
9591    ///
9592    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
9593    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
9594    ///
9595    /// [`cusparseAxpby`] supports the following data types:
9596    ///
9597    /// Uniform-precision computation:
9598    ///
9599    /// | `X`/`Y`/`compute` |
9600    /// | --- |
9601    /// | [`cudaDataType_t::CUDA_R_32F`] |
9602    /// | [`cudaDataType_t::CUDA_R_64F`] |
9603    /// | [`cudaDataType_t::CUDA_C_32F`] |
9604    /// | [`cudaDataType_t::CUDA_C_64F`] |
9605    ///
9606    /// Mixed-precision computation:
9607    ///
9608    /// | `X`/`Y` | `compute` | Notes |
9609    /// | --- | --- | --- |
9610    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] |  |
9611    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] |  |
9612    /// | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_32F`] | [DEPRECATED] |
9613    /// | [`cudaDataType_t::CUDA_C_16BF`] | [`cudaDataType_t::CUDA_C_32F`] | [DEPRECATED] |
9614    ///
9615    /// [`cusparseAxpby`] has the following constraints:
9616    ///
9617    /// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
9618    ///
9619    /// [`cusparseAxpby`] has the following properties:
9620    ///
9621    /// * The routine requires no extra storage
9622    /// * The routine supports asynchronous execution
9623    /// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
9624    /// * The routine allows `indices` of `vecX` to be unsorted
9625    ///
9626    /// [`cusparseAxpby`] supports the following optimizations:
9627    ///
9628    /// * CUDA graph capture
9629    /// * Hardware Memory Compression
9630    ///
9631    /// Please visit [cuSPARSE Library Samples - cusparseAxpby](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/axpby) for a code example.
9632    ///
9633    /// # Parameters
9634    ///
9635    /// - `handle`: Handle to the cuSPARSE library context.
9636    /// - `alpha`: $\alpha$ scalar used for multiplication of compute type.
9637    /// - `vecX`: Sparse vector `X`.
9638    /// - `beta`: $\beta$ scalar used for multiplication of compute type.
9639    /// - `vecY`: Dense vector `Y`.
9640    #[deprecated]
9641    pub fn cusparseAxpby(
9642        handle: cusparseHandle_t,
9643        alpha: *const ::core::ffi::c_void,
9644        vecX: cusparseConstSpVecDescr_t,
9645        beta: *const ::core::ffi::c_void,
9646        vecY: cusparseDnVecDescr_t,
9647    ) -> cusparseStatus_t;
9648}
9649unsafe extern "C" {
9650    /// The function gathers the elements of the dense vector `vecY` into the sparse vector `vecX`.
9651    ///
9652    /// In other words,
9653    ///
9654    /// ```text
9655    /// for i=0 to nnz-1
9656    /// X_values[i] = Y[X_indices[i]]
9657    /// ```
9658    ///
9659    /// [`cusparseGather`] supports the following index type for representing the sparse vector `vecX`:
9660    ///
9661    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
9662    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
9663    ///
9664    /// [`cusparseGather`] supports the following data types:
9665    ///
9666    /// | `X`/`Y` |
9667    /// | --- |
9668    /// | [`cudaDataType_t::CUDA_R_16F`] |
9669    /// | [`cudaDataType_t::CUDA_R_16BF`] |
9670    /// | [`cudaDataType_t::CUDA_R_32F`] |
9671    /// | [`cudaDataType_t::CUDA_R_64F`] |
9672    /// | [`cudaDataType_t::CUDA_C_16F`] [DEPRECATED] |
9673    /// | [`cudaDataType_t::CUDA_C_16BF`] [DEPRECATED] |
9674    /// | [`cudaDataType_t::CUDA_C_32F`] |
9675    /// | [`cudaDataType_t::CUDA_C_64F`] |
9676    ///
9677    /// [`cusparseGather`] has the following constraints:
9678    ///
9679    /// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
9680    ///
9681    /// [`cusparseGather`] has the following properties:
9682    ///
9683    /// * The routine requires no extra storage
9684    /// * The routine supports asynchronous execution
9685    /// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
9686    /// * The routine allows `indices` of `vecX` to be unsorted
9687    ///
9688    /// [`cusparseGather`] supports the following optimizations:
9689    ///
9690    /// * CUDA graph capture
9691    /// * Hardware Memory Compression
9692    ///
9693    /// Please visit [cuSPARSE Library Samples - cusparseGather](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/gather) for a code example.
9694    ///
9695    /// # Parameters
9696    ///
9697    /// - `handle`: Handle to the cuSPARSE library context.
9698    /// - `vecY`: Dense vector `Y`.
9699    /// - `vecX`: Sparse vector `X`.
9700    pub fn cusparseGather(
9701        handle: cusparseHandle_t,
9702        vecY: cusparseConstDnVecDescr_t,
9703        vecX: cusparseSpVecDescr_t,
9704    ) -> cusparseStatus_t;
9705}
9706unsafe extern "C" {
9707    /// The function scatters the elements of the sparse vector `vecX` into the dense vector `vecY`.
9708    ///
9709    /// In other words,
9710    ///
9711    /// ```text
9712    /// for i=0 to nnz-1
9713    /// Y[X_indices[i]] = X_values[i]
9714    /// ```
9715    ///
9716    /// [`cusparseScatter`] supports the following index type for representing the sparse vector `vecX`:
9717    ///
9718    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
9719    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
9720    ///
9721    /// [`cusparseScatter`] supports the following data types:
9722    ///
9723    /// | `X`/`Y` |
9724    /// | --- |
9725    /// | [`cudaDataType_t::CUDA_R_8I`] |
9726    /// | [`cudaDataType_t::CUDA_R_16F`] |
9727    /// | [`cudaDataType_t::CUDA_R_16BF`] |
9728    /// | [`cudaDataType_t::CUDA_R_32F`] |
9729    /// | [`cudaDataType_t::CUDA_R_64F`] |
9730    /// | [`cudaDataType_t::CUDA_C_16F`] [DEPRECATED] |
9731    /// | [`cudaDataType_t::CUDA_C_16BF`] [DEPRECATED] |
9732    /// | [`cudaDataType_t::CUDA_C_32F`] |
9733    /// | [`cudaDataType_t::CUDA_C_64F`] |
9734    ///
9735    /// [`cusparseScatter`] has the following constraints:
9736    ///
9737    /// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
9738    ///
9739    /// [`cusparseScatter`] has the following properties:
9740    ///
9741    /// * The routine requires no extra storage
9742    /// * The routine supports asynchronous execution
9743    /// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
9744    /// * The routine allows `indices` of `vecX` to be unsorted
9745    ///
9746    /// [`cusparseScatter`] supports the following optimizations:
9747    ///
9748    /// * CUDA graph capture
9749    /// * Hardware Memory Compression
9750    ///
9751    /// Please visit [cuSPARSE Library Samples - cusparseScatter](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/scatter) for a code example.
9752    ///
9753    /// # Parameters
9754    ///
9755    /// - `handle`: Handle to the cuSPARSE library context.
9756    /// - `vecX`: Sparse vector `X`.
9757    /// - `vecY`: Dense vector `Y`.
9758    pub fn cusparseScatter(
9759        handle: cusparseHandle_t,
9760        vecX: cusparseConstSpVecDescr_t,
9761        vecY: cusparseDnVecDescr_t,
9762    ) -> cusparseStatus_t;
9763}
9764unsafe extern "C" {
9765    /// The function applies the Givens rotation matrix
9766    ///
    /// ```text
    /// G = |  c  s |
    ///     | -s  c |
    /// ```
    ///
9767    /// to a sparse vector `vecX` and a dense vector `vecY` (acting on the pair `(X, Y)`).
9768    ///
9769    /// In other words,
9770    ///
9771    /// ```text
9772    /// for i=0 to nnz-1
9773    /// Y[X_indices[i]] = c * Y[X_indices[i]] - s * X_values[i]
9774    /// X_values[i]     = c * X_values[i]     + s * Y[X_indices[i]]
9775    /// ```
9776    ///
9777    /// [`cusparseRot`] supports the following index type for representing the sparse vector `vecX`:
9778    ///
9779    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
9780    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
9781    ///
9782    /// [`cusparseRot`] supports the following data types:
9783    ///
9784    /// Uniform-precision computation:
9785    ///
9786    /// | `X`/`Y`/`compute` |
9787    /// | --- |
9788    /// | [`cudaDataType_t::CUDA_R_32F`] |
9789    /// | [`cudaDataType_t::CUDA_R_64F`] |
9790    /// | [`cudaDataType_t::CUDA_C_32F`] |
9791    /// | [`cudaDataType_t::CUDA_C_64F`] |
9792    ///
9793    /// Mixed-precision computation:
9794    ///
9795    /// | `X`/`Y` | `compute` | Notes |
9796    /// | --- | --- | --- |
9797    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] |  |
9798    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] |  |
9799    /// | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_32F`] | [DEPRECATED] |
9800    /// | [`cudaDataType_t::CUDA_C_16BF`] | [`cudaDataType_t::CUDA_C_32F`] | [DEPRECATED] |
9801    ///
9802    /// [`cusparseRot`] has the following constraints:
9803    ///
9804    /// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
9805    ///
9806    /// [`cusparseRot`] has the following properties:
9807    ///
9808    /// * The routine requires no extra storage
9809    /// * The routine supports asynchronous execution
9810    /// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
9811    ///
9812    /// [`cusparseRot`] supports the following optimizations:
9813    ///
9814    /// * CUDA graph capture
9815    /// * Hardware Memory Compression
9816    ///
9817    /// Please visit [cuSPARSE Library Samples - cusparseRot](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/rot) for a code example.
9818    ///
9819    /// # Parameters
9820    ///
9821    /// - `handle`: Handle to the cuSPARSE library context.
9822    /// - `c_coeff`: cosine element of the rotation matrix.
9823    /// - `s_coeff`: sine element of the rotation matrix.
9824    /// - `vecX`: Sparse vector `X`.
9825    /// - `vecY`: Dense vector `Y`.
9826    #[deprecated]
9827    pub fn cusparseRot(
9828        handle: cusparseHandle_t,
9829        c_coeff: *const ::core::ffi::c_void,
9830        s_coeff: *const ::core::ffi::c_void,
9831        vecX: cusparseSpVecDescr_t,
9832        vecY: cusparseDnVecDescr_t,
9833    ) -> cusparseStatus_t;
9834}
9835unsafe extern "C" {
    /// Returns the size of the workspace needed by [`cusparseSpVV`].
    ///
    /// The first six parameters mirror those of [`cusparseSpVV`], except that `result` is taken
    /// as a `*const` pointer here.
    ///
    /// # Parameters
    ///
    /// - `handle`: Handle to the cuSPARSE library context.
    /// - `opX`: Operation `op(X)` that is non-transpose or conjugate transpose.
    /// - `vecX`: Sparse vector `X`.
    /// - `vecY`: Dense vector `Y`.
    /// - `result`: The resulting dot product (as passed to [`cusparseSpVV`]).
    /// - `computeType`: Datatype in which the computation is executed.
    /// - `bufferSize`: Pointer through which the workspace size is returned; [`cusparseSpVV`] documents its `externalBuffer` as holding at least `bufferSize` bytes.
9836    pub fn cusparseSpVV_bufferSize(
9837        handle: cusparseHandle_t,
9838        opX: cusparseOperation_t,
9839        vecX: cusparseConstSpVecDescr_t,
9840        vecY: cusparseConstDnVecDescr_t,
9841        result: *const ::core::ffi::c_void,
9842        computeType: cudaDataType,
9843        bufferSize: *mut size_t,
9844    ) -> cusparseStatus_t;
9845}
9846unsafe extern "C" {
9847    /// The function computes the inner dot product of a sparse vector `vecX` and a dense vector `vecY`.
9848    ///
9849    /// In other words,
9850    ///
9851    /// ![image10](_images/op-x-non-transpose-2.png)
9852    ///
    /// (Text fallback for the image: `result` is the dot product of `op(X)` and `Y`, where
    /// `op(X)` is selected by `opX` — non-transpose or conjugate transpose.)
    ///
9853    /// The function [`cusparseSpVV_bufferSize`] returns the size of the workspace needed by [`cusparseSpVV`]
9854    ///
9855    /// [`cusparseSpVV`] supports the following index type for representing the sparse vector `vecX`:
9856    ///
9857    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
9858    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
9859    ///
9860    /// The data types combinations currently supported for [`cusparseSpVV`] are listed below:
9861    ///
9862    /// Uniform-precision computation:
9863    ///
9864    /// | `X`/`Y`/`computeType` |
9865    /// | --- |
9866    /// | [`cudaDataType_t::CUDA_R_32F`] |
9867    /// | [`cudaDataType_t::CUDA_R_64F`] |
9868    /// | [`cudaDataType_t::CUDA_C_32F`] |
9869    /// | [`cudaDataType_t::CUDA_C_64F`] |
9870    ///
9871    /// Mixed-precision computation:
9872    ///
9873    /// | `X`/`Y` | `computeType`/`result` | Notes |
9874    /// | --- | --- | --- |
9875    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] |  |
9876    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32F`] |  |
9877    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] |  |
9878    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] |  |
9879    /// | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_32F`] | [DEPRECATED] |
9880    /// | [`cudaDataType_t::CUDA_C_16BF`] | [`cudaDataType_t::CUDA_C_32F`] | [DEPRECATED] |
9881    ///
9882    /// [`cusparseSpVV`] has the following constraints:
9883    ///
9884    /// * The arrays representing the sparse vector `vecX` must be aligned to 16 bytes
9885    ///
9886    /// [`cusparseSpVV`] has the following properties:
9887    ///
9888    /// * The routine requires no extra storage
9889    /// * The routine supports asynchronous execution
9890    /// * Provides deterministic (bit-wise) results for each run if the sparse vector `vecX` indices are distinct
9891    /// * The routine allows `indices` of `vecX` to be unsorted
9892    ///
9893    /// [`cusparseSpVV`] supports the following optimizations:
9894    ///
9895    /// * CUDA graph capture
9896    /// * Hardware Memory Compression
9897    ///
9898    /// Please visit [cuSPARSE Library Samples - cusparseSpVV](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spvv) for a code example.
9899    ///
9900    /// # Parameters
9901    ///
9902    /// - `handle`: Handle to the cuSPARSE library context.
9903    /// - `opX`: Operation `op(X)` that is non-transpose or conjugate transpose.
9904    /// - `vecX`: Sparse vector `X`.
9905    /// - `vecY`: Dense vector `Y`.
9906    /// - `result`: The resulting dot product.
9907    /// - `computeType`: Datatype in which the computation is executed.
9908    /// - `externalBuffer`: Pointer to a workspace buffer of at least `bufferSize` bytes.
9909    #[deprecated]
9910    pub fn cusparseSpVV(
9911        handle: cusparseHandle_t,
9912        opX: cusparseOperation_t,
9913        vecX: cusparseConstSpVecDescr_t,
9914        vecY: cusparseConstDnVecDescr_t,
9915        result: *mut ::core::ffi::c_void,
9916        computeType: cudaDataType,
9917        externalBuffer: *mut ::core::ffi::c_void,
9918    ) -> cusparseStatus_t;
9919}
/// Algorithm selector passed as `alg` to [`cusparseSparseToDense_bufferSize`] and
/// [`cusparseSparseToDense`].
9920#[repr(u32)]
9921#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
9922pub enum cusparseSparseToDenseAlg_t {
    /// Default algorithm (the only variant declared here).
9923    CUSPARSE_SPARSETODENSE_ALG_DEFAULT = 0,
9924}
9925unsafe extern "C" {
    /// Returns the size of the workspace needed by [`cusparseSparseToDense`].
    ///
    /// # Parameters
    ///
    /// - `handle`: Handle to the cuSPARSE library context.
    /// - `matA`: Sparse matrix `A`.
    /// - `matB`: Dense matrix `B`.
    /// - `alg`: Algorithm for the computation.
    /// - `bufferSize`: Pointer through which the workspace size is returned (presumably in bytes — TODO confirm against the cuSPARSE documentation).
9926    pub fn cusparseSparseToDense_bufferSize(
9927        handle: cusparseHandle_t,
9928        matA: cusparseConstSpMatDescr_t,
9929        matB: cusparseDnMatDescr_t,
9930        alg: cusparseSparseToDenseAlg_t,
9931        bufferSize: *mut size_t,
9932    ) -> cusparseStatus_t;
9933}
9934unsafe extern "C" {
9935    /// The function converts the sparse matrix `matA` in CSR, CSC, or COO format into its dense representation `matB`. Blocked-ELL is not currently supported.
9936    ///
9937    /// The function [`cusparseSparseToDense_bufferSize`] returns the size of the workspace needed by [`cusparseSparseToDense`].
9938    ///
9939    /// [`cusparseSparseToDense`] supports the following index type for representing the sparse matrix `matA`:
9940    ///
9941    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
9942    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
9943    ///
9944    /// [`cusparseSparseToDense`] supports the following data types:
9945    ///
9946    /// | `A`/`B` |
9947    /// | --- |
9948    /// | [`cudaDataType_t::CUDA_R_8I`] |
9949    /// | [`cudaDataType_t::CUDA_R_16F`] |
9950    /// | [`cudaDataType_t::CUDA_R_16BF`] |
9951    /// | [`cudaDataType_t::CUDA_R_32F`] |
9952    /// | [`cudaDataType_t::CUDA_R_64F`] |
9953    /// | [`cudaDataType_t::CUDA_C_16F`] [DEPRECATED] |
9954    /// | [`cudaDataType_t::CUDA_C_16BF`] [DEPRECATED] |
9955    /// | [`cudaDataType_t::CUDA_C_32F`] |
9956    /// | [`cudaDataType_t::CUDA_C_64F`] |
9957    ///
9958    /// [`cusparseSparseToDense`] supports the following algorithm:
9959    ///
9960    /// | Algorithm | Notes |
9961    /// | --- | --- |
9962    /// | [`cusparseSparseToDenseAlg_t::CUSPARSE_SPARSETODENSE_ALG_DEFAULT`] | Default algorithm |
9963    ///
9964    /// [`cusparseSparseToDense`] has the following properties:
9965    ///
9966    /// * The routine requires no extra storage
9967    /// * The routine supports asynchronous execution
9968    /// * Provides deterministic (bit-wise) results for each run
9969    /// * The routine allows the indices of `matA` to be unsorted
9970    ///
9971    /// [`cusparseSparseToDense`] supports the following optimizations:
9972    ///
9973    /// * CUDA graph capture
9974    /// * Hardware Memory Compression
9975    ///
9976    /// Please visit [cuSPARSE Library Samples - cusparseSparseToDense](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/sparse2dense_csr) for a code example.
9977    ///
9978    /// # Parameters
9979    ///
9980    /// - `handle`: Handle to the cuSPARSE library context.
9981    /// - `matA`: Sparse matrix `A`.
9982    /// - `matB`: Dense matrix `B`.
9983    /// - `alg`: Algorithm for the computation.
    /// - `externalBuffer`: Pointer to the workspace buffer whose required size is reported by [`cusparseSparseToDense_bufferSize`].
9984    pub fn cusparseSparseToDense(
9985        handle: cusparseHandle_t,
9986        matA: cusparseConstSpMatDescr_t,
9987        matB: cusparseDnMatDescr_t,
9988        alg: cusparseSparseToDenseAlg_t,
9989        externalBuffer: *mut ::core::ffi::c_void,
9990    ) -> cusparseStatus_t;
9991}
/// Algorithm selector passed as `alg` to [`cusparseDenseToSparse_bufferSize`],
/// [`cusparseDenseToSparse_analysis`], and [`cusparseDenseToSparse_convert`].
9992#[repr(u32)]
9993#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
9994pub enum cusparseDenseToSparseAlg_t {
    /// Default algorithm (the only variant declared here).
9995    CUSPARSE_DENSETOSPARSE_ALG_DEFAULT = 0,
9996}
9997unsafe extern "C" {
    /// Workspace-size query of the `cusparseDenseToSparse_*` family.
    ///
    /// NOTE(review): judging by the names and signatures, this reports the workspace size that
    /// [`cusparseDenseToSparse_analysis`] and [`cusparseDenseToSparse_convert`] expect in their
    /// `externalBuffer` argument — confirm the exact contract in the cuSPARSE documentation.
    ///
    /// # Parameters
    ///
    /// - `handle`: Handle to the cuSPARSE library context.
    /// - `matA`: Dense matrix descriptor (const handle).
    /// - `matB`: Sparse matrix descriptor.
    /// - `alg`: Algorithm selector; see [`cusparseDenseToSparseAlg_t`].
    /// - `bufferSize`: Pointer through which the workspace size is returned.
9998    pub fn cusparseDenseToSparse_bufferSize(
9999        handle: cusparseHandle_t,
10000        matA: cusparseConstDnMatDescr_t,
10001        matB: cusparseSpMatDescr_t,
10002        alg: cusparseDenseToSparseAlg_t,
10003        bufferSize: *mut size_t,
10004    ) -> cusparseStatus_t;
10005}
10006unsafe extern "C" {
    /// Analysis step of the `cusparseDenseToSparse_*` family.
    ///
    /// NOTE(review): judging by the names, this is meant to run after
    /// [`cusparseDenseToSparse_bufferSize`] and before [`cusparseDenseToSparse_convert`] —
    /// confirm what it updates in `matB` against the cuSPARSE documentation.
    ///
    /// # Parameters
    ///
    /// - `handle`: Handle to the cuSPARSE library context.
    /// - `matA`: Dense matrix descriptor (const handle).
    /// - `matB`: Sparse matrix descriptor.
    /// - `alg`: Algorithm selector; see [`cusparseDenseToSparseAlg_t`].
    /// - `externalBuffer`: Pointer to a workspace buffer.
10007    pub fn cusparseDenseToSparse_analysis(
10008        handle: cusparseHandle_t,
10009        matA: cusparseConstDnMatDescr_t,
10010        matB: cusparseSpMatDescr_t,
10011        alg: cusparseDenseToSparseAlg_t,
10012        externalBuffer: *mut ::core::ffi::c_void,
10013    ) -> cusparseStatus_t;
10014}
10015unsafe extern "C" {
    /// Conversion step of the `cusparseDenseToSparse_*` family.
    ///
    /// NOTE(review): judging by the names, this converts the dense matrix `matA` into the sparse
    /// matrix `matB` after [`cusparseDenseToSparse_analysis`] has run — confirm the required
    /// call order and supported formats against the cuSPARSE documentation.
    ///
    /// # Parameters
    ///
    /// - `handle`: Handle to the cuSPARSE library context.
    /// - `matA`: Dense matrix descriptor (const handle).
    /// - `matB`: Sparse matrix descriptor.
    /// - `alg`: Algorithm selector; see [`cusparseDenseToSparseAlg_t`].
    /// - `externalBuffer`: Pointer to a workspace buffer.
10016    pub fn cusparseDenseToSparse_convert(
10017        handle: cusparseHandle_t,
10018        matA: cusparseConstDnMatDescr_t,
10019        matB: cusparseSpMatDescr_t,
10020        alg: cusparseDenseToSparseAlg_t,
10021        externalBuffer: *mut ::core::ffi::c_void,
10022    ) -> cusparseStatus_t;
10023}
/// Algorithm selector for [`cusparseSpMV`]; the per-variant notes below are taken from the
/// algorithm table in the [`cusparseSpMV`] documentation.
///
/// The variants are not declared in ascending discriminant order
/// (`CUSPARSE_SPMV_COO_ALG1 = 1` is listed after `CUSPARSE_SPMV_CSR_ALG2 = 3`).
10024#[repr(u32)]
10025#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
10026pub enum cusparseSpMVAlg_t {
    /// Default algorithm for any sparse matrix format.
10027    CUSPARSE_SPMV_ALG_DEFAULT = 0,
    /// Default algorithm for CSR/CSC sparse matrix format. May produce slightly different
    /// results during different runs with the same input parameters.
10028    CUSPARSE_SPMV_CSR_ALG1 = 2,
    /// Provides deterministic (bit-wise) results for each run. If
    /// `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to `CUSPARSE_SPMV_CSR_ALG1`.
10029    CUSPARSE_SPMV_CSR_ALG2 = 3,
    /// Default algorithm for COO sparse matrix format. May produce slightly different results
    /// during different runs with the same input parameters.
10030    CUSPARSE_SPMV_COO_ALG1 = 1,
    /// Provides deterministic (bit-wise) results for each run. If
    /// `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to `CUSPARSE_SPMV_COO_ALG1`.
10031    CUSPARSE_SPMV_COO_ALG2 = 4,
    /// Default algorithm for Sliced Ellpack sparse matrix format. Provides deterministic
    /// (bit-wise) results for each run.
10032    CUSPARSE_SPMV_SELL_ALG1 = 5,
    /// Default algorithm for BSR sparse matrix format. Provides deterministic (bit-wise) results
    /// for each run. Supports only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`.
10033    CUSPARSE_SPMV_BSR_ALG1 = 6,
10034}
10035unsafe extern "C" {
10036    /// This function performs the multiplication of a sparse matrix `matA` and a dense vector `vecX`
10037    ///
10038    /// where
10039    ///
10040    /// * `op(A)` is a sparse matrix of size $m \times k$
10041    /// * `X` is a dense vector of size $k$
10042    /// * `Y` is a dense vector of size $m$
10043    /// * $\alpha$ and $\beta$ are scalars
10044    ///
10045    /// Also, for matrix `A`
10046    ///
10047    /// ![image11](_images/op-a-opa-non-transpose.png)
10048    ///
10049    /// The function [`cusparseSpMV_bufferSize`] returns the size of the workspace needed by [`cusparseSpMV_preprocess`] and [`cusparseSpMV`]
10050    ///
10051    /// The sparse matrix formats currently supported are listed below:
10052    ///
10053    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_COO`]
10054    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
10055    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSC`]
10056    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`]
10057    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_SLICED_ELL`]
10058    ///
10059    /// [`cusparseSpMV`] supports the following index type for representing the sparse matrix `matA`:
10060    ///
10061    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
10062    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
10063    ///
10064    /// [`cusparseSpMV`] supports the following data types:
10065    ///
10066    /// Uniform-precision computation:
10067    ///
10068    /// | `A`/`X`/ `Y`/`computeType` |
10069    /// | --- |
10070    /// | [`cudaDataType_t::CUDA_R_32F`] |
10071    /// | [`cudaDataType_t::CUDA_R_64F`] |
10072    /// | [`cudaDataType_t::CUDA_C_32F`] |
10073    /// | [`cudaDataType_t::CUDA_C_64F`] |
10074    ///
10075    /// Mixed-precision computation:
10076    ///
10077    /// | `A`/`X` | `Y` | `computeType` | Notes |
10078    /// | --- | --- | --- | --- |
10079    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] | [`cudaDataType_t::CUDA_R_32I`] |  |
10080    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |  |
10081    /// | [`cudaDataType_t::CUDA_R_16F`] |  |  |  |
10082    /// | [`cudaDataType_t::CUDA_R_16BF`] |  |  |  |
10083    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] |  |  |
10084    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] |  |  |
10085    /// | [`cudaDataType_t::CUDA_C_32F`] | [`cudaDataType_t::CUDA_C_32F`] | [`cudaDataType_t::CUDA_C_32F`] |  |
10086    /// | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_16F`] | [DEPRECATED] |  |
10087    /// | [`cudaDataType_t::CUDA_C_16BF`] | [`cudaDataType_t::CUDA_C_16BF`] | [DEPRECATED] |  |
10088    ///
10089    /// | `A` | `X`/`Y`/`computeType` |
10090    /// | --- | --- |
10091    /// | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_64F`] |
10092    ///
10093    /// Mixed Regular/Complex computation:
10094    ///
10095    /// | `A` | `X`/`Y`/`computeType` |
10096    /// | --- | --- |
10097    /// | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_C_32F`] |
10098    /// | [`cudaDataType_t::CUDA_R_64F`] | [`cudaDataType_t::CUDA_C_64F`] |
10099    ///
10100    /// NOTE: [`cudaDataType_t::CUDA_R_16F`], [`cudaDataType_t::CUDA_R_16BF`], [`cudaDataType_t::CUDA_C_16F`], and [`cudaDataType_t::CUDA_C_16BF`] data types always imply mixed-precision computation.
10101    ///
10102    /// [`cusparseSpMV`] supports the following algorithms:
10103    ///
10104    /// | Algorithm | Notes |
10105    /// | --- | --- |
10106    /// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_ALG_DEFAULT`] | Default algorithm for any sparse matrix format. |
10107    /// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG1`] | Default algorithm for COO sparse matrix format. May produce slightly different results during different runs with the same input parameters. |
10108    /// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`] | Provides deterministic (bit-wise) results for each run. If `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG1`]. |
10109    /// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG1`] | Default algorithm for CSR/CSC sparse matrix format. May produce slightly different results during different runs with the same input parameters. |
10110    /// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG2`] | Provides deterministic (bit-wise) results for each run. If `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG1`]. |
10111    /// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_SELL_ALG1`] | Default algorithm for Sliced Ellpack sparse matrix format. Provides deterministic (bit-wise) results for each run. |
10112    /// | [`cusparseSpMVAlg_t::CUSPARSE_SPMV_BSR_ALG1`] | Default algorithm for BSR sparse matrix format. Provides deterministic (bit-wise) results for each run. Supports only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`. Supports both row-major and column-major block layouts in `A`. |
10113    ///
10114    /// Calling [`cusparseSpMV_preprocess`] is optional.
10115    /// It may accelerate subsequent calls to [`cusparseSpMV`].
10116    /// It is useful when [`cusparseSpMV`] is called multiple times with the same sparsity pattern (`matA`).
10117    ///
10118    /// Calling [`cusparseSpMV_preprocess`] with `buffer` makes that buffer “active” for `matA` SpMV calls.
10119    /// Subsequent calls to [`cusparseSpMV`] with `matA` and the active buffer
10120    /// must use the same values for all parameters as the call to [`cusparseSpMV_preprocess`].
10121    /// The exceptions are: `alpha`, `beta`, `vecX`, `vecY`, and the values (but not indices) of `matA` may be different.
10122    /// Importantly, the buffer contents must be unmodified since the call to [`cusparseSpMV_preprocess`].
10123    /// When [`cusparseSpMV`] is called with `matA` and its active buffer, it may read acceleration data from the buffer.
10124    ///
10125    /// Calling [`cusparseSpMV_preprocess`] again with `matA` and a new buffer will make the new buffer active,
10126    /// forgetting about the previously-active buffer and making it inactive.
10127    /// For [`cusparseSpMV`], there can only be one active buffer per sparse matrix at a time.
10128    /// To get the effect of multiple active buffers for a single sparse matrix,
10129    /// create multiple matrix handles that all point to the same index and value buffers,
10130    /// and call [`cusparseSpMV_preprocess`] once per handle with different workspace buffers.
10131    ///
10132    /// Calling [`cusparseSpMV`] with an inactive buffer is always permitted.
10133    /// However, there may be no acceleration from the preprocessing in that case.
10134    ///
10135    /// For the purposes of thread safety,
10136    /// [`cusparseSpMV_preprocess`] is writing to `matA` internal state.
10137    ///
10138    /// **Performance notes:**
10139    ///
10140    /// * [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG1`] and [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG1`] provide higher performance than [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`] and [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG2`].
10141    /// * In general, `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` is 3x faster than `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`.
10142    /// * Using [`cusparseSpMV_preprocess`] helps improve performance of [`cusparseSpMV`] in CSR. It is beneficial when we need to run [`cusparseSpMV`] multiple times with a same matrix ([`cusparseSpMV_preprocess`] is executed only once).
10143    ///
10144    /// [`cusparseSpMV`] has the following properties:
10145    ///
10146    /// * The routine requires extra storage for CSR/CSC format (all algorithms) and for COO format with [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`] algorithm.
10147    /// * Provides deterministic (bit-wise) results for each run only for [`cusparseSpMVAlg_t::CUSPARSE_SPMV_COO_ALG2`], [`cusparseSpMVAlg_t::CUSPARSE_SPMV_CSR_ALG2`] and [`cusparseSpMVAlg_t::CUSPARSE_SPMV_BSR_ALG1`] algorithms, and `opA == CUSPARSE_OPERATION_NON_TRANSPOSE`.
10148    /// * The routine supports asynchronous execution.
10149    /// * compute-sanitizer could report false race conditions for this routine when `beta == 0`. This is for optimization purposes and does not affect the correctness of the computation.
10150    /// * The routine allows the indices of `matA` to be unsorted.
10151    ///
10152    /// [`cusparseSpMV`] supports the following optimizations:
10153    ///
10154    /// * CUDA graph capture
10155    /// * Hardware Memory Compression
10156    ///
10157    /// Please visit [cuSPARSE Library Samples - cusparseSpMV CSR](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmv_csr) and [cusparseSpMV COO](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmv_coo) for a code example.
10158    ///
10159    /// # Parameters
10160    ///
10161    /// - `handle`: Handle to the cuSPARSE library context.
10162    /// - `opA`: Operation `op(A)`.
10163    /// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
10164    /// - `matA`: Sparse matrix `A`.
10165    /// - `vecX`: Dense vector `X`.
10166    /// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
10167    /// - `vecY`: Dense vector `Y`.
10168    /// - `computeType`: Datatype in which the computation is executed.
10169    /// - `alg`: Algorithm for the computation.
10170    /// - `externalBuffer`: Pointer to a workspace buffer of at least `bufferSize` bytes.
10171    pub fn cusparseSpMV(
10172        handle: cusparseHandle_t,
10173        opA: cusparseOperation_t,
10174        alpha: *const ::core::ffi::c_void,
10175        matA: cusparseConstSpMatDescr_t,
10176        vecX: cusparseConstDnVecDescr_t,
10177        beta: *const ::core::ffi::c_void,
10178        vecY: cusparseDnVecDescr_t,
10179        computeType: cudaDataType,
10180        alg: cusparseSpMVAlg_t,
10181        externalBuffer: *mut ::core::ffi::c_void,
10182    ) -> cusparseStatus_t;
10183}
10184unsafe extern "C" {
10185    pub fn cusparseSpMV_bufferSize(
10186        handle: cusparseHandle_t,
10187        opA: cusparseOperation_t,
10188        alpha: *const ::core::ffi::c_void,
10189        matA: cusparseConstSpMatDescr_t,
10190        vecX: cusparseConstDnVecDescr_t,
10191        beta: *const ::core::ffi::c_void,
10192        vecY: cusparseDnVecDescr_t,
10193        computeType: cudaDataType,
10194        alg: cusparseSpMVAlg_t,
10195        bufferSize: *mut size_t,
10196    ) -> cusparseStatus_t;
10197}
10198unsafe extern "C" {
10199    pub fn cusparseSpMV_preprocess(
10200        handle: cusparseHandle_t,
10201        opA: cusparseOperation_t,
10202        alpha: *const ::core::ffi::c_void,
10203        matA: cusparseConstSpMatDescr_t,
10204        vecX: cusparseConstDnVecDescr_t,
10205        beta: *const ::core::ffi::c_void,
10206        vecY: cusparseDnVecDescr_t,
10207        computeType: cudaDataType,
10208        alg: cusparseSpMVAlg_t,
10209        externalBuffer: *mut ::core::ffi::c_void,
10210    ) -> cusparseStatus_t;
10211}
/// Algorithm selector passed as the `alg` argument of [`cusparseSpSV_bufferSize`],
/// [`cusparseSpSV_analysis`] and [`cusparseSpSV_solve`].
///
/// Stored as a `u32`; the discriminant is the value handed to the C library.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSVAlg_t {
    /// The only algorithm exposed by this binding (value `0`).
    CUSPARSE_SPSV_ALG_DEFAULT = 0,
}
/// Selector passed as the `updatePart` argument of [`cusparseSpSV_updateMatrix`].
///
/// Stored as a `u32`; the discriminants are the values handed to the C library.
///
/// NOTE(review): judging by the names, the variants choose between updating all matrix values
/// and updating only the diagonal — confirm against the cuSPARSE reference.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSVUpdate_t {
    /// "General" update (value `0`).
    CUSPARSE_SPSV_UPDATE_GENERAL = 0,
    /// "Diagonal" update (value `1`).
    CUSPARSE_SPSV_UPDATE_DIAGONAL = 1,
}
/// Opaque stand-in for the C type `cusparseSpSVDescr`.
///
/// The only field is a private zero-length array, so the type carries no data on the Rust
/// side; it is meant to be used behind the [`cusparseSpSVDescr_t`] pointer alias.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpSVDescr {
    _unused: [u8; 0],
}
/// Raw mutable pointer to a [`cusparseSpSVDescr`], as taken by the `cusparseSpSV_*` functions.
pub type cusparseSpSVDescr_t = *mut cusparseSpSVDescr;
10229unsafe extern "C" {
10230    pub fn cusparseSpSV_createDescr(descr: *mut cusparseSpSVDescr_t) -> cusparseStatus_t;
10231}
10232unsafe extern "C" {
10233    pub fn cusparseSpSV_destroyDescr(descr: cusparseSpSVDescr_t) -> cusparseStatus_t;
10234}
10235unsafe extern "C" {
10236    pub fn cusparseSpSV_bufferSize(
10237        handle: cusparseHandle_t,
10238        opA: cusparseOperation_t,
10239        alpha: *const ::core::ffi::c_void,
10240        matA: cusparseConstSpMatDescr_t,
10241        vecX: cusparseConstDnVecDescr_t,
10242        vecY: cusparseDnVecDescr_t,
10243        computeType: cudaDataType,
10244        alg: cusparseSpSVAlg_t,
10245        spsvDescr: cusparseSpSVDescr_t,
10246        bufferSize: *mut size_t,
10247    ) -> cusparseStatus_t;
10248}
10249unsafe extern "C" {
10250    pub fn cusparseSpSV_analysis(
10251        handle: cusparseHandle_t,
10252        opA: cusparseOperation_t,
10253        alpha: *const ::core::ffi::c_void,
10254        matA: cusparseConstSpMatDescr_t,
10255        vecX: cusparseConstDnVecDescr_t,
10256        vecY: cusparseDnVecDescr_t,
10257        computeType: cudaDataType,
10258        alg: cusparseSpSVAlg_t,
10259        spsvDescr: cusparseSpSVDescr_t,
10260        externalBuffer: *mut ::core::ffi::c_void,
10261    ) -> cusparseStatus_t;
10262}
10263unsafe extern "C" {
10264    pub fn cusparseSpSV_solve(
10265        handle: cusparseHandle_t,
10266        opA: cusparseOperation_t,
10267        alpha: *const ::core::ffi::c_void,
10268        matA: cusparseConstSpMatDescr_t,
10269        vecX: cusparseConstDnVecDescr_t,
10270        vecY: cusparseDnVecDescr_t,
10271        computeType: cudaDataType,
10272        alg: cusparseSpSVAlg_t,
10273        spsvDescr: cusparseSpSVDescr_t,
10274    ) -> cusparseStatus_t;
10275}
10276unsafe extern "C" {
10277    pub fn cusparseSpSV_updateMatrix(
10278        handle: cusparseHandle_t,
10279        spsvDescr: cusparseSpSVDescr_t,
10280        newValues: *mut ::core::ffi::c_void,
10281        updatePart: cusparseSpSVUpdate_t,
10282    ) -> cusparseStatus_t;
10283}
/// Algorithm selector passed as the `alg` argument of [`cusparseSpSM_bufferSize`],
/// [`cusparseSpSM_analysis`] and [`cusparseSpSM_solve`].
///
/// Stored as a `u32`; the discriminant is the value handed to the C library.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSMAlg_t {
    /// The only algorithm exposed by this binding (value `0`).
    CUSPARSE_SPSM_ALG_DEFAULT = 0,
}
/// Selector passed as the `updatePart` argument of [`cusparseSpSM_updateMatrix`].
///
/// Stored as a `u32`; the discriminants are the values handed to the C library.
///
/// NOTE(review): judging by the names, the variants choose between updating all matrix values
/// and updating only the diagonal — confirm against the cuSPARSE reference.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpSMUpdate_t {
    /// "General" update (value `0`).
    CUSPARSE_SPSM_UPDATE_GENERAL = 0,
    /// "Diagonal" update (value `1`).
    CUSPARSE_SPSM_UPDATE_DIAGONAL = 1,
}
/// Opaque stand-in for the C type `cusparseSpSMDescr`.
///
/// The only field is a private zero-length array, so the type carries no data on the Rust
/// side; it is meant to be used behind the [`cusparseSpSMDescr_t`] pointer alias.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpSMDescr {
    _unused: [u8; 0],
}
/// Raw mutable pointer to a [`cusparseSpSMDescr`], as taken by the `cusparseSpSM_*` functions.
pub type cusparseSpSMDescr_t = *mut cusparseSpSMDescr;
10301unsafe extern "C" {
10302    pub fn cusparseSpSM_createDescr(descr: *mut cusparseSpSMDescr_t) -> cusparseStatus_t;
10303}
10304unsafe extern "C" {
10305    pub fn cusparseSpSM_destroyDescr(descr: cusparseSpSMDescr_t) -> cusparseStatus_t;
10306}
10307unsafe extern "C" {
10308    pub fn cusparseSpSM_bufferSize(
10309        handle: cusparseHandle_t,
10310        opA: cusparseOperation_t,
10311        opB: cusparseOperation_t,
10312        alpha: *const ::core::ffi::c_void,
10313        matA: cusparseConstSpMatDescr_t,
10314        matB: cusparseConstDnMatDescr_t,
10315        matC: cusparseDnMatDescr_t,
10316        computeType: cudaDataType,
10317        alg: cusparseSpSMAlg_t,
10318        spsmDescr: cusparseSpSMDescr_t,
10319        bufferSize: *mut size_t,
10320    ) -> cusparseStatus_t;
10321}
10322unsafe extern "C" {
10323    pub fn cusparseSpSM_analysis(
10324        handle: cusparseHandle_t,
10325        opA: cusparseOperation_t,
10326        opB: cusparseOperation_t,
10327        alpha: *const ::core::ffi::c_void,
10328        matA: cusparseConstSpMatDescr_t,
10329        matB: cusparseConstDnMatDescr_t,
10330        matC: cusparseDnMatDescr_t,
10331        computeType: cudaDataType,
10332        alg: cusparseSpSMAlg_t,
10333        spsmDescr: cusparseSpSMDescr_t,
10334        externalBuffer: *mut ::core::ffi::c_void,
10335    ) -> cusparseStatus_t;
10336}
10337unsafe extern "C" {
10338    pub fn cusparseSpSM_solve(
10339        handle: cusparseHandle_t,
10340        opA: cusparseOperation_t,
10341        opB: cusparseOperation_t,
10342        alpha: *const ::core::ffi::c_void,
10343        matA: cusparseConstSpMatDescr_t,
10344        matB: cusparseConstDnMatDescr_t,
10345        matC: cusparseDnMatDescr_t,
10346        computeType: cudaDataType,
10347        alg: cusparseSpSMAlg_t,
10348        spsmDescr: cusparseSpSMDescr_t,
10349    ) -> cusparseStatus_t;
10350}
10351unsafe extern "C" {
10352    pub fn cusparseSpSM_updateMatrix(
10353        handle: cusparseHandle_t,
10354        spsmDescr: cusparseSpSMDescr_t,
10355        newValues: *mut ::core::ffi::c_void,
10356        updatePart: cusparseSpSMUpdate_t,
10357    ) -> cusparseStatus_t;
10358}
/// Algorithm selector passed as the `alg` argument of [`cusparseSpMM`],
/// [`cusparseSpMM_bufferSize`] and [`cusparseSpMM_preprocess`].
///
/// Stored as a `u32`; the discriminants are the values handed to the C library. They are not
/// contiguous and do not follow declaration order (`COO_ALG4` is `5` while `CSR_ALG1` is `4`),
/// so never derive a value from a variant's position. The derived ordering compares
/// discriminants.
///
/// The per-variant notes are taken from the algorithm tables in the [`cusparseSpMM`]
/// documentation, which lists the full set of properties for each algorithm.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpMMAlg_t {
    /// Default algorithm for any sparse matrix format.
    CUSPARSE_SPMM_ALG_DEFAULT = 0,
    /// Algorithm 1 for COO sparse matrix format.
    CUSPARSE_SPMM_COO_ALG1 = 1,
    /// Algorithm 2 for COO sparse matrix format (deterministic result).
    CUSPARSE_SPMM_COO_ALG2 = 2,
    /// Algorithm 3 for COO sparse matrix format.
    CUSPARSE_SPMM_COO_ALG3 = 3,
    /// Algorithm 4 for COO sparse matrix format.
    CUSPARSE_SPMM_COO_ALG4 = 5,
    /// Algorithm 1 for CSR/CSC sparse matrix format.
    CUSPARSE_SPMM_CSR_ALG1 = 4,
    /// Algorithm 2 for CSR/CSC sparse matrix format.
    CUSPARSE_SPMM_CSR_ALG2 = 6,
    /// Algorithm 3 for CSR sparse matrix format (deterministic result).
    CUSPARSE_SPMM_CSR_ALG3 = 12,
    /// Default algorithm for Blocked-ELL format.
    CUSPARSE_SPMM_BLOCKED_ELL_ALG1 = 13,
    /// Algorithm 1 for BSR sparse matrix format (deterministic result).
    CUSPARSE_SPMM_BSR_ALG1 = 14,
}
10373unsafe extern "C" {
10374    pub fn cusparseSpMM_bufferSize(
10375        handle: cusparseHandle_t,
10376        opA: cusparseOperation_t,
10377        opB: cusparseOperation_t,
10378        alpha: *const ::core::ffi::c_void,
10379        matA: cusparseConstSpMatDescr_t,
10380        matB: cusparseConstDnMatDescr_t,
10381        beta: *const ::core::ffi::c_void,
10382        matC: cusparseDnMatDescr_t,
10383        computeType: cudaDataType,
10384        alg: cusparseSpMMAlg_t,
10385        bufferSize: *mut size_t,
10386    ) -> cusparseStatus_t;
10387}
10388unsafe extern "C" {
10389    pub fn cusparseSpMM_preprocess(
10390        handle: cusparseHandle_t,
10391        opA: cusparseOperation_t,
10392        opB: cusparseOperation_t,
10393        alpha: *const ::core::ffi::c_void,
10394        matA: cusparseConstSpMatDescr_t,
10395        matB: cusparseConstDnMatDescr_t,
10396        beta: *const ::core::ffi::c_void,
10397        matC: cusparseDnMatDescr_t,
10398        computeType: cudaDataType,
10399        alg: cusparseSpMMAlg_t,
10400        externalBuffer: *mut ::core::ffi::c_void,
10401    ) -> cusparseStatus_t;
10402}
10403unsafe extern "C" {
10404    /// The function performs the multiplication of a sparse matrix `matA` and a dense matrix `matB`.
10405    ///
10406    /// where
10407    ///
10408    /// * `op(A)` is a sparse matrix of size $m \times k$
10409    /// * `op(B)` is a dense matrix of size $k \times n$
10410    /// * `C` is a dense matrix of size $m \times n$
10411    /// * $\alpha$ and $\beta$ are scalars
10412    ///
10413    /// The routine can be also used to perform the multiplication of a dense matrix and a sparse matrix by switching the dense matrices layout:
10414    ///
10415    /// where $\mathbf{B}_{C}$, $\mathbf{C}_{C}$ indicate column-major layout, while $\mathbf{B}_{R}$, $\mathbf{C}_{R}$ refer to row-major layout
10416    ///
10417    /// Also, for matrix `A` and `B`
10418    ///
10419    /// ![image11](_images/op-a-opa-non-transpose.png)
10420    ///
10421    /// ![image12](_images/op-b-opb-non-transpose.png)
10422    ///
10423    /// When using the (conjugate) transpose of the sparse matrix `A`, this routine may produce slightly different results during different runs with the same input parameters.
10424    ///
10425    /// The function [`cusparseSpMM_bufferSize`] returns the size of the workspace needed by [`cusparseSpMM`]
10426    ///
10427    /// Calling [`cusparseSpMM_preprocess`] is optional.
10428    /// It may accelerate subsequent calls to [`cusparseSpMM`].
10429    /// It is useful when [`cusparseSpMM`] is called multiple times with the same sparsity pattern (`matA`).
10430    /// It provides performance advantages with [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG1`] or [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG3`].
10431    /// For all other formats and algorithms have no effect.
10432    ///
10433    /// Calling [`cusparseSpMM_preprocess`] with `buffer` makes that buffer “active” for `matA` SpMM calls.
10434    /// Subsequent calls to [`cusparseSpMM`] with `matA` and the active buffer
10435    /// must use the same values for all parameters as the call to [`cusparseSpMM_preprocess`].
10436    /// The exceptions are: `alpha`, `beta`, `matX`, `matY`, and the values (but not indices) of `matA` may be different.
10437    /// Importantly, the buffer contents must be unmodified since the call to [`cusparseSpMM_preprocess`].
10438    /// When [`cusparseSpMM`] is called with `matA` and its active buffer, it may read acceleration data from the buffer.
10439    ///
10440    /// Calling [`cusparseSpMM_preprocess`] again with `matA` and a new buffer will make the new buffer active,
10441    /// forgetting about the previously-active buffer and making it inactive.
10442    /// For [`cusparseSpMM`], there can only be one active buffer per sparse matrix at a time.
10443    /// To get the effect of multiple active buffers for a single sparse matrix,
10444    /// create multiple matrix handles that all point to the same index and value buffers,
10445    /// and call [`cusparseSpMM_preprocess`] once per handle with different workspace buffers.
10446    ///
10447    /// Calling [`cusparseSpMM`] with an inactive buffer is always permitted.
10448    /// However, there may be no acceleration from the preprocessing in that case.
10449    ///
10450    /// For the purposes of thread safety,
10451    /// [`cusparseSpMM_preprocess`] is writing to `matA` internal state.
10452    ///
10453    /// [`cusparseSpMM`] supports the following sparse matrix formats:
10454    ///
10455    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_COO`]
10456    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
10457    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSC`]
10458    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`]
10459    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_BLOCKED_ELL`]
10460    ///
10461    /// |  |  |
10462    /// | --- | --- |
10463    /// | **(1)** | **COO/CSR/CSC/BSR FORMATS** |
10464    ///
10465    /// [`cusparseSpMM`] supports the following index type for representing the sparse matrix `matA`:
10466    ///
10467    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
10468    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
10469    ///
10470    /// [`cusparseSpMM`] supports the following data types:
10471    ///
10472    /// Uniform-precision computation:
10473    ///
10474    /// | `A`/`B`/ `C`/`computeType` |
10475    /// | --- |
10476    /// | [`cudaDataType_t::CUDA_R_32F`] |
10477    /// | [`cudaDataType_t::CUDA_R_64F`] |
10478    /// | [`cudaDataType_t::CUDA_C_32F`] |
10479    /// | [`cudaDataType_t::CUDA_C_64F`] |
10480    ///
10481    /// Mixed-precision computation:
10482    ///
10483    /// | `A`/`B` | `C` | `computeType` |  |
10484    /// | --- | --- | --- | --- |
10485    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] | [`cudaDataType_t::CUDA_R_32I`] |  |
10486    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |  |
10487    /// | [`cudaDataType_t::CUDA_R_16F`] |  |  |  |
10488    /// | [`cudaDataType_t::CUDA_R_16BF`] |  |  |  |
10489    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] |  |  |
10490    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] |  |  |
10491    /// | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_16F`] | [`cudaDataType_t::CUDA_C_32F`] | [DEPRECATED] |
10492    /// | [`cudaDataType_t::CUDA_C_16BF`] | [`cudaDataType_t::CUDA_C_16BF`] | [DEPRECATED] |  |
10493    ///
10494    /// NOTE: [`cudaDataType_t::CUDA_R_16F`], [`cudaDataType_t::CUDA_R_16BF`], [`cudaDataType_t::CUDA_C_16F`], and [`cudaDataType_t::CUDA_C_16BF`] data types always imply mixed-precision computation.
10495    ///
10496    /// [`cusparseSpMM`] supports the following algorithms:
10497    ///
10498    /// | Algorithm | Notes |
10499    /// | --- | --- |
10500    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_ALG_DEFAULT`] | Default algorithm for any sparse matrix format |
10501    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG1`] | Algorithm 1 for COO sparse matrix format   * May provide better performance for small number of nnz * Provides the best performance with column-major layout * It supports batched computation * May produce slightly different results during different runs with the same input parameters |
10502    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG2`] | Algorithm 2 for COO sparse matrix format   * It provides deterministic result * Provides the best performance with column-major layout * In general, slower than Algorithm 1 * It supports batched computation * It requires additional memory * If `opA != CUSPARSE_OPERATION_NON_TRANSPOSE`, it is identical to [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG1`] |
10503    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG3`] | Algorithm 3 for COO sparse matrix format   * May provide better performance for large number of nnz * May produce slightly different results during different runs with the same input parameters |
10504    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG4`] | Algorithm 4 for COO sparse matrix format   * Provides better performance with row-major layout * It supports batched computation * May produce slightly different results during different runs with the same input parameters |
10505    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG1`] | Algorithm 1 for CSR/CSC sparse matrix format   * Provides the best performance with column-major layout * It supports batched computation * It requires additional memory * May produce slightly different results during different runs with the same input parameters |
10506    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG2`] | Algorithm 2 for CSR/CSC sparse matrix format   * Provides the best performance with row-major layout * It supports batched computation * It requires additional memory * May produce slightly different results during different runs with the same input parameters |
10507    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG3`] | Algorithm 3 for CSR sparse matrix format   * It provides deterministic result * It requires additional memory * It supports only CSR matrix and `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` * It does not support `opB == CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE` * It does not support `CUDA_C_16F and CUDA_C_16BF` data types |
10508    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_BSR_ALG1`] | Algorithm 1 for BSR sparse matrix format   * It provides deterministic result * It requires no additional memory * It supports only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` * It does not support [`cudaDataType_t::CUDA_C_16F`] and [`cudaDataType_t::CUDA_C_16BF`] data types * It does not support column-major blocks in `A` |
10509    ///
10510    /// NOTE: When using [`cusparseSpMM`] for mixed-precision computation on COO or CSR matrices, it defaults to algorithms [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG2`] and [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG3`], respectively. If the required computation isn’t supported by those algorithms, the mixed-precision operation will fail.
10511    ///
10512    /// **Performance notes:**
10513    ///
10514    /// * Row-major layout provides higher performance than column-major
10515    /// * [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG4`] and [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG2`] should be used with row-major layout, while [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG1`], [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG2`], [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG3`], and [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG1`] with column-major layout
10516    /// * For `beta != 1`, most algorithms scale the output matrix before the main computation
10517    /// * For `n == 1`, the routine may use [`cusparseSpMV`]
10518    ///
10519    /// [`cusparseSpMM`] with all algorithms support the following batch modes except for [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG3`]:
10520    ///
10521    /// * $C_{i} = A \cdot B_{i}$
10522    /// * $C_{i} = A_{i} \cdot B$
10523    /// * $C_{i} = A_{i} \cdot B_{i}$
10524    ///
10525    /// The number of batches and their strides can be set by using [`cusparseCooSetStridedBatch`], [`cusparseCsrSetStridedBatch`], and [`cusparseDnMatSetStridedBatch`]. The maximum number of batches for [`cusparseSpMM`] is 65,535.
10526    ///
10527    /// [`cusparseSpMM`] has the following properties:
10528    ///
10529    /// * The routine requires no extra storage for [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG1`], [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG3`], [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG4`], [`cusparseSpMMAlg_t::CUSPARSE_SPMM_BSR_ALG1`]
10530    /// * The routine supports asynchronous execution
10531    /// * Provides deterministic (bit-wise) results for each run only for [`cusparseSpMMAlg_t::CUSPARSE_SPMM_COO_ALG2`], [`cusparseSpMMAlg_t::CUSPARSE_SPMM_CSR_ALG3`], and [`cusparseSpMMAlg_t::CUSPARSE_SPMM_BSR_ALG1`] algorithms
10532    /// * `compute-sanitizer` could report false race conditions for this routine. This is for optimization purposes and does not affect the correctness of the computation
10533    /// * The routine allows the indices of `matA` to be unsorted
10534    ///
10535    /// [`cusparseSpMM`] supports the following optimizations:
10536    ///
10537    /// * CUDA graph capture
10538    /// * Hardware Memory Compression
10539    ///
10540    /// Please visit [cuSPARSE Library Samples - cusparseSpMM CSR](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_csr) and [cusparseSpMM COO](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_coo) for a code example. For batched computation please visit [cusparseSpMM CSR Batched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_csr_batched) and [cusparseSpMM COO Batched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_coo_batched).
10541    ///
10542    /// |  |  |
10543    /// | --- | --- |
10544    /// | **(2)** | **BLOCKED-ELLPACK FORMAT** |
10545    ///
10546    /// [`cusparseSpMM`] supports the following data types for [`cusparseFormat_t::CUSPARSE_FORMAT_BLOCKED_ELL`] format and the following GPU architectures for exploiting NVIDIA Tensor Cores:
10547    ///
10548    /// | `A`/`B` | `C` | `computeType` | `opB` | `Compute Capability` |
10549    /// | --- | --- | --- | --- | --- |
10550    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | `N`, `T` | `≥ 70` |
10551    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 70` |
10552    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 70` |
10553    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] | [`cudaDataType_t::CUDA_R_32I`] | `N` column-major | `≥ 75` |
10554    /// | `T` row-major |  |  |  |  |
10555    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 80` |
10556    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 80` |
10557    /// | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] | `N`, `T` | `≥ 80` |
10558    /// | [`cudaDataType_t::CUDA_R_64F`] | [`cudaDataType_t::CUDA_R_64F`] | [`cudaDataType_t::CUDA_R_64F`] | `N`, `T` | `≥ 80` |
10559    ///
10560    /// [`cusparseSpMM`] supports the following algorithms with [`cusparseFormat_t::CUSPARSE_FORMAT_BLOCKED_ELL`] format:
10561    ///
10562    /// | Algorithm | Notes |
10563    /// | --- | --- |
10564    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_ALG_DEFAULT`] | Default algorithm for any sparse matrix format |
10565    /// | [`cusparseSpMMAlg_t::CUSPARSE_SPMM_BLOCKED_ELL_ALG1`] | Default algorithm for Blocked-ELL format |
10566    ///
10567    /// **Performance notes:**
10568    ///
10569    /// * Blocked-ELL SpMM provides the best performance with Power-of-2 Block-Sizes.
10570    /// * Large Block-Sizes (e.g. ≥ 64) provide the best performance.
10571    ///
10572    /// The function has the following limitations:
10573    ///
10574    /// * The pointer mode must be equal to [`cusparsePointerMode_t::CUSPARSE_POINTER_MODE_HOST`]
10575    /// * Only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` is supported.
10576    /// * `opB == CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE` is not supported.
10577    /// * Only [`cusparseIndexType_t::CUSPARSE_INDEX_32I`] is supported.
10578    ///
10579    /// Please visit [cuSPARSE Library Samples - cusparseSpMM Blocked-ELL](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_blockedell) for a code example.
10580    ///
10581    /// # Parameters
10582    ///
10583    /// - `handle`: Handle to the cuSPARSE library context.
10584    /// - `opA`: Operation `op(A)`.
10585    /// - `opB`: Operation `op(B)`.
10586    /// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
10587    /// - `matA`: Sparse matrix `A`.
10588    /// - `matB`: Dense matrix `B`.
10589    /// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
10590    /// - `matC`: Dense matrix `C`.
10591    /// - `computeType`: Datatype in which the computation is executed.
10592    /// - `alg`: Algorithm for the computation.
10593    /// - `externalBuffer`: Pointer to workspace buffer of at least `bufferSize` bytes.
10594    pub fn cusparseSpMM(
10595        handle: cusparseHandle_t,
10596        opA: cusparseOperation_t,
10597        opB: cusparseOperation_t,
10598        alpha: *const ::core::ffi::c_void,
10599        matA: cusparseConstSpMatDescr_t,
10600        matB: cusparseConstDnMatDescr_t,
10601        beta: *const ::core::ffi::c_void,
10602        matC: cusparseDnMatDescr_t,
10603        computeType: cudaDataType,
10604        alg: cusparseSpMMAlg_t,
10605        externalBuffer: *mut ::core::ffi::c_void,
10606    ) -> cusparseStatus_t;
10607}
/// Algorithm selector passed as the `alg` argument of the `cusparseSpGEMM_*` functions and of
/// [`cusparseSpGEMMreuse_workEstimation`].
///
/// Stored as a `u32`; the discriminants are the values handed to the C library.
///
/// The `DETERMINITIC` / `NONDETERMINITIC` spellings are part of the public identifiers of this
/// binding and are kept as-is so existing callers keep compiling.
#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub enum cusparseSpGEMMAlg_t {
    /// Default algorithm (value `0`).
    CUSPARSE_SPGEMM_DEFAULT = 0,
    /// CSR algorithm, deterministic variant (value `1`).
    CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC = 1,
    /// CSR algorithm, non-deterministic variant (value `2`).
    CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC = 2,
    /// Algorithm 1 (value `3`).
    CUSPARSE_SPGEMM_ALG1 = 3,
    /// Algorithm 2 (value `4`).
    CUSPARSE_SPGEMM_ALG2 = 4,
    /// Algorithm 3 (value `5`).
    CUSPARSE_SPGEMM_ALG3 = 5,
}
/// Opaque stand-in for the C type `cusparseSpGEMMDescr`.
///
/// The only field is a private zero-length array, so the type carries no data on the Rust
/// side; it is meant to be used behind the [`cusparseSpGEMMDescr_t`] pointer alias.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpGEMMDescr {
    _unused: [u8; 0],
}
/// Raw mutable pointer to a [`cusparseSpGEMMDescr`], as taken by the `cusparseSpGEMM*`
/// functions.
pub type cusparseSpGEMMDescr_t = *mut cusparseSpGEMMDescr;
10624unsafe extern "C" {
10625    pub fn cusparseSpGEMM_createDescr(
10626        descr: *mut cusparseSpGEMMDescr_t,
10627    ) -> cusparseStatus_t;
10628}
10629unsafe extern "C" {
10630    pub fn cusparseSpGEMM_destroyDescr(descr: cusparseSpGEMMDescr_t) -> cusparseStatus_t;
10631}
10632unsafe extern "C" {
10633    pub fn cusparseSpGEMM_workEstimation(
10634        handle: cusparseHandle_t,
10635        opA: cusparseOperation_t,
10636        opB: cusparseOperation_t,
10637        alpha: *const ::core::ffi::c_void,
10638        matA: cusparseConstSpMatDescr_t,
10639        matB: cusparseConstSpMatDescr_t,
10640        beta: *const ::core::ffi::c_void,
10641        matC: cusparseSpMatDescr_t,
10642        computeType: cudaDataType,
10643        alg: cusparseSpGEMMAlg_t,
10644        spgemmDescr: cusparseSpGEMMDescr_t,
10645        bufferSize1: *mut size_t,
10646        externalBuffer1: *mut ::core::ffi::c_void,
10647    ) -> cusparseStatus_t;
10648}
10649unsafe extern "C" {
10650    pub fn cusparseSpGEMM_getNumProducts(
10651        spgemmDescr: cusparseSpGEMMDescr_t,
10652        num_prods: *mut i64,
10653    ) -> cusparseStatus_t;
10654}
10655unsafe extern "C" {
    /// FFI declaration of the cuSPARSE C function `cusparseSpGEMM_estimateMemory`.
    ///
    /// Shares its first eleven parameters (`handle` through `spgemmDescr`) with
    /// [`cusparseSpGEMM_workEstimation`], followed by:
    ///
    /// - `chunk_fraction`: an `f32` passed by value.
    /// - `bufferSize3`: pointer to a `size_t` buffer size.
    /// - `externalBuffer3`: untyped pointer to a caller-provided buffer.
    /// - `bufferSize2`: pointer to a `size_t` buffer size.
    ///
    /// NOTE(review): presumably `chunk_fraction` is a fraction in `(0, 1]` controlling how much
    /// of the intermediate products is processed at once, and `bufferSize2` receives the
    /// estimated size for the buffer later given to [`cusparseSpGEMM_compute`] — confirm against
    /// the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10656    pub fn cusparseSpGEMM_estimateMemory(
10657        handle: cusparseHandle_t,
10658        opA: cusparseOperation_t,
10659        opB: cusparseOperation_t,
10660        alpha: *const ::core::ffi::c_void,
10661        matA: cusparseConstSpMatDescr_t,
10662        matB: cusparseConstSpMatDescr_t,
10663        beta: *const ::core::ffi::c_void,
10664        matC: cusparseSpMatDescr_t,
10665        computeType: cudaDataType,
10666        alg: cusparseSpGEMMAlg_t,
10667        spgemmDescr: cusparseSpGEMMDescr_t,
10668        chunk_fraction: f32,
10669        bufferSize3: *mut size_t,
10670        externalBuffer3: *mut ::core::ffi::c_void,
10671        bufferSize2: *mut size_t,
10672    ) -> cusparseStatus_t;
10673}
10674unsafe extern "C" {
    /// FFI declaration of the cuSPARSE C function `cusparseSpGEMM_compute`.
    ///
    /// The parameter list matches [`cusparseSpGEMM_workEstimation`] except that the trailing
    /// workspace pair is `bufferSize2` (pointer to a `size_t`) and `externalBuffer2` (untyped
    /// pointer to a caller-provided buffer).
    ///
    /// NOTE(review): presumably the phase that performs the sparse-sparse product after work
    /// estimation, with the same query-then-run convention for the buffer pair — confirm against
    /// the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10675    pub fn cusparseSpGEMM_compute(
10676        handle: cusparseHandle_t,
10677        opA: cusparseOperation_t,
10678        opB: cusparseOperation_t,
10679        alpha: *const ::core::ffi::c_void,
10680        matA: cusparseConstSpMatDescr_t,
10681        matB: cusparseConstSpMatDescr_t,
10682        beta: *const ::core::ffi::c_void,
10683        matC: cusparseSpMatDescr_t,
10684        computeType: cudaDataType,
10685        alg: cusparseSpGEMMAlg_t,
10686        spgemmDescr: cusparseSpGEMMDescr_t,
10687        bufferSize2: *mut size_t,
10688        externalBuffer2: *mut ::core::ffi::c_void,
10689    ) -> cusparseStatus_t;
10690}
10691unsafe extern "C" {
    /// FFI declaration of the cuSPARSE C function `cusparseSpGEMM_copy`.
    ///
    /// Takes the same `handle` through `spgemmDescr` parameters as [`cusparseSpGEMM_compute`],
    /// but no workspace size/buffer pair.
    ///
    /// NOTE(review): presumably copies the result held by `spgemmDescr` into the storage
    /// attached to `matC` — confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10692    pub fn cusparseSpGEMM_copy(
10693        handle: cusparseHandle_t,
10694        opA: cusparseOperation_t,
10695        opB: cusparseOperation_t,
10696        alpha: *const ::core::ffi::c_void,
10697        matA: cusparseConstSpMatDescr_t,
10698        matB: cusparseConstSpMatDescr_t,
10699        beta: *const ::core::ffi::c_void,
10700        matC: cusparseSpMatDescr_t,
10701        computeType: cudaDataType,
10702        alg: cusparseSpGEMMAlg_t,
10703        spgemmDescr: cusparseSpGEMMDescr_t,
10704    ) -> cusparseStatus_t;
10705}
10706unsafe extern "C" {
    /// FFI declaration of the cuSPARSE C function `cusparseSpGEMMreuse_workEstimation`.
    ///
    /// Unlike [`cusparseSpGEMM_workEstimation`], this signature has no `alpha`, `beta` or
    /// `computeType` parameters.
    ///
    /// # Parameters
    ///
    /// - `handle`: cuSPARSE library handle.
    /// - `opA`, `opB`: operation selectors applied to `matA` and `matB`.
    /// - `matA`, `matB`: const sparse-matrix descriptor handles.
    /// - `matC`: mutable sparse-matrix descriptor handle.
    /// - `alg`: SpGEMM algorithm selector.
    /// - `spgemmDescr`: SpGEMM descriptor handle.
    /// - `bufferSize1`: pointer to a `size_t` buffer size.
    /// - `externalBuffer1`: untyped pointer to a caller-provided buffer.
    ///
    /// NOTE(review): presumably the first step of the `cusparseSpGEMMreuse_*` workflow —
    /// confirm the call order and buffer convention against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10707    pub fn cusparseSpGEMMreuse_workEstimation(
10708        handle: cusparseHandle_t,
10709        opA: cusparseOperation_t,
10710        opB: cusparseOperation_t,
10711        matA: cusparseConstSpMatDescr_t,
10712        matB: cusparseConstSpMatDescr_t,
10713        matC: cusparseSpMatDescr_t,
10714        alg: cusparseSpGEMMAlg_t,
10715        spgemmDescr: cusparseSpGEMMDescr_t,
10716        bufferSize1: *mut size_t,
10717        externalBuffer1: *mut ::core::ffi::c_void,
10718    ) -> cusparseStatus_t;
10719}
10720unsafe extern "C" {
10721    /// FFI declaration of the cuSPARSE C function `cusparseSpGEMMreuse_nnz`, a member of the `cusparseSpGEMMreuse_*` family.
10722    ///
10723    /// * It operates on the sparse-matrix descriptor handles `matA`, `matB`, `matC` and on an SpGEMM descriptor; it does not take a dense matrix.
10724    /// * It takes three workspace pairs: (`bufferSize2`, `externalBuffer2`), (`bufferSize3`, `externalBuffer3`) and (`bufferSize4`, `externalBuffer4`); each size is a pointer to `size_t`, each buffer an untyped pointer.
10725    /// * NOTE(review): the text previously attached here described the dense-matrix `cusparse<t>nnz` routine and did not match this signature; it appears to have been picked up by name match during doc generation.
    ///
    /// NOTE(review): presumably this step determines the sparsity structure / number of
    /// nonzeros of the product stored in `matC` — confirm the exact semantics and the buffer
    /// lifetime rules against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10726    pub fn cusparseSpGEMMreuse_nnz(
10727        handle: cusparseHandle_t,
10728        opA: cusparseOperation_t,
10729        opB: cusparseOperation_t,
10730        matA: cusparseConstSpMatDescr_t,
10731        matB: cusparseConstSpMatDescr_t,
10732        matC: cusparseSpMatDescr_t,
10733        alg: cusparseSpGEMMAlg_t,
10734        spgemmDescr: cusparseSpGEMMDescr_t,
10735        bufferSize2: *mut size_t,
10736        externalBuffer2: *mut ::core::ffi::c_void,
10737        bufferSize3: *mut size_t,
10738        externalBuffer3: *mut ::core::ffi::c_void,
10739        bufferSize4: *mut size_t,
10740        externalBuffer4: *mut ::core::ffi::c_void,
10741    ) -> cusparseStatus_t;
10742}
10743unsafe extern "C" {
    /// FFI declaration of the cuSPARSE C function `cusparseSpGEMMreuse_copy`.
    ///
    /// Takes the same leading parameters as [`cusparseSpGEMMreuse_workEstimation`] (`handle`
    /// through `spgemmDescr`), followed by the workspace pair `bufferSize5` (pointer to a
    /// `size_t`) and `externalBuffer5` (untyped pointer to a caller-provided buffer).
    ///
    /// NOTE(review): presumably copies the computed structure into the storage attached to
    /// `matC` — confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10744    pub fn cusparseSpGEMMreuse_copy(
10745        handle: cusparseHandle_t,
10746        opA: cusparseOperation_t,
10747        opB: cusparseOperation_t,
10748        matA: cusparseConstSpMatDescr_t,
10749        matB: cusparseConstSpMatDescr_t,
10750        matC: cusparseSpMatDescr_t,
10751        alg: cusparseSpGEMMAlg_t,
10752        spgemmDescr: cusparseSpGEMMDescr_t,
10753        bufferSize5: *mut size_t,
10754        externalBuffer5: *mut ::core::ffi::c_void,
10755    ) -> cusparseStatus_t;
10756}
10757unsafe extern "C" {
    /// FFI declaration of the cuSPARSE C function `cusparseSpGEMMreuse_compute`.
    ///
    /// This is the only `cusparseSpGEMMreuse_*` declaration here that takes the scalar pointers
    /// `alpha` / `beta` and a `computeType`; it takes no workspace size/buffer pair.
    ///
    /// NOTE(review): presumably computes the values of `matC` using the structure prepared by
    /// the other `cusparseSpGEMMreuse_*` steps, and may be called repeatedly with new values —
    /// confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10758    pub fn cusparseSpGEMMreuse_compute(
10759        handle: cusparseHandle_t,
10760        opA: cusparseOperation_t,
10761        opB: cusparseOperation_t,
10762        alpha: *const ::core::ffi::c_void,
10763        matA: cusparseConstSpMatDescr_t,
10764        matB: cusparseConstSpMatDescr_t,
10765        beta: *const ::core::ffi::c_void,
10766        matC: cusparseSpMatDescr_t,
10767        computeType: cudaDataType,
10768        alg: cusparseSpGEMMAlg_t,
10769        spgemmDescr: cusparseSpGEMMDescr_t,
10770    ) -> cusparseStatus_t;
10771}
/// Algorithm selector passed as the `alg` argument of [`cusparseSDDMM_bufferSize`],
/// [`cusparseSDDMM_preprocess`] and [`cusparseSDDMM`].
///
/// Represented as a `u32`; only one variant is declared.
10772#[repr(u32)]
10773#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
10774pub enum cusparseSDDMMAlg_t {
    /// Default algorithm. It supports batched computation.
10775    CUSPARSE_SDDMM_ALG_DEFAULT = 0,
10776}
10777unsafe extern "C" {
    /// Returns the size of the workspace needed by [`cusparseSDDMM`] or
    /// [`cusparseSDDMM_preprocess`].
    ///
    /// # Parameters
    ///
    /// - `handle`: Handle to the cuSPARSE library context.
    /// - `opA`: Operation `op(A)`.
    /// - `opB`: Operation `op(B)`.
    /// - `alpha`: Pointer to the $\alpha$ scalar.
    /// - `matA`: Dense matrix `matA`.
    /// - `matB`: Dense matrix `matB`.
    /// - `beta`: Pointer to the $\beta$ scalar.
    /// - `matC`: Sparse matrix `matC`.
    /// - `computeType`: Datatype in which the computation is executed.
    /// - `alg`: Algorithm for the computation.
    /// - `bufferSize`: Pointer to a `size_t`; presumably receives the workspace size in bytes
    ///   (NOTE(review): confirm against the cuSPARSE documentation).
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10778    pub fn cusparseSDDMM_bufferSize(
10779        handle: cusparseHandle_t,
10780        opA: cusparseOperation_t,
10781        opB: cusparseOperation_t,
10782        alpha: *const ::core::ffi::c_void,
10783        matA: cusparseConstDnMatDescr_t,
10784        matB: cusparseConstDnMatDescr_t,
10785        beta: *const ::core::ffi::c_void,
10786        matC: cusparseSpMatDescr_t,
10787        computeType: cudaDataType,
10788        alg: cusparseSDDMMAlg_t,
10789        bufferSize: *mut size_t,
10790    ) -> cusparseStatus_t;
10791}
10792unsafe extern "C" {
    /// Optional preprocessing step for [`cusparseSDDMM`].
    ///
    /// Per the [`cusparseSDDMM`] documentation, calling this function may accelerate subsequent
    /// [`cusparseSDDMM`] calls that use the same sparsity pattern (`matC`); it makes
    /// `externalBuffer` the "active" buffer for `matC` and writes to `matC` internal state.
    /// The workspace size is obtained from [`cusparseSDDMM_bufferSize`].
    ///
    /// The parameters are the same as those of [`cusparseSDDMM`].
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10793    pub fn cusparseSDDMM_preprocess(
10794        handle: cusparseHandle_t,
10795        opA: cusparseOperation_t,
10796        opB: cusparseOperation_t,
10797        alpha: *const ::core::ffi::c_void,
10798        matA: cusparseConstDnMatDescr_t,
10799        matB: cusparseConstDnMatDescr_t,
10800        beta: *const ::core::ffi::c_void,
10801        matC: cusparseSpMatDescr_t,
10802        computeType: cudaDataType,
10803        alg: cusparseSDDMMAlg_t,
10804        externalBuffer: *mut ::core::ffi::c_void,
10805    ) -> cusparseStatus_t;
10806}
10807unsafe extern "C" {
10808    /// This function performs the multiplication of `matA` and `matB`, followed by an element-wise multiplication with the sparsity pattern of `matC`. Formally, it performs the following operation:
10809    ///
    /// $\mathbf{C} = \alpha\left( {op}(\mathbf{A}) \cdot {op}(\mathbf{B}) \right) \circ {spy}\left( \mathbf{C} \right) + \beta\mathbf{C}$
    ///
10810    /// where
10811    ///
10812    /// * `op(A)` is a dense matrix of size $m \times k$
10813    /// * `op(B)` is a dense matrix of size $k \times n$
10814    /// * `C` is a sparse matrix of size $m \times n$
10815    /// * $\alpha$ and $\beta$ are scalars
10816    /// * $\circ$ denotes the Hadamard (entry-wise) matrix product, and ${spy}\left( \mathbf{C} \right)$ is the structural sparsity pattern matrix of `C` defined as:
10817    ///
10818    /// ![image8](_images/spy-c.png)
10819    ///
10820    /// Also, for matrix `A` and `B`
10821    ///
10822    /// ![image13](_images/op-a-non-xpose-2.png)
10823    ///
10824    /// ![image7](_images/op-b-non-xpose-2.png)
10825    ///
10826    /// The function [`cusparseSDDMM_bufferSize`] returns the size of the workspace needed by [`cusparseSDDMM`] or [`cusparseSDDMM_preprocess`].
10827    ///
10828    /// Calling [`cusparseSDDMM_preprocess`] is optional.
10829    /// It may accelerate subsequent calls to [`cusparseSDDMM`].
10830    /// It is useful when [`cusparseSDDMM`] is called multiple times with the same sparsity pattern (`matC`).
10831    ///
10832    /// Calling [`cusparseSDDMM_preprocess`] with `buffer` makes that buffer “active” for `matC` SDDMM calls.
10833    /// Subsequent calls to [`cusparseSDDMM`] with `matC` and the active buffer
10834    /// must use the same values for all parameters as the call to [`cusparseSDDMM_preprocess`].
10835    /// The exceptions are: `alpha`, `beta`, `matA`, `matB`, and the values (but not indices) of `matC` may be different.
10836    /// Importantly, the buffer contents must be unmodified since the call to [`cusparseSDDMM_preprocess`].
10837    /// When [`cusparseSDDMM`] is called with `matC` and its active buffer, it may read acceleration data from the buffer.
10838    ///
10839    /// Calling [`cusparseSDDMM_preprocess`] again with `matC` and a new buffer will make the new buffer active,
10840    /// forgetting about the previously-active buffer and making it inactive.
10841    /// For [`cusparseSDDMM`], there can only be one active buffer per sparse matrix at a time.
10842    /// To get the effect of multiple active buffers for a single sparse matrix,
10843    /// create multiple matrix handles that all point to the same index and value buffers,
10844    /// and call [`cusparseSDDMM_preprocess`] once per handle with different workspace buffers.
10845    ///
10846    /// Calling [`cusparseSDDMM`] with an inactive buffer is always permitted.
10847    /// However, there may be no acceleration from the preprocessing in that case.
10848    ///
10849    /// For the purposes of thread safety,
10850    /// [`cusparseSDDMM_preprocess`] is writing to `matC` internal state.
10851    ///
10852    /// Currently supported sparse matrix formats:
10853    ///
10854    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
10855    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`]
10856    ///
10857    /// [`cusparseSDDMM`] supports the following index type for representing the sparse matrix `matC`:
10858    ///
10859    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
10860    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
10861    ///
10862    /// The data types combinations currently supported for [`cusparseSDDMM`] are listed below:
10863    ///
10864    /// Uniform-precision computation:
10865    ///
10866    /// | `A`/`B`/`C`/`computeType` |
10867    /// | --- |
10868    /// | [`cudaDataType_t::CUDA_R_32F`] |
10869    /// | [`cudaDataType_t::CUDA_R_64F`] |
10870    /// | [`cudaDataType_t::CUDA_C_32F`] |
10871    /// | [`cudaDataType_t::CUDA_C_64F`] |
10872    ///
10873    /// Mixed-precision computation:
10874    ///
10875    /// | `A`/`B` | `C` | `computeType` |
10876    /// | --- | --- | --- |
10877    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
10878    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] |
10879    ///
10880    /// [`cusparseSDDMM`] for [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`] also supports the following mixed-precision computation:
10881    ///
10882    /// | `A`/`B` | `C` | `computeType` |
10883    /// | --- | --- | --- |
10884    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
10885    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] |
10886    ///
10887    /// NOTE: [`cudaDataType_t::CUDA_R_16F`], [`cudaDataType_t::CUDA_R_16BF`] data types always imply mixed-precision computation.
10888    ///
10889    /// [`cusparseSDDMM`] for [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`] supports block sizes of 2, 4, 8, 16, 32, 64 and 128.
10890    ///
10891    /// [`cusparseSDDMM`] supports the following algorithms:
10892    ///
10893    /// | Algorithm | Notes |
10894    /// | --- | --- |
10895    /// | [`cusparseSDDMMAlg_t::CUSPARSE_SDDMM_ALG_DEFAULT`] | Default algorithm. It supports batched computation. |
10896    ///
10897    /// Performance notes: [`cusparseSDDMM`] for [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`] provides the best performance when `matA` and `matB` satisfy:
10898    ///
10899    /// * `matA`:
10900    ///
10901    /// + `matA` is in row-major order and `opA` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
10902    /// + `matA` is in col-major order and `opA` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
10903    /// * `matB`:
10904    ///
10905    /// + `matB` is in col-major order and `opB` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
10906    /// + `matB` is in row-major order and `opB` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
10907    ///
10908    /// [`cusparseSDDMM`] for [`cusparseFormat_t::CUSPARSE_FORMAT_BSR`] provides the best performance when `matA` and `matB` satisfy:
10909    ///
10910    /// * `matA`:
10911    ///
10912    /// + `matA` is in row-major order and `opA` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
10913    /// + `matA` is in col-major order and `opA` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
10914    /// * `matB`:
10915    ///
10916    /// + `matB` is in row-major order and `opB` is [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`], or
10917    /// + `matB` is in col-major order and `opB` is not [`cusparseOperation_t::CUSPARSE_OPERATION_NON_TRANSPOSE`]
10918    ///
10919    /// [`cusparseSDDMM`] supports the following batch modes:
10920    ///
10921    /// * $C_{i} = (A \cdot B) \circ C_{i}$
10922    /// * $C_{i} = \left( A_{i} \cdot B \right) \circ C_{i}$
10923    /// * $C_{i} = \left( A \cdot B_{i} \right) \circ C_{i}$
10924    /// * $C_{i} = \left( A_{i} \cdot B_{i} \right) \circ C_{i}$
10925    ///
10926    /// The number of batches and their strides can be set by using [`cusparseCsrSetStridedBatch`] and [`cusparseDnMatSetStridedBatch`]. The maximum number of batches for [`cusparseSDDMM`] is 65,535.
10927    ///
10928    /// [`cusparseSDDMM`] has the following properties:
10929    ///
10930    /// * The routine requires no extra storage
10931    /// * Provides deterministic (bit-wise) results for each run
10932    /// * The routine supports asynchronous execution
10933    /// * The routine allows the indices of `matC` to be unsorted
10934    ///
10935    /// [`cusparseSDDMM`] supports the following optimizations:
10936    ///
10937    /// * CUDA graph capture
10938    /// * Hardware Memory Compression
10939    ///
10940    /// Please visit [cuSPARSE Library Samples - cusparseSDDMM](https://github.com/NVIDIA/CUDALibrarySamples/blob/main/cuSPARSE/sddmm_csr) for a code example. For batched computation please visit [cusparseSDDMM CSR Batched](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/sddmm_csr_batched).
10941    ///
10942    /// # Parameters
10943    ///
10944    /// - `handle`: Handle to the cuSPARSE library context.
10945    /// - `opA`: Operation `op(A)`.
10946    /// - `opB`: Operation `op(B)`.
10947    /// - `alpha`: $\alpha$ scalar used for multiplication of type `computeType`.
10948    /// - `matA`: Dense matrix `matA`.
10949    /// - `matB`: Dense matrix `matB`.
10950    /// - `beta`: $\beta$ scalar used for multiplication of type `computeType`.
10951    /// - `matC`: Sparse matrix `matC`.
10952    /// - `computeType`: Datatype in which the computation is executed.
10953    /// - `alg`: Algorithm for the computation.
10954    /// - `externalBuffer`: Pointer to a workspace buffer of at least `bufferSize` bytes.
10955    pub fn cusparseSDDMM(
10956        handle: cusparseHandle_t,
10957        opA: cusparseOperation_t,
10958        opB: cusparseOperation_t,
10959        alpha: *const ::core::ffi::c_void,
10960        matA: cusparseConstDnMatDescr_t,
10961        matB: cusparseConstDnMatDescr_t,
10962        beta: *const ::core::ffi::c_void,
10963        matC: cusparseSpMatDescr_t,
10964        computeType: cudaDataType,
10965        alg: cusparseSDDMMAlg_t,
10966        externalBuffer: *mut ::core::ffi::c_void,
10967    ) -> cusparseStatus_t;
10968}
/// Opaque foreign type for an SpMMOp plan.
///
/// The only field is the zero-length `_unused` placeholder, so the struct exposes no layout and
/// is only used behind the pointer alias [`cusparseSpMMOpPlan_t`].
10969#[repr(C)]
10970#[derive(Debug, Copy, Clone)]
10971pub struct cusparseSpMMOpPlan {
10972    _unused: [u8; 0],
10973}
/// Raw mutable pointer to the opaque [`cusparseSpMMOpPlan`] struct; this is the plan handle
/// type taken by [`cusparseSpMMOp`] and [`cusparseSpMMOp_destroyPlan`].
10974pub type cusparseSpMMOpPlan_t = *mut cusparseSpMMOpPlan;
/// Algorithm selector passed as the `alg` argument of [`cusparseSpMMOp_createPlan`].
///
/// Represented as a `u32`; only one variant is declared.
10975#[repr(u32)]
10976#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
10977pub enum cusparseSpMMOpAlg_t {
    /// Default algorithm for any sparse matrix format.
10978    CUSPARSE_SPMM_OP_ALG_DEFAULT = 0,
10979}
10980unsafe extern "C" {
    /// Returns the size of the workspace and the compiled kernel needed by [`cusparseSpMMOp`]
    /// (as stated in the [`cusparseSpMMOp`] documentation).
    ///
    /// # Parameters
    ///
    /// - `handle`: cuSPARSE library handle.
    /// - `plan`: pointer to a [`cusparseSpMMOpPlan_t`] slot.
    /// - `opA`, `opB`: operation selectors applied to `matA` and `matB`.
    /// - `matA`: const sparse-matrix descriptor handle.
    /// - `matB`: const dense-matrix descriptor handle.
    /// - `matC`: mutable dense-matrix descriptor handle.
    /// - `computeType`: data-type selector.
    /// - `alg`: algorithm selector.
    /// - `addOperationLtoirBuffer` / `addOperationBufferSize`: untyped const pointer and
    ///   `size_t` length for the custom **add** operator.
    /// - `mulOperationLtoirBuffer` / `mulOperationBufferSize`: same, for the custom **mul**
    ///   operator.
    /// - `epilogueLtoirBuffer` / `epilogueBufferSize`: same, for the custom **epilogue**
    ///   operator.
    /// - `SpMMWorkspaceSize`: pointer to a `size_t`.
    ///
    /// NOTE(review): presumably the three buffers hold the operators compiled to LTO-IR, the
    /// new plan handle is written through `plan`, and the workspace size in bytes is written
    /// through `SpMMWorkspaceSize` — confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; all handles and raw pointers are passed to the C library
    /// unchecked.
10981    pub fn cusparseSpMMOp_createPlan(
10982        handle: cusparseHandle_t,
10983        plan: *mut cusparseSpMMOpPlan_t,
10984        opA: cusparseOperation_t,
10985        opB: cusparseOperation_t,
10986        matA: cusparseConstSpMatDescr_t,
10987        matB: cusparseConstDnMatDescr_t,
10988        matC: cusparseDnMatDescr_t,
10989        computeType: cudaDataType,
10990        alg: cusparseSpMMOpAlg_t,
10991        addOperationLtoirBuffer: *const ::core::ffi::c_void,
10992        addOperationBufferSize: size_t,
10993        mulOperationLtoirBuffer: *const ::core::ffi::c_void,
10994        mulOperationBufferSize: size_t,
10995        epilogueLtoirBuffer: *const ::core::ffi::c_void,
10996        epilogueBufferSize: size_t,
10997        SpMMWorkspaceSize: *mut size_t,
10998    ) -> cusparseStatus_t;
10999}
11000unsafe extern "C" {
11001    /// *NOTE 1:* NVRTC and nvJitLink are not currently available on Arm64 Android platforms.
11002    ///
11003    /// *NOTE 2:* The routine does not support Android and Tegra platforms except Judy (sm87).
11004    ///
11005    /// *Experimental*: The function performs the multiplication of a sparse matrix `matA` and a dense matrix `matB` with custom operators.
11006    ///
    /// NOTE(review): the formula was dropped when these docs were generated; from the operator
    /// description below it is presumably
    /// $\mathbf{C}_{ij}' = \text{epilogue}\left( \sum_{k}^{\oplus} {op}(\mathbf{A})_{ik} \otimes {op}(\mathbf{B})_{kj},\ \mathbf{C}_{ij} \right)$
    /// — confirm against the cuSPARSE documentation.
    ///
11007    /// where
11008    ///
11009    /// * `op(A)` is a sparse matrix of size $m \times k$
11010    /// * `op(B)` is a dense matrix of size $k \times n$
11011    /// * `C` is a dense matrix of size $m \times n$
11012    /// * $\oplus$, $\otimes$, and $\text{epilogue}$ are custom **add**, **mul**, and **epilogue** operators respectively.
11013    ///
11014    /// Also, for matrix `A` and `B`
11015    ///
11016    /// ![image13](_images/op-a-non-xpose-2.png)
11017    ///
11018    /// ![image7](_images/op-b-non-xpose-2.png)
11019    ///
11020    /// Only `opA == CUSPARSE_OPERATION_NON_TRANSPOSE` is currently supported.
11021    ///
11022    /// The function [`cusparseSpMMOp_createPlan`] returns the size of the workspace and the compiled kernel needed by [`cusparseSpMMOp`].
11023    ///
11024    /// The operators must have the following signature and return type
11025    ///
    /// (NOTE(review): the code listing with the operator signatures is missing from these
    /// generated docs; see the cuSPARSE documentation for `cusparseSpMMOp`.)
    ///
11026    /// `<computetype>` is one of `float`, `double`, [`cuComplex`], [`cuDoubleComplex`], or `int`,
11027    ///
11028    /// [`cusparseSpMMOp`] supports the following sparse matrix formats:
11029    ///
11030    /// * [`cusparseFormat_t::CUSPARSE_FORMAT_CSR`]
11031    ///
11032    /// [`cusparseSpMMOp`] supports the following index type for representing the sparse matrix `matA`:
11033    ///
11034    /// * 32-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_32I`])
11035    /// * 64-bit indices ([`cusparseIndexType_t::CUSPARSE_INDEX_64I`])
11036    ///
11037    /// [`cusparseSpMMOp`] supports the following data types:
11038    ///
11039    /// Uniform-precision computation:
11040    ///
11041    /// | `A`/`B`/ `C`/`computeType` |
11042    /// | --- |
11043    /// | [`cudaDataType_t::CUDA_R_32F`] |
11044    /// | [`cudaDataType_t::CUDA_R_64F`] |
11045    /// | [`cudaDataType_t::CUDA_C_32F`] |
11046    /// | [`cudaDataType_t::CUDA_C_64F`] |
11047    ///
11048    /// Mixed-precision computation:
11049    ///
11050    /// | `A`/`B` | `C` | `computeType` |
11051    /// | --- | --- | --- |
11052    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32I`] | [`cudaDataType_t::CUDA_R_32I`] |
11053    /// | [`cudaDataType_t::CUDA_R_8I`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
11054    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
11055    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] | [`cudaDataType_t::CUDA_R_32F`] |
11056    /// | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_16F`] | [`cudaDataType_t::CUDA_R_32F`] |
11057    /// | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_16BF`] | [`cudaDataType_t::CUDA_R_32F`] |
11058    ///
11059    /// [`cusparseSpMMOp`] supports the following algorithms:
11060    ///
11061    /// | Algorithm | Notes |
11062    /// | --- | --- |
11063    /// | [`cusparseSpMMOpAlg_t::CUSPARSE_SPMM_OP_ALG_DEFAULT`] | Default algorithm for any sparse matrix format |
11064    ///
11065    /// **Performance notes:**
11066    ///
11067    /// * Row-major layout provides higher performance than column-major.
11068    ///
11069    /// [`cusparseSpMMOp`] has the following properties:
11070    ///
11071    /// * The routine requires extra storage
11072    /// * The routine supports asynchronous execution
11073    /// * Provides deterministic (bit-wise) results for each run
11074    /// * The routine allows the indices of `matA` to be unsorted
11075    ///
11076    /// [`cusparseSpMMOp`] supports the following optimizations:
11077    ///
11078    /// * CUDA graph capture
11079    /// * Hardware Memory Compression
11080    ///
11081    /// Please visit [cuSPARSE Library Samples - cusparseSpMMOp](https://github.com/NVIDIA/CUDALibrarySamples/tree/main/cuSPARSE/spmm_csr_op).
    ///
    /// # Parameters
    ///
    /// - `plan`: Plan handle; the matrices, operations and operators described above are not
    ///   arguments of this function (presumably they are captured by the plan built with
    ///   [`cusparseSpMMOp_createPlan`] — NOTE(review): confirm).
    /// - `externalBuffer`: Untyped pointer to a caller-provided workspace buffer.
11082    pub fn cusparseSpMMOp(
11083        plan: cusparseSpMMOpPlan_t,
11084        externalBuffer: *mut ::core::ffi::c_void,
11085    ) -> cusparseStatus_t;
11086}
11087unsafe extern "C" {
    /// FFI declaration of the cuSPARSE C function `cusparseSpMMOp_destroyPlan`.
    ///
    /// Takes a [`cusparseSpMMOpPlan_t`] by value and returns a [`cusparseStatus_t`].
    ///
    /// NOTE(review): presumably releases a plan obtained from [`cusparseSpMMOp_createPlan`],
    /// after which the handle must not be reused — confirm against the cuSPARSE documentation.
    ///
    /// # Safety
    ///
    /// This is a foreign function; `plan` is handed to the C library unchecked.
11088    pub fn cusparseSpMMOp_destroyPlan(plan: cusparseSpMMOpPlan_t) -> cusparseStatus_t;
11089}
singe_cusparse_sys/sys_12709.rs

singe_cusparse_sys/
sys_12709.rs