singe_cusolver/
dense.rs

1#[allow(unused_imports)]
2use crate::error::Status;
3
4use singe_cuda::{
5    data_type::{DataType, DataTypeLike},
6    memory::DeviceMemory,
7    types::{Complex32, Complex64},
8};
9
10use crate::{
11    context::Context,
12    error::{Error, Result},
13    layout::{
14        BatchedMatrixRef, BatchedVectorRef, ByteWorkspaceMut, MatrixMut, MatrixRef, VectorMut,
15        VectorRef, WorkspaceSizes,
16    },
17    params::Params,
18    sys, try_ffi,
19    types::{DiagonalType, DirectMode, FillMode, Operation, SideMode, StorevMode},
20    utility::{to_i32, to_i64, to_usize},
21};
22
23pub fn spotrf_buffer_size(
24    ctx: &Context,
25    fill_mode: FillMode,
26    n: usize,
27    a: &mut DeviceMemory<f32>,
28    lda: usize,
29) -> Result<usize> {
30    ctx.bind()?;
31    validate_square_matrix(n, a.len(), lda)?;
32    let mut lwork = 0;
33    unsafe {
34        try_ffi!(sys::cusolverDnSpotrf_bufferSize(
35            ctx.as_raw(),
36            fill_mode.into(),
37            to_i32(n, "n")?,
38            a.as_mut_ptr().cast(),
39            to_i32(lda, "lda")?,
40            &raw mut lwork,
41        ))?;
42    }
43    to_usize(lwork, "lwork")
44}
45
46pub fn dpotrf_buffer_size(
47    ctx: &Context,
48    fill_mode: FillMode,
49    n: usize,
50    a: &mut DeviceMemory<f64>,
51    lda: usize,
52) -> Result<usize> {
53    ctx.bind()?;
54    validate_square_matrix(n, a.len(), lda)?;
55    let mut lwork = 0;
56    unsafe {
57        try_ffi!(sys::cusolverDnDpotrf_bufferSize(
58            ctx.as_raw(),
59            fill_mode.into(),
60            to_i32(n, "n")?,
61            a.as_mut_ptr().cast(),
62            to_i32(lda, "lda")?,
63            &raw mut lwork,
64        ))?;
65    }
66    to_usize(lwork, "lwork")
67}
68
69pub fn cpotrf_buffer_size(
70    ctx: &Context,
71    fill_mode: FillMode,
72    n: usize,
73    a: &mut DeviceMemory<Complex32>,
74    lda: usize,
75) -> Result<usize> {
76    ctx.bind()?;
77    validate_square_matrix(n, a.len(), lda)?;
78    let mut lwork = 0;
79    unsafe {
80        try_ffi!(sys::cusolverDnCpotrf_bufferSize(
81            ctx.as_raw(),
82            fill_mode.into(),
83            to_i32(n, "n")?,
84            a.as_mut_ptr().cast(),
85            to_i32(lda, "lda")?,
86            &raw mut lwork,
87        ))?;
88    }
89    to_usize(lwork, "lwork")
90}
91
92pub fn zpotrf_buffer_size(
93    ctx: &Context,
94    fill_mode: FillMode,
95    n: usize,
96    a: &mut DeviceMemory<Complex64>,
97    lda: usize,
98) -> Result<usize> {
99    ctx.bind()?;
100    validate_square_matrix(n, a.len(), lda)?;
101    let mut lwork = 0;
102    unsafe {
103        try_ffi!(sys::cusolverDnZpotrf_bufferSize(
104            ctx.as_raw(),
105            fill_mode.into(),
106            to_i32(n, "n")?,
107            a.as_mut_ptr().cast(),
108            to_i32(lda, "lda")?,
109            &raw mut lwork,
110        ))?;
111    }
112    to_usize(lwork, "lwork")
113}
114
115/// Use the matching buffer-size helper to calculate the required workspace size.
116///
117/// The S and D data types are real valued single and double precision, respectively.
118///
119/// The C and Z data types are complex valued single and double precision, respectively.
120///
121/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
122///
123/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
124/// `fill_mode` indicates which part of the matrix is used.
125/// The other triangular part is left unchanged.
126///
127/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
128///
129/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
130///
131/// Provide workspace through `workspace`.
132/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
133///
134/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
135/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
136///
137/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
138///
139/// # Errors
140///
141/// Returns an error if cuSOLVER has not been initialized, if the
142/// matrix dimensions or leading dimension are invalid, if the current GPU
143/// architecture is unsupported, or if cuSOLVER reports an internal failure.
144pub fn spotrf(
145    ctx: &Context,
146    fill_mode: FillMode,
147    n: usize,
148    a: &mut DeviceMemory<f32>,
149    lda: usize,
150    workspace: &mut DeviceMemory<f32>,
151    dev_info: &mut DeviceMemory<i32>,
152) -> Result<()> {
153    ctx.bind()?;
154    validate_square_matrix(n, a.len(), lda)?;
155    require_info_buffer(dev_info)?;
156    let lwork = spotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
157    require_workspace(workspace.len(), lwork)?;
158    unsafe {
159        try_ffi!(sys::cusolverDnSpotrf(
160            ctx.as_raw(),
161            fill_mode.into(),
162            to_i32(n, "n")?,
163            a.as_mut_ptr().cast(),
164            to_i32(lda, "lda")?,
165            workspace.as_mut_ptr().cast(),
166            to_i32(lwork, "lwork")?,
167            dev_info.as_mut_ptr().cast(),
168        ))?;
169    }
170    Ok(())
171}
172
173/// Use the matching buffer-size helper to calculate the required workspace size.
174///
175/// The S and D data types are real valued single and double precision, respectively.
176///
177/// The C and Z data types are complex valued single and double precision, respectively.
178///
179/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
180///
181/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
182/// `fill_mode` indicates which part of the matrix is used.
183/// The other triangular part is left unchanged.
184///
185/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
186///
187/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
188///
189/// Provide workspace through `workspace`.
190/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
191///
192/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
193/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
194///
195/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
196///
197/// # Errors
198///
199/// Returns an error if cuSOLVER has not been initialized, if the
200/// matrix dimensions or leading dimension are invalid, if the current GPU
201/// architecture is unsupported, or if cuSOLVER reports an internal failure.
202pub fn dpotrf(
203    ctx: &Context,
204    fill_mode: FillMode,
205    n: usize,
206    a: &mut DeviceMemory<f64>,
207    lda: usize,
208    workspace: &mut DeviceMemory<f64>,
209    dev_info: &mut DeviceMemory<i32>,
210) -> Result<()> {
211    ctx.bind()?;
212    validate_square_matrix(n, a.len(), lda)?;
213    require_info_buffer(dev_info)?;
214    let lwork = dpotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
215    require_workspace(workspace.len(), lwork)?;
216    unsafe {
217        try_ffi!(sys::cusolverDnDpotrf(
218            ctx.as_raw(),
219            fill_mode.into(),
220            to_i32(n, "n")?,
221            a.as_mut_ptr().cast(),
222            to_i32(lda, "lda")?,
223            workspace.as_mut_ptr().cast(),
224            to_i32(lwork, "lwork")?,
225            dev_info.as_mut_ptr().cast(),
226        ))?;
227    }
228    Ok(())
229}
230
231/// Use the matching buffer-size helper to calculate the required workspace size.
232///
233/// The S and D data types are real valued single and double precision, respectively.
234///
235/// The C and Z data types are complex valued single and double precision, respectively.
236///
237/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
238///
239/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
240/// `fill_mode` indicates which part of the matrix is used.
241/// The other triangular part is left unchanged.
242///
243/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
244///
245/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
246///
247/// Provide workspace through `workspace`.
248/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
249///
250/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
251/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
252///
253/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
254///
255/// # Errors
256///
257/// Returns an error if cuSOLVER has not been initialized, if the
258/// matrix dimensions or leading dimension are invalid, if the current GPU
259/// architecture is unsupported, or if cuSOLVER reports an internal failure.
260pub fn cpotrf(
261    ctx: &Context,
262    fill_mode: FillMode,
263    n: usize,
264    a: &mut DeviceMemory<Complex32>,
265    lda: usize,
266    workspace: &mut DeviceMemory<Complex32>,
267    dev_info: &mut DeviceMemory<i32>,
268) -> Result<()> {
269    ctx.bind()?;
270    validate_square_matrix(n, a.len(), lda)?;
271    require_info_buffer(dev_info)?;
272    let lwork = cpotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
273    require_workspace(workspace.len(), lwork)?;
274    unsafe {
275        try_ffi!(sys::cusolverDnCpotrf(
276            ctx.as_raw(),
277            fill_mode.into(),
278            to_i32(n, "n")?,
279            a.as_mut_ptr().cast(),
280            to_i32(lda, "lda")?,
281            workspace.as_mut_ptr().cast(),
282            to_i32(lwork, "lwork")?,
283            dev_info.as_mut_ptr().cast(),
284        ))?;
285    }
286    Ok(())
287}
288
289/// Use the matching buffer-size helper to calculate the required workspace size.
290///
291/// The S and D data types are real valued single and double precision, respectively.
292///
293/// The C and Z data types are complex valued single and double precision, respectively.
294///
295/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
296///
297/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
298/// `fill_mode` indicates which part of the matrix is used.
299/// The other triangular part is left unchanged.
300///
301/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
302///
303/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
304///
305/// Provide workspace through `workspace`.
306/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
307///
308/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
309/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
310///
311/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
312///
313/// # Errors
314///
315/// Returns an error if cuSOLVER has not been initialized, if the
316/// matrix dimensions or leading dimension are invalid, if the current GPU
317/// architecture is unsupported, or if cuSOLVER reports an internal failure.
318pub fn zpotrf(
319    ctx: &Context,
320    fill_mode: FillMode,
321    n: usize,
322    a: &mut DeviceMemory<Complex64>,
323    lda: usize,
324    workspace: &mut DeviceMemory<Complex64>,
325    dev_info: &mut DeviceMemory<i32>,
326) -> Result<()> {
327    ctx.bind()?;
328    validate_square_matrix(n, a.len(), lda)?;
329    require_info_buffer(dev_info)?;
330    let lwork = zpotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
331    require_workspace(workspace.len(), lwork)?;
332    unsafe {
333        try_ffi!(sys::cusolverDnZpotrf(
334            ctx.as_raw(),
335            fill_mode.into(),
336            to_i32(n, "n")?,
337            a.as_mut_ptr().cast(),
338            to_i32(lda, "lda")?,
339            workspace.as_mut_ptr().cast(),
340            to_i32(lwork, "lwork")?,
341            dev_info.as_mut_ptr().cast(),
342        ))?;
343    }
344    Ok(())
345}
346
347/// Solves a system of linear equations
348///
349/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
350/// `fill_mode` indicates which part of the matrix is used.
351/// The other triangular part is left unchanged.
352///
353/// Call `potrf` first to factorize matrix `A`.
354/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
355/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
356///
357/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
358///
359/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
360///
361/// # Errors
362///
363/// Returns an error if cuSOLVER has not been initialized, if the
364/// matrix dimensions, right-hand-side count, or leading dimensions are
365/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
366/// reports an internal failure.
367pub fn spotrs(
368    ctx: &Context,
369    fill_mode: FillMode,
370    n: usize,
371    nrhs: usize,
372    a: &DeviceMemory<f32>,
373    lda: usize,
374    b: &mut DeviceMemory<f32>,
375    ldb: usize,
376    dev_info: &mut DeviceMemory<i32>,
377) -> Result<()> {
378    ctx.bind()?;
379    validate_square_matrix(n, a.len(), lda)?;
380    validate_matrix(n, nrhs, b.len(), ldb)?;
381    require_info_buffer(dev_info)?;
382    unsafe {
383        try_ffi!(sys::cusolverDnSpotrs(
384            ctx.as_raw(),
385            fill_mode.into(),
386            to_i32(n, "n")?,
387            to_i32(nrhs, "nrhs")?,
388            a.as_ptr().cast(),
389            to_i32(lda, "lda")?,
390            b.as_mut_ptr().cast(),
391            to_i32(ldb, "ldb")?,
392            dev_info.as_mut_ptr().cast(),
393        ))?;
394    }
395    Ok(())
396}
397
398/// Solves a system of linear equations
399///
400/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
401/// `fill_mode` indicates which part of the matrix is used.
402/// The other triangular part is left unchanged.
403///
404/// Call `potrf` first to factorize matrix `A`.
405/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
406/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
407///
408/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
409///
410/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
411///
412/// # Errors
413///
414/// Returns an error if cuSOLVER has not been initialized, if the
415/// matrix dimensions, right-hand-side count, or leading dimensions are
416/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
417/// reports an internal failure.
418pub fn dpotrs(
419    ctx: &Context,
420    fill_mode: FillMode,
421    n: usize,
422    nrhs: usize,
423    a: &DeviceMemory<f64>,
424    lda: usize,
425    b: &mut DeviceMemory<f64>,
426    ldb: usize,
427    dev_info: &mut DeviceMemory<i32>,
428) -> Result<()> {
429    ctx.bind()?;
430    validate_square_matrix(n, a.len(), lda)?;
431    validate_matrix(n, nrhs, b.len(), ldb)?;
432    require_info_buffer(dev_info)?;
433    unsafe {
434        try_ffi!(sys::cusolverDnDpotrs(
435            ctx.as_raw(),
436            fill_mode.into(),
437            to_i32(n, "n")?,
438            to_i32(nrhs, "nrhs")?,
439            a.as_ptr().cast(),
440            to_i32(lda, "lda")?,
441            b.as_mut_ptr().cast(),
442            to_i32(ldb, "ldb")?,
443            dev_info.as_mut_ptr().cast(),
444        ))?;
445    }
446    Ok(())
447}
448
449/// Solves a system of linear equations
450///
451/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
452/// `fill_mode` indicates which part of the matrix is used.
453/// The other triangular part is left unchanged.
454///
455/// Call `potrf` first to factorize matrix `A`.
456/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
457/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
458///
459/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
460///
461/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
462///
463/// # Errors
464///
465/// Returns an error if cuSOLVER has not been initialized, if the
466/// matrix dimensions, right-hand-side count, or leading dimensions are
467/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
468/// reports an internal failure.
469pub fn cpotrs(
470    ctx: &Context,
471    fill_mode: FillMode,
472    n: usize,
473    nrhs: usize,
474    a: &DeviceMemory<Complex32>,
475    lda: usize,
476    b: &mut DeviceMemory<Complex32>,
477    ldb: usize,
478    dev_info: &mut DeviceMemory<i32>,
479) -> Result<()> {
480    ctx.bind()?;
481    validate_square_matrix(n, a.len(), lda)?;
482    validate_matrix(n, nrhs, b.len(), ldb)?;
483    require_info_buffer(dev_info)?;
484    unsafe {
485        try_ffi!(sys::cusolverDnCpotrs(
486            ctx.as_raw(),
487            fill_mode.into(),
488            to_i32(n, "n")?,
489            to_i32(nrhs, "nrhs")?,
490            a.as_ptr().cast(),
491            to_i32(lda, "lda")?,
492            b.as_mut_ptr().cast(),
493            to_i32(ldb, "ldb")?,
494            dev_info.as_mut_ptr().cast(),
495        ))?;
496    }
497    Ok(())
498}
499
500/// Solves a system of linear equations
501///
502/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
503/// `fill_mode` indicates which part of the matrix is used.
504/// The other triangular part is left unchanged.
505///
506/// Call `potrf` first to factorize matrix `A`.
507/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
508/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
509///
510/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
511///
512/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
513///
514/// # Errors
515///
516/// Returns an error if cuSOLVER has not been initialized, if the
517/// matrix dimensions, right-hand-side count, or leading dimensions are
518/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
519/// reports an internal failure.
520pub fn zpotrs(
521    ctx: &Context,
522    fill_mode: FillMode,
523    n: usize,
524    nrhs: usize,
525    a: &DeviceMemory<Complex64>,
526    lda: usize,
527    b: &mut DeviceMemory<Complex64>,
528    ldb: usize,
529    dev_info: &mut DeviceMemory<i32>,
530) -> Result<()> {
531    ctx.bind()?;
532    validate_square_matrix(n, a.len(), lda)?;
533    validate_matrix(n, nrhs, b.len(), ldb)?;
534    require_info_buffer(dev_info)?;
535    unsafe {
536        try_ffi!(sys::cusolverDnZpotrs(
537            ctx.as_raw(),
538            fill_mode.into(),
539            to_i32(n, "n")?,
540            to_i32(nrhs, "nrhs")?,
541            a.as_ptr().cast(),
542            to_i32(lda, "lda")?,
543            b.as_mut_ptr().cast(),
544            to_i32(ldb, "ldb")?,
545            dev_info.as_mut_ptr().cast(),
546        ))?;
547    }
548    Ok(())
549}
550
551/// The S and D data types are real valued single and double precision, respectively.
552///
553/// The C and Z data types are complex valued single and double precision, respectively.
554///
555/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
556///
557/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
558/// `fill_mode` indicates which part of the matrix is used.
559///
560/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
561///
562/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
563///
564/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
565/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
566///
567/// `info` must have one integer entry for each matrix in the batch.
568/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
569/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
570///
571/// The other part of `A` is used as workspace.
572/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
573///
574/// # Errors
575///
576/// Returns an error if cuSOLVER has not been initialized, if the
577/// matrix dimensions, leading dimension, or batch size are invalid, or if
578/// cuSOLVER reports an internal failure.
579pub fn spotrf_batched(
580    ctx: &Context,
581    fill_mode: FillMode,
582    n: usize,
583    a: BatchedMatrixRef<'_, f32>,
584    info: &mut DeviceMemory<i32>,
585) -> Result<()> {
586    ctx.bind()?;
587    validate_batched_square_matrix_pointers(n, a)?;
588    require_info_entries(info, a.len())?;
589    unsafe {
590        try_ffi!(sys::cusolverDnSpotrfBatched(
591            ctx.as_raw(),
592            fill_mode.into(),
593            to_i32(n, "n")?,
594            a.as_mut_ptr(),
595            to_i32(a.leading_dimension, "lda")?,
596            info.as_mut_ptr().cast(),
597            to_i32(a.len(), "batch_size")?,
598        ))?;
599    }
600    Ok(())
601}
602
603/// The S and D data types are real valued single and double precision, respectively.
604///
605/// The C and Z data types are complex valued single and double precision, respectively.
606///
607/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
608///
609/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
610/// `fill_mode` indicates which part of the matrix is used.
611///
612/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
613///
614/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
615///
616/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
617/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
618///
619/// `info` must have one integer entry for each matrix in the batch.
620/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
621/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
622///
623/// The other part of `A` is used as workspace.
624/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
625///
626/// # Errors
627///
628/// Returns an error if cuSOLVER has not been initialized, if the
629/// matrix dimensions, leading dimension, or batch size are invalid, or if
630/// cuSOLVER reports an internal failure.
631pub fn dpotrf_batched(
632    ctx: &Context,
633    fill_mode: FillMode,
634    n: usize,
635    a: BatchedMatrixRef<'_, f64>,
636    info: &mut DeviceMemory<i32>,
637) -> Result<()> {
638    ctx.bind()?;
639    validate_batched_square_matrix_pointers(n, a)?;
640    require_info_entries(info, a.len())?;
641    unsafe {
642        try_ffi!(sys::cusolverDnDpotrfBatched(
643            ctx.as_raw(),
644            fill_mode.into(),
645            to_i32(n, "n")?,
646            a.as_mut_ptr(),
647            to_i32(a.leading_dimension, "lda")?,
648            info.as_mut_ptr().cast(),
649            to_i32(a.len(), "batch_size")?,
650        ))?;
651    }
652    Ok(())
653}
654
655/// The S and D data types are real valued single and double precision, respectively.
656///
657/// The C and Z data types are complex valued single and double precision, respectively.
658///
659/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
660///
661/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
662/// `fill_mode` indicates which part of the matrix is used.
663///
664/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
665///
666/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
667///
668/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
669/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
670///
671/// `info` must have one integer entry for each matrix in the batch.
672/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
673/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
674///
675/// The other part of `A` is used as workspace.
676/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
677///
678/// # Errors
679///
680/// Returns an error if cuSOLVER has not been initialized, if the
681/// matrix dimensions, leading dimension, or batch size are invalid, or if
682/// cuSOLVER reports an internal failure.
683pub fn cpotrf_batched(
684    ctx: &Context,
685    fill_mode: FillMode,
686    n: usize,
687    a: BatchedMatrixRef<'_, Complex32>,
688    info: &mut DeviceMemory<i32>,
689) -> Result<()> {
690    ctx.bind()?;
691    validate_batched_square_matrix_pointers(n, a)?;
692    require_info_entries(info, a.len())?;
693    unsafe {
694        try_ffi!(sys::cusolverDnCpotrfBatched(
695            ctx.as_raw(),
696            fill_mode.into(),
697            to_i32(n, "n")?,
698            a.as_mut_ptr().cast(),
699            to_i32(a.leading_dimension, "lda")?,
700            info.as_mut_ptr().cast(),
701            to_i32(a.len(), "batch_size")?,
702        ))?;
703    }
704    Ok(())
705}
706
707/// The S and D data types are real valued single and double precision, respectively.
708///
709/// The C and Z data types are complex valued single and double precision, respectively.
710///
711/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
712///
713/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
714/// `fill_mode` indicates which part of the matrix is used.
715///
716/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
717///
718/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
719///
720/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
721/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
722///
723/// `info` must have one integer entry for each matrix in the batch.
724/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
725/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
726///
727/// The other part of `A` is used as workspace.
728/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
729///
730/// # Errors
731///
732/// Returns an error if cuSOLVER has not been initialized, if the
733/// matrix dimensions, leading dimension, or batch size are invalid, or if
734/// cuSOLVER reports an internal failure.
735pub fn zpotrf_batched(
736    ctx: &Context,
737    fill_mode: FillMode,
738    n: usize,
739    a: BatchedMatrixRef<'_, Complex64>,
740    info: &mut DeviceMemory<i32>,
741) -> Result<()> {
742    ctx.bind()?;
743    validate_batched_square_matrix_pointers(n, a)?;
744    require_info_entries(info, a.len())?;
745    unsafe {
746        try_ffi!(sys::cusolverDnZpotrfBatched(
747            ctx.as_raw(),
748            fill_mode.into(),
749            to_i32(n, "n")?,
750            a.as_mut_ptr().cast(),
751            to_i32(a.leading_dimension, "lda")?,
752            info.as_mut_ptr().cast(),
753            to_i32(a.len(), "batch_size")?,
754        ))?;
755    }
756    Ok(())
757}
758
759/// Solves a sequence of linear systems
760///
761/// where each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
762/// `fill_mode` indicates which part of the matrix is used.
763///
764/// Call `potrf_batched` first to factorize matrix `a[i]`.
765/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
766/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
767///
768/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
769///
770/// `info` is a single status value for the whole batched call.
771/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
772///
773/// - only `nrhs=1` is supported.
774///
775/// - `info` from `potrf_batched` indicates whether each matrix is positive definite.
776///   `info` from `potrsBatched` only reports invalid arguments for the batched call.
777///
778/// - the other part of `A` is used as a workspace.
779///   For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrsBatched`.
780///
781/// # Errors
782///
783/// Returns an error if cuSOLVER has not been initialized, if the
784/// matrix dimensions, right-hand-side count, leading dimensions, or batch
785/// size are invalid, or if cuSOLVER reports an internal failure.
786pub fn spotrs_batched(
787    ctx: &Context,
788    fill_mode: FillMode,
789    n: usize,
790    a: BatchedMatrixRef<'_, f32>,
791    b: BatchedVectorRef<'_, f32>,
792    info: &mut DeviceMemory<i32>,
793) -> Result<()> {
794    ctx.bind()?;
795    validate_batched_square_matrix_pointers(n, a)?;
796    validate_batched_vector_pointers(n, b)?;
797    require_info_buffer(info)?;
798    if a.len() != b.len() {
799        return Err(Error::InvalidMatrixShape);
800    }
801    unsafe {
802        try_ffi!(sys::cusolverDnSpotrsBatched(
803            ctx.as_raw(),
804            fill_mode.into(),
805            to_i32(n, "n")?,
806            1,
807            a.as_mut_ptr(),
808            to_i32(a.leading_dimension, "lda")?,
809            b.as_mut_ptr(),
810            to_i32(b.leading_dimension, "ldb")?,
811            info.as_mut_ptr().cast(),
812            to_i32(a.len(), "batch_size")?,
813        ))?;
814    }
815    Ok(())
816}
817
818/// Solves a sequence of linear systems
819///
820/// where each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
821/// `fill_mode` indicates which part of the matrix is used.
822///
823/// Call `potrf_batched` first to factorize matrix `a[i]`.
824/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
825/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
826///
827/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
828///
829/// `info` is a single status value for the whole batched call.
830/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
831///
832/// - only `nrhs=1` is supported.
833///
834/// - `info` from `potrf_batched` indicates whether each matrix is positive definite.
835///   `info` from `potrsBatched` only reports invalid arguments for the batched call.
836///
837/// - the other part of `A` is used as a workspace.
838///   For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrsBatched`.
839///
840/// # Errors
841///
842/// Returns an error if cuSOLVER has not been initialized, if the
843/// matrix dimensions, right-hand-side count, leading dimensions, or batch
844/// size are invalid, or if cuSOLVER reports an internal failure.
845pub fn dpotrs_batched(
846    ctx: &Context,
847    fill_mode: FillMode,
848    n: usize,
849    a: BatchedMatrixRef<'_, f64>,
850    b: BatchedVectorRef<'_, f64>,
851    info: &mut DeviceMemory<i32>,
852) -> Result<()> {
853    ctx.bind()?;
854    validate_batched_square_matrix_pointers(n, a)?;
855    validate_batched_vector_pointers(n, b)?;
856    require_info_buffer(info)?;
857    if a.len() != b.len() {
858        return Err(Error::InvalidMatrixShape);
859    }
860    unsafe {
861        try_ffi!(sys::cusolverDnDpotrsBatched(
862            ctx.as_raw(),
863            fill_mode.into(),
864            to_i32(n, "n")?,
865            1,
866            a.as_mut_ptr(),
867            to_i32(a.leading_dimension, "lda")?,
868            b.as_mut_ptr(),
869            to_i32(b.leading_dimension, "ldb")?,
870            info.as_mut_ptr().cast(),
871            to_i32(a.len(), "batch_size")?,
872        ))?;
873    }
874    Ok(())
875}
876
877/// Solves a sequence of linear systems
878///
879/// where each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
880/// `fill_mode` indicates which part of the matrix is used.
881///
882/// Call `potrf_batched` first to factorize matrix `a[i]`.
883/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
884/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
885///
886/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
887///
888/// `info` is a single status value for the whole batched call.
889/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
890///
891/// - only `nrhs=1` is supported.
892///
893/// - `info` from `potrf_batched` indicates whether each matrix is positive definite.
894///   `info` from `potrsBatched` only reports invalid arguments for the batched call.
895///
896/// - the other part of `A` is used as a workspace.
897///   For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrsBatched`.
898///
899/// # Errors
900///
901/// Returns an error if cuSOLVER has not been initialized, if the
902/// matrix dimensions, right-hand-side count, leading dimensions, or batch
903/// size are invalid, or if cuSOLVER reports an internal failure.
904pub fn zpotrs_batched(
905    ctx: &Context,
906    fill_mode: FillMode,
907    n: usize,
908    a: BatchedMatrixRef<'_, Complex64>,
909    b: BatchedVectorRef<'_, Complex64>,
910    info: &mut DeviceMemory<i32>,
911) -> Result<()> {
912    ctx.bind()?;
913    validate_batched_square_matrix_pointers(n, a)?;
914    validate_batched_vector_pointers(n, b)?;
915    require_info_buffer(info)?;
916    if a.len() != b.len() {
917        return Err(Error::InvalidMatrixShape);
918    }
919    unsafe {
920        try_ffi!(sys::cusolverDnZpotrsBatched(
921            ctx.as_raw(),
922            fill_mode.into(),
923            to_i32(n, "n")?,
924            1,
925            a.as_mut_ptr().cast(),
926            to_i32(a.leading_dimension, "lda")?,
927            b.as_mut_ptr().cast(),
928            to_i32(b.leading_dimension, "ldb")?,
929            info.as_mut_ptr().cast(),
930            to_i32(a.len(), "batch_size")?,
931        ))?;
932    }
933    Ok(())
934}
935
936pub fn spotri_buffer_size(
937    ctx: &Context,
938    fill_mode: FillMode,
939    n: usize,
940    a: &mut DeviceMemory<f32>,
941    lda: usize,
942) -> Result<usize> {
943    ctx.bind()?;
944    validate_square_matrix(n, a.len(), lda)?;
945    let mut lwork = 0;
946    unsafe {
947        try_ffi!(sys::cusolverDnSpotri_bufferSize(
948            ctx.as_raw(),
949            fill_mode.into(),
950            to_i32(n, "n")?,
951            a.as_mut_ptr().cast(),
952            to_i32(lda, "lda")?,
953            &raw mut lwork,
954        ))?;
955    }
956    to_usize(lwork, "lwork")
957}
958
959pub fn dpotri_buffer_size(
960    ctx: &Context,
961    fill_mode: FillMode,
962    n: usize,
963    a: &mut DeviceMemory<f64>,
964    lda: usize,
965) -> Result<usize> {
966    ctx.bind()?;
967    validate_square_matrix(n, a.len(), lda)?;
968    let mut lwork = 0;
969    unsafe {
970        try_ffi!(sys::cusolverDnDpotri_bufferSize(
971            ctx.as_raw(),
972            fill_mode.into(),
973            to_i32(n, "n")?,
974            a.as_mut_ptr().cast(),
975            to_i32(lda, "lda")?,
976            &raw mut lwork,
977        ))?;
978    }
979    to_usize(lwork, "lwork")
980}
981
982pub fn cpotri_buffer_size(
983    ctx: &Context,
984    fill_mode: FillMode,
985    n: usize,
986    a: &mut DeviceMemory<Complex32>,
987    lda: usize,
988) -> Result<usize> {
989    ctx.bind()?;
990    validate_square_matrix(n, a.len(), lda)?;
991    let mut lwork = 0;
992    unsafe {
993        try_ffi!(sys::cusolverDnCpotri_bufferSize(
994            ctx.as_raw(),
995            fill_mode.into(),
996            to_i32(n, "n")?,
997            a.as_mut_ptr().cast(),
998            to_i32(lda, "lda")?,
999            &raw mut lwork,
1000        ))?;
1001    }
1002    to_usize(lwork, "lwork")
1003}
1004
1005pub fn zpotri_buffer_size(
1006    ctx: &Context,
1007    fill_mode: FillMode,
1008    n: usize,
1009    a: &mut DeviceMemory<Complex64>,
1010    lda: usize,
1011) -> Result<usize> {
1012    ctx.bind()?;
1013    validate_square_matrix(n, a.len(), lda)?;
1014    let mut lwork = 0;
1015    unsafe {
1016        try_ffi!(sys::cusolverDnZpotri_bufferSize(
1017            ctx.as_raw(),
1018            fill_mode.into(),
1019            to_i32(n, "n")?,
1020            a.as_mut_ptr().cast(),
1021            to_i32(lda, "lda")?,
1022            &raw mut lwork,
1023        ))?;
1024    }
1025    to_usize(lwork, "lwork")
1026}
1027
1028/// Use the matching buffer-size helper to calculate the required workspace size.
1029///
1030/// The S and D data types are real valued single and double precision, respectively.
1031///
1032/// The C and Z data types are complex valued single and double precision, respectively.
1033///
1034/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1035///
1036/// computed by `potrf()`.
1037///
1038/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1039/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1040/// The other triangular part is left unchanged.
1041///
1042/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1043///
1044/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1045///
1046/// Provide workspace through `workspace`.
1047/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1048///
1049/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1050///
1051/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1052///
1053/// # Errors
1054///
1055/// Returns an error if cuSOLVER has not been initialized, if the
1056/// matrix dimensions or leading dimension are invalid, if the current GPU
1057/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1058pub fn spotri(
1059    ctx: &Context,
1060    fill_mode: FillMode,
1061    n: usize,
1062    a: &mut DeviceMemory<f32>,
1063    lda: usize,
1064    workspace: &mut DeviceMemory<f32>,
1065    dev_info: &mut DeviceMemory<i32>,
1066) -> Result<()> {
1067    ctx.bind()?;
1068    validate_square_matrix(n, a.len(), lda)?;
1069    require_info_buffer(dev_info)?;
1070    let lwork = spotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1071    require_workspace(workspace.len(), lwork)?;
1072    unsafe {
1073        try_ffi!(sys::cusolverDnSpotri(
1074            ctx.as_raw(),
1075            fill_mode.into(),
1076            to_i32(n, "n")?,
1077            a.as_mut_ptr().cast(),
1078            to_i32(lda, "lda")?,
1079            workspace.as_mut_ptr().cast(),
1080            to_i32(lwork, "lwork")?,
1081            dev_info.as_mut_ptr().cast(),
1082        ))?;
1083    }
1084    Ok(())
1085}
1086
1087/// Use the matching buffer-size helper to calculate the required workspace size.
1088///
1089/// The S and D data types are real valued single and double precision, respectively.
1090///
1091/// The C and Z data types are complex valued single and double precision, respectively.
1092///
1093/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1094///
1095/// computed by `potrf()`.
1096///
1097/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1098/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1099/// The other triangular part is left unchanged.
1100///
1101/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1102///
1103/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1104///
1105/// Provide workspace through `workspace`.
1106/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1107///
1108/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1109///
1110/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1111///
1112/// # Errors
1113///
1114/// Returns an error if cuSOLVER has not been initialized, if the
1115/// matrix dimensions or leading dimension are invalid, if the current GPU
1116/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1117pub fn dpotri(
1118    ctx: &Context,
1119    fill_mode: FillMode,
1120    n: usize,
1121    a: &mut DeviceMemory<f64>,
1122    lda: usize,
1123    workspace: &mut DeviceMemory<f64>,
1124    dev_info: &mut DeviceMemory<i32>,
1125) -> Result<()> {
1126    ctx.bind()?;
1127    validate_square_matrix(n, a.len(), lda)?;
1128    require_info_buffer(dev_info)?;
1129    let lwork = dpotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1130    require_workspace(workspace.len(), lwork)?;
1131    unsafe {
1132        try_ffi!(sys::cusolverDnDpotri(
1133            ctx.as_raw(),
1134            fill_mode.into(),
1135            to_i32(n, "n")?,
1136            a.as_mut_ptr().cast(),
1137            to_i32(lda, "lda")?,
1138            workspace.as_mut_ptr().cast(),
1139            to_i32(lwork, "lwork")?,
1140            dev_info.as_mut_ptr().cast(),
1141        ))?;
1142    }
1143    Ok(())
1144}
1145
1146/// Use the matching buffer-size helper to calculate the required workspace size.
1147///
1148/// The S and D data types are real valued single and double precision, respectively.
1149///
1150/// The C and Z data types are complex valued single and double precision, respectively.
1151///
1152/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1153///
1154/// computed by `potrf()`.
1155///
1156/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1157/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1158/// The other triangular part is left unchanged.
1159///
1160/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1161///
1162/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1163///
1164/// Provide workspace through `workspace`.
1165/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1166///
1167/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1168///
1169/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1170///
1171/// # Errors
1172///
1173/// Returns an error if cuSOLVER has not been initialized, if the
1174/// matrix dimensions or leading dimension are invalid, if the current GPU
1175/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1176pub fn cpotri(
1177    ctx: &Context,
1178    fill_mode: FillMode,
1179    n: usize,
1180    a: &mut DeviceMemory<Complex32>,
1181    lda: usize,
1182    workspace: &mut DeviceMemory<Complex32>,
1183    dev_info: &mut DeviceMemory<i32>,
1184) -> Result<()> {
1185    ctx.bind()?;
1186    validate_square_matrix(n, a.len(), lda)?;
1187    require_info_buffer(dev_info)?;
1188    let lwork = cpotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1189    require_workspace(workspace.len(), lwork)?;
1190    unsafe {
1191        try_ffi!(sys::cusolverDnCpotri(
1192            ctx.as_raw(),
1193            fill_mode.into(),
1194            to_i32(n, "n")?,
1195            a.as_mut_ptr().cast(),
1196            to_i32(lda, "lda")?,
1197            workspace.as_mut_ptr().cast(),
1198            to_i32(lwork, "lwork")?,
1199            dev_info.as_mut_ptr().cast(),
1200        ))?;
1201    }
1202    Ok(())
1203}
1204
1205/// Use the matching buffer-size helper to calculate the required workspace size.
1206///
1207/// The S and D data types are real valued single and double precision, respectively.
1208///
1209/// The C and Z data types are complex valued single and double precision, respectively.
1210///
1211/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1212///
1213/// computed by `potrf()`.
1214///
1215/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1216/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1217/// The other triangular part is left unchanged.
1218///
1219/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1220///
1221/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1222///
1223/// Provide workspace through `workspace`.
1224/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1225///
1226/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1227///
1228/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1229///
1230/// # Errors
1231///
1232/// Returns an error if cuSOLVER has not been initialized, if the
1233/// matrix dimensions or leading dimension are invalid, if the current GPU
1234/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1235pub fn zpotri(
1236    ctx: &Context,
1237    fill_mode: FillMode,
1238    n: usize,
1239    a: &mut DeviceMemory<Complex64>,
1240    lda: usize,
1241    workspace: &mut DeviceMemory<Complex64>,
1242    dev_info: &mut DeviceMemory<i32>,
1243) -> Result<()> {
1244    ctx.bind()?;
1245    validate_square_matrix(n, a.len(), lda)?;
1246    require_info_buffer(dev_info)?;
1247    let lwork = zpotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1248    require_workspace(workspace.len(), lwork)?;
1249    unsafe {
1250        try_ffi!(sys::cusolverDnZpotri(
1251            ctx.as_raw(),
1252            fill_mode.into(),
1253            to_i32(n, "n")?,
1254            a.as_mut_ptr().cast(),
1255            to_i32(lda, "lda")?,
1256            workspace.as_mut_ptr().cast(),
1257            to_i32(lwork, "lwork")?,
1258            dev_info.as_mut_ptr().cast(),
1259        ))?;
1260    }
1261    Ok(())
1262}
1263
1264pub fn sgetrf_buffer_size(
1265    ctx: &Context,
1266    m: usize,
1267    n: usize,
1268    a: &mut DeviceMemory<f32>,
1269    lda: usize,
1270) -> Result<usize> {
1271    ctx.bind()?;
1272    validate_matrix(m, n, a.len(), lda)?;
1273    let mut lwork = 0;
1274    unsafe {
1275        try_ffi!(sys::cusolverDnSgetrf_bufferSize(
1276            ctx.as_raw(),
1277            to_i32(m, "m")?,
1278            to_i32(n, "n")?,
1279            a.as_mut_ptr().cast(),
1280            to_i32(lda, "lda")?,
1281            &raw mut lwork,
1282        ))?;
1283    }
1284    to_usize(lwork, "lwork")
1285}
1286
1287pub fn dgetrf_buffer_size(
1288    ctx: &Context,
1289    m: usize,
1290    n: usize,
1291    a: &mut DeviceMemory<f64>,
1292    lda: usize,
1293) -> Result<usize> {
1294    ctx.bind()?;
1295    validate_matrix(m, n, a.len(), lda)?;
1296    let mut lwork = 0;
1297    unsafe {
1298        try_ffi!(sys::cusolverDnDgetrf_bufferSize(
1299            ctx.as_raw(),
1300            to_i32(m, "m")?,
1301            to_i32(n, "n")?,
1302            a.as_mut_ptr().cast(),
1303            to_i32(lda, "lda")?,
1304            &raw mut lwork,
1305        ))?;
1306    }
1307    to_usize(lwork, "lwork")
1308}
1309
1310pub fn cgetrf_buffer_size(
1311    ctx: &Context,
1312    m: usize,
1313    n: usize,
1314    a: &mut DeviceMemory<Complex32>,
1315    lda: usize,
1316) -> Result<usize> {
1317    ctx.bind()?;
1318    validate_matrix(m, n, a.len(), lda)?;
1319    let mut lwork = 0;
1320    unsafe {
1321        try_ffi!(sys::cusolverDnCgetrf_bufferSize(
1322            ctx.as_raw(),
1323            to_i32(m, "m")?,
1324            to_i32(n, "n")?,
1325            a.as_mut_ptr().cast(),
1326            to_i32(lda, "lda")?,
1327            &raw mut lwork,
1328        ))?;
1329    }
1330    to_usize(lwork, "lwork")
1331}
1332
1333pub fn zgetrf_buffer_size(
1334    ctx: &Context,
1335    m: usize,
1336    n: usize,
1337    a: &mut DeviceMemory<Complex64>,
1338    lda: usize,
1339) -> Result<usize> {
1340    ctx.bind()?;
1341    validate_matrix(m, n, a.len(), lda)?;
1342    let mut lwork = 0;
1343    unsafe {
1344        try_ffi!(sys::cusolverDnZgetrf_bufferSize(
1345            ctx.as_raw(),
1346            to_i32(m, "m")?,
1347            to_i32(n, "n")?,
1348            a.as_mut_ptr().cast(),
1349            to_i32(lda, "lda")?,
1350            &raw mut lwork,
1351        ))?;
1352    }
1353    to_usize(lwork, "lwork")
1354}
1355
1356/// Use the matching buffer-size helper to calculate the required workspace size.
1357///
1358/// The S and D data types are real single and double precision, respectively.
1359///
1360/// The C and Z data types are complex valued single and double precision, respectively.
1361///
1362/// Computes the LU factorization of an $m \times n$ matrix
1363///
1364/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1365///
1366/// Provide workspace through `workspace`.
1367/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1368///
1369/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1370///
1371/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1372///
1373/// If `pivots` is `None`, no pivoting is performed.
1374/// The factorization is `A=L*U`, which is not numerically stable.
1375///
1376/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1377/// sequence. Row `i` is interchanged with row `pivots[i]`.
1378///
1379/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1380///
1381/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1382/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1383///
1384/// # Errors
1385///
1386/// Returns an error if cuSOLVER has not been initialized, if the
1387/// matrix dimensions or leading dimension are invalid, if the current GPU
1388/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1389pub fn sgetrf(
1390    ctx: &Context,
1391    m: usize,
1392    n: usize,
1393    a: &mut DeviceMemory<f32>,
1394    lda: usize,
1395    workspace: &mut DeviceMemory<f32>,
1396    pivots: Option<&mut DeviceMemory<i32>>,
1397    dev_info: &mut DeviceMemory<i32>,
1398) -> Result<()> {
1399    ctx.bind()?;
1400    validate_matrix(m, n, a.len(), lda)?;
1401    require_info_buffer(dev_info)?;
1402    if let Some(pivots) = pivots.as_ref() {
1403        require_pivot_buffer(pivots, m.min(n))?;
1404    }
1405    let lwork = sgetrf_buffer_size(ctx, m, n, a, lda)?;
1406    require_workspace(workspace.len(), lwork)?;
1407    unsafe {
1408        try_ffi!(sys::cusolverDnSgetrf(
1409            ctx.as_raw(),
1410            to_i32(m, "m")?,
1411            to_i32(n, "n")?,
1412            a.as_mut_ptr().cast(),
1413            to_i32(lda, "lda")?,
1414            workspace.as_mut_ptr().cast(),
1415            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1416            dev_info.as_mut_ptr().cast(),
1417        ))?;
1418    }
1419    Ok(())
1420}
1421
1422/// Use the matching buffer-size helper to calculate the required workspace size.
1423///
1424/// The S and D data types are real single and double precision, respectively.
1425///
1426/// The C and Z data types are complex valued single and double precision, respectively.
1427///
1428/// Computes the LU factorization of an $m \times n$ matrix
1429///
1430/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1431///
1432/// Provide workspace through `workspace`.
1433/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1434///
1435/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1436///
1437/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1438///
1439/// If `pivots` is `None`, no pivoting is performed.
1440/// The factorization is `A=L*U`, which is not numerically stable.
1441///
1442/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1443/// sequence. Row `i` is interchanged with row `pivots[i]`.
1444///
1445/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1446///
1447/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1448/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1449///
1450/// # Errors
1451///
1452/// Returns an error if cuSOLVER has not been initialized, if the
1453/// matrix dimensions or leading dimension are invalid, if the current GPU
1454/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1455pub fn dgetrf(
1456    ctx: &Context,
1457    m: usize,
1458    n: usize,
1459    a: &mut DeviceMemory<f64>,
1460    lda: usize,
1461    workspace: &mut DeviceMemory<f64>,
1462    pivots: Option<&mut DeviceMemory<i32>>,
1463    dev_info: &mut DeviceMemory<i32>,
1464) -> Result<()> {
1465    ctx.bind()?;
1466    validate_matrix(m, n, a.len(), lda)?;
1467    require_info_buffer(dev_info)?;
1468    if let Some(pivots) = pivots.as_ref() {
1469        require_pivot_buffer(pivots, m.min(n))?;
1470    }
1471    let lwork = dgetrf_buffer_size(ctx, m, n, a, lda)?;
1472    require_workspace(workspace.len(), lwork)?;
1473    unsafe {
1474        try_ffi!(sys::cusolverDnDgetrf(
1475            ctx.as_raw(),
1476            to_i32(m, "m")?,
1477            to_i32(n, "n")?,
1478            a.as_mut_ptr().cast(),
1479            to_i32(lda, "lda")?,
1480            workspace.as_mut_ptr().cast(),
1481            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1482            dev_info.as_mut_ptr().cast(),
1483        ))?;
1484    }
1485    Ok(())
1486}
1487
1488/// Use the matching buffer-size helper to calculate the required workspace size.
1489///
1490/// The S and D data types are real single and double precision, respectively.
1491///
1492/// The C and Z data types are complex valued single and double precision, respectively.
1493///
1494/// Computes the LU factorization of an $m \times n$ matrix
1495///
1496/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1497///
1498/// Provide workspace through `workspace`.
1499/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1500///
1501/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1502///
1503/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1504///
1505/// If `pivots` is `None`, no pivoting is performed.
1506/// The factorization is `A=L*U`, which is not numerically stable.
1507///
1508/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1509/// sequence. Row `i` is interchanged with row `pivots[i]`.
1510///
1511/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1512///
1513/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1514/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1515///
1516/// # Errors
1517///
1518/// Returns an error if cuSOLVER has not been initialized, if the
1519/// matrix dimensions or leading dimension are invalid, if the current GPU
1520/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1521pub fn cgetrf(
1522    ctx: &Context,
1523    m: usize,
1524    n: usize,
1525    a: &mut DeviceMemory<Complex32>,
1526    lda: usize,
1527    workspace: &mut DeviceMemory<Complex32>,
1528    pivots: Option<&mut DeviceMemory<i32>>,
1529    dev_info: &mut DeviceMemory<i32>,
1530) -> Result<()> {
1531    ctx.bind()?;
1532    validate_matrix(m, n, a.len(), lda)?;
1533    require_info_buffer(dev_info)?;
1534    if let Some(pivots) = pivots.as_ref() {
1535        require_pivot_buffer(pivots, m.min(n))?;
1536    }
1537    let lwork = cgetrf_buffer_size(ctx, m, n, a, lda)?;
1538    require_workspace(workspace.len(), lwork)?;
1539    unsafe {
1540        try_ffi!(sys::cusolverDnCgetrf(
1541            ctx.as_raw(),
1542            to_i32(m, "m")?,
1543            to_i32(n, "n")?,
1544            a.as_mut_ptr().cast(),
1545            to_i32(lda, "lda")?,
1546            workspace.as_mut_ptr().cast(),
1547            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1548            dev_info.as_mut_ptr().cast(),
1549        ))?;
1550    }
1551    Ok(())
1552}
1553
1554/// Use the matching buffer-size helper to calculate the required workspace size.
1555///
1556/// The S and D data types are real single and double precision, respectively.
1557///
1558/// The C and Z data types are complex valued single and double precision, respectively.
1559///
1560/// Computes the LU factorization of an $m \times n$ matrix
1561///
1562/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1563///
1564/// Provide workspace through `workspace`.
1565/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1566///
1567/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1568///
1569/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1570///
1571/// If `pivots` is `None`, no pivoting is performed.
1572/// The factorization is `A=L*U`, which is not numerically stable.
1573///
1574/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1575/// sequence. Row `i` is interchanged with row `pivots[i]`.
1576///
1577/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1578///
1579/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1580/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1581///
1582/// # Errors
1583///
1584/// Returns an error if cuSOLVER has not been initialized, if the
1585/// matrix dimensions or leading dimension are invalid, if the current GPU
1586/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1587pub fn zgetrf(
1588    ctx: &Context,
1589    m: usize,
1590    n: usize,
1591    a: &mut DeviceMemory<Complex64>,
1592    lda: usize,
1593    workspace: &mut DeviceMemory<Complex64>,
1594    pivots: Option<&mut DeviceMemory<i32>>,
1595    dev_info: &mut DeviceMemory<i32>,
1596) -> Result<()> {
1597    ctx.bind()?;
1598    validate_matrix(m, n, a.len(), lda)?;
1599    require_info_buffer(dev_info)?;
1600    if let Some(pivots) = pivots.as_ref() {
1601        require_pivot_buffer(pivots, m.min(n))?;
1602    }
1603    let lwork = zgetrf_buffer_size(ctx, m, n, a, lda)?;
1604    require_workspace(workspace.len(), lwork)?;
1605    unsafe {
1606        try_ffi!(sys::cusolverDnZgetrf(
1607            ctx.as_raw(),
1608            to_i32(m, "m")?,
1609            to_i32(n, "n")?,
1610            a.as_mut_ptr().cast(),
1611            to_i32(lda, "lda")?,
1612            workspace.as_mut_ptr().cast(),
1613            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1614            dev_info.as_mut_ptr().cast(),
1615        ))?;
1616    }
1617    Ok(())
1618}
1619
1620///
1621/// Solves a linear system of multiple right-hand sides
1622///
1623/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1624/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1625///
1626/// The `operation` argument is described by [`Operation`].
1627///
1628/// `pivots` is returned by the matching `getrf` operation.
1629/// It contains pivot indices, which are used to permute right-hand sides.
1630///
1631/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1632///
1633/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1634///
1635/// # Errors
1636///
1637/// Returns an error if cuSOLVER has not been initialized, if the
1638/// matrix dimensions or leading dimensions are invalid, if the current GPU
1639/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1640pub fn sgetrs(
1641    ctx: &Context,
1642    operation: Operation,
1643    n: usize,
1644    nrhs: usize,
1645    a: &DeviceMemory<f32>,
1646    lda: usize,
1647    pivots: &DeviceMemory<i32>,
1648    b: &mut DeviceMemory<f32>,
1649    ldb: usize,
1650    dev_info: &mut DeviceMemory<i32>,
1651) -> Result<()> {
1652    ctx.bind()?;
1653    validate_square_matrix(n, a.len(), lda)?;
1654    validate_matrix(n, nrhs, b.len(), ldb)?;
1655    require_pivot_buffer(pivots, n)?;
1656    require_info_buffer(dev_info)?;
1657    unsafe {
1658        try_ffi!(sys::cusolverDnSgetrs(
1659            ctx.as_raw(),
1660            operation.into(),
1661            to_i32(n, "n")?,
1662            to_i32(nrhs, "nrhs")?,
1663            a.as_ptr().cast(),
1664            to_i32(lda, "lda")?,
1665            pivots.as_ptr().cast(),
1666            b.as_mut_ptr().cast(),
1667            to_i32(ldb, "ldb")?,
1668            dev_info.as_mut_ptr().cast(),
1669        ))?;
1670    }
1671    Ok(())
1672}
1673
1674///
1675/// Solves a linear system of multiple right-hand sides
1676///
1677/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1678/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1679///
1680/// The `operation` argument is described by [`Operation`].
1681///
1682/// `pivots` is returned by the matching `getrf` operation.
1683/// It contains pivot indices, which are used to permute right-hand sides.
1684///
1685/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1686///
1687/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1688///
1689/// # Errors
1690///
1691/// Returns an error if cuSOLVER has not been initialized, if the
1692/// matrix dimensions or leading dimensions are invalid, if the current GPU
1693/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1694pub fn dgetrs(
1695    ctx: &Context,
1696    operation: Operation,
1697    n: usize,
1698    nrhs: usize,
1699    a: &DeviceMemory<f64>,
1700    lda: usize,
1701    pivots: &DeviceMemory<i32>,
1702    b: &mut DeviceMemory<f64>,
1703    ldb: usize,
1704    dev_info: &mut DeviceMemory<i32>,
1705) -> Result<()> {
1706    ctx.bind()?;
1707    validate_square_matrix(n, a.len(), lda)?;
1708    validate_matrix(n, nrhs, b.len(), ldb)?;
1709    require_pivot_buffer(pivots, n)?;
1710    require_info_buffer(dev_info)?;
1711    unsafe {
1712        try_ffi!(sys::cusolverDnDgetrs(
1713            ctx.as_raw(),
1714            operation.into(),
1715            to_i32(n, "n")?,
1716            to_i32(nrhs, "nrhs")?,
1717            a.as_ptr().cast(),
1718            to_i32(lda, "lda")?,
1719            pivots.as_ptr().cast(),
1720            b.as_mut_ptr().cast(),
1721            to_i32(ldb, "ldb")?,
1722            dev_info.as_mut_ptr().cast(),
1723        ))?;
1724    }
1725    Ok(())
1726}
1727
1728///
1729/// Solves a linear system of multiple right-hand sides
1730///
1731/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1732/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1733///
1734/// The `operation` argument is described by [`Operation`].
1735///
1736/// `pivots` is returned by the matching `getrf` operation.
1737/// It contains pivot indices, which are used to permute right-hand sides.
1738///
1739/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1740///
1741/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1742///
1743/// # Errors
1744///
1745/// Returns an error if cuSOLVER has not been initialized, if the
1746/// matrix dimensions or leading dimensions are invalid, if the current GPU
1747/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1748pub fn cgetrs(
1749    ctx: &Context,
1750    operation: Operation,
1751    n: usize,
1752    nrhs: usize,
1753    a: &DeviceMemory<Complex32>,
1754    lda: usize,
1755    pivots: &DeviceMemory<i32>,
1756    b: &mut DeviceMemory<Complex32>,
1757    ldb: usize,
1758    dev_info: &mut DeviceMemory<i32>,
1759) -> Result<()> {
1760    ctx.bind()?;
1761    validate_square_matrix(n, a.len(), lda)?;
1762    validate_matrix(n, nrhs, b.len(), ldb)?;
1763    require_pivot_buffer(pivots, n)?;
1764    require_info_buffer(dev_info)?;
1765    unsafe {
1766        try_ffi!(sys::cusolverDnCgetrs(
1767            ctx.as_raw(),
1768            operation.into(),
1769            to_i32(n, "n")?,
1770            to_i32(nrhs, "nrhs")?,
1771            a.as_ptr().cast(),
1772            to_i32(lda, "lda")?,
1773            pivots.as_ptr().cast(),
1774            b.as_mut_ptr().cast(),
1775            to_i32(ldb, "ldb")?,
1776            dev_info.as_mut_ptr().cast(),
1777        ))?;
1778    }
1779    Ok(())
1780}
1781
1782///
1783/// Solves a linear system of multiple right-hand sides
1784///
1785/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1786/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1787///
1788/// The `operation` argument is described by [`Operation`].
1789///
1790/// `pivots` is returned by the matching `getrf` operation.
1791/// It contains pivot indices, which are used to permute right-hand sides.
1792///
1793/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1794///
1795/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1796///
1797/// # Errors
1798///
1799/// Returns an error if cuSOLVER has not been initialized, if the
1800/// matrix dimensions or leading dimensions are invalid, if the current GPU
1801/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1802pub fn zgetrs(
1803    ctx: &Context,
1804    operation: Operation,
1805    n: usize,
1806    nrhs: usize,
1807    a: &DeviceMemory<Complex64>,
1808    lda: usize,
1809    pivots: &DeviceMemory<i32>,
1810    b: &mut DeviceMemory<Complex64>,
1811    ldb: usize,
1812    dev_info: &mut DeviceMemory<i32>,
1813) -> Result<()> {
1814    ctx.bind()?;
1815    validate_square_matrix(n, a.len(), lda)?;
1816    validate_matrix(n, nrhs, b.len(), ldb)?;
1817    require_pivot_buffer(pivots, n)?;
1818    require_info_buffer(dev_info)?;
1819    unsafe {
1820        try_ffi!(sys::cusolverDnZgetrs(
1821            ctx.as_raw(),
1822            operation.into(),
1823            to_i32(n, "n")?,
1824            to_i32(nrhs, "nrhs")?,
1825            a.as_ptr().cast(),
1826            to_i32(lda, "lda")?,
1827            pivots.as_ptr().cast(),
1828            b.as_mut_ptr().cast(),
1829            to_i32(ldb, "ldb")?,
1830            dev_info.as_mut_ptr().cast(),
1831        ))?;
1832    }
1833    Ok(())
1834}
1835
1836pub fn ssytrf_buffer_size(
1837    ctx: &Context,
1838    n: usize,
1839    a: &mut DeviceMemory<f32>,
1840    lda: usize,
1841) -> Result<usize> {
1842    ctx.bind()?;
1843    validate_square_matrix(n, a.len(), lda)?;
1844    let mut lwork = 0;
1845    unsafe {
1846        try_ffi!(sys::cusolverDnSsytrf_bufferSize(
1847            ctx.as_raw(),
1848            to_i32(n, "n")?,
1849            a.as_mut_ptr().cast(),
1850            to_i32(lda, "lda")?,
1851            &raw mut lwork,
1852        ))?;
1853    }
1854    to_usize(lwork, "lwork")
1855}
1856
1857pub fn dsytrf_buffer_size(
1858    ctx: &Context,
1859    n: usize,
1860    a: &mut DeviceMemory<f64>,
1861    lda: usize,
1862) -> Result<usize> {
1863    ctx.bind()?;
1864    validate_square_matrix(n, a.len(), lda)?;
1865    let mut lwork = 0;
1866    unsafe {
1867        try_ffi!(sys::cusolverDnDsytrf_bufferSize(
1868            ctx.as_raw(),
1869            to_i32(n, "n")?,
1870            a.as_mut_ptr().cast(),
1871            to_i32(lda, "lda")?,
1872            &raw mut lwork,
1873        ))?;
1874    }
1875    to_usize(lwork, "lwork")
1876}
1877
1878pub fn csytrf_buffer_size(
1879    ctx: &Context,
1880    n: usize,
1881    a: &mut DeviceMemory<Complex32>,
1882    lda: usize,
1883) -> Result<usize> {
1884    ctx.bind()?;
1885    validate_square_matrix(n, a.len(), lda)?;
1886    let mut lwork = 0;
1887    unsafe {
1888        try_ffi!(sys::cusolverDnCsytrf_bufferSize(
1889            ctx.as_raw(),
1890            to_i32(n, "n")?,
1891            a.as_mut_ptr().cast(),
1892            to_i32(lda, "lda")?,
1893            &raw mut lwork,
1894        ))?;
1895    }
1896    to_usize(lwork, "lwork")
1897}
1898
1899pub fn zsytrf_buffer_size(
1900    ctx: &Context,
1901    n: usize,
1902    a: &mut DeviceMemory<Complex64>,
1903    lda: usize,
1904) -> Result<usize> {
1905    ctx.bind()?;
1906    validate_square_matrix(n, a.len(), lda)?;
1907    let mut lwork = 0;
1908    unsafe {
1909        try_ffi!(sys::cusolverDnZsytrf_bufferSize(
1910            ctx.as_raw(),
1911            to_i32(n, "n")?,
1912            a.as_mut_ptr().cast(),
1913            to_i32(lda, "lda")?,
1914            &raw mut lwork,
1915        ))?;
1916    }
1917    to_usize(lwork, "lwork")
1918}
1919
1920/// Use the matching buffer-size helper to calculate the required workspace size.
1921///
1922/// The S and D data types are real valued single and double precision, respectively.
1923///
1924/// The C and Z data types are complex valued single and double precision, respectively.
1925///
1926/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
1927///
1928/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
1929/// `fill_mode` indicates which part of the matrix is used.
1930/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
1931///
1932/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
1933/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
1934///
1935/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
1936///
1937/// Provide workspace through `workspace`.
1938/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1939/// The workspace size in bytes is `size_of::<T>() * lwork`.
1940/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
1941///
1942/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
1943/// `dev_info = i` indicates `D(i, i) = 0`.
1944///
1945/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1946///
1947/// `pivots` contains the pivoting sequence.
1948/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
1949/// is interchanged with row/column `k`.
1950/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
1951/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
1952/// with row/column `m`.
1953/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
1954/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
1955/// with row/column `m`.
1956///
1957/// # Errors
1958///
1959/// Returns an error if cuSOLVER has not been initialized, if the
1960/// matrix dimensions or leading dimension are invalid, if the current GPU
1961/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1962pub fn ssytrf(
1963    ctx: &Context,
1964    fill_mode: FillMode,
1965    n: usize,
1966    a: &mut DeviceMemory<f32>,
1967    lda: usize,
1968    pivots: Option<&mut DeviceMemory<i32>>,
1969    workspace: &mut DeviceMemory<f32>,
1970    dev_info: &mut DeviceMemory<i32>,
1971) -> Result<()> {
1972    ctx.bind()?;
1973    validate_square_matrix(n, a.len(), lda)?;
1974    if let Some(pivots) = pivots.as_ref() {
1975        require_pivot_buffer(pivots, n)?;
1976    }
1977    require_info_buffer(dev_info)?;
1978    let lwork = ssytrf_buffer_size(ctx, n, a, lda)?;
1979    require_workspace(workspace.len(), lwork)?;
1980    unsafe {
1981        try_ffi!(sys::cusolverDnSsytrf(
1982            ctx.as_raw(),
1983            fill_mode.into(),
1984            to_i32(n, "n")?,
1985            a.as_mut_ptr().cast(),
1986            to_i32(lda, "lda")?,
1987            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1988            workspace.as_mut_ptr().cast(),
1989            to_i32(lwork, "lwork")?,
1990            dev_info.as_mut_ptr().cast(),
1991        ))?;
1992    }
1993    Ok(())
1994}
1995
1996/// Use the matching buffer-size helper to calculate the required workspace size.
1997///
1998/// The S and D data types are real valued single and double precision, respectively.
1999///
2000/// The C and Z data types are complex valued single and double precision, respectively.
2001///
2002/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
2003///
2004/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
2005/// `fill_mode` indicates which part of the matrix is used.
2006/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
2007///
2008/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
2009/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
2010///
2011/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
2012///
2013/// Provide workspace through `workspace`.
2014/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2015/// The workspace size in bytes is `size_of::<T>() * lwork`.
2016/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
2017///
2018/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
2019/// `dev_info = i` indicates `D(i, i) = 0`.
2020///
2021/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2022///
2023/// `pivots` contains the pivoting sequence.
2024/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
2025/// is interchanged with row/column `k`.
2026/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
2027/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
2028/// with row/column `m`.
2029/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
2030/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
2031/// with row/column `m`.
2032///
2033/// # Errors
2034///
2035/// Returns an error if cuSOLVER has not been initialized, if the
2036/// matrix dimensions or leading dimension are invalid, if the current GPU
2037/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2038pub fn dsytrf(
2039    ctx: &Context,
2040    fill_mode: FillMode,
2041    n: usize,
2042    a: &mut DeviceMemory<f64>,
2043    lda: usize,
2044    pivots: Option<&mut DeviceMemory<i32>>,
2045    workspace: &mut DeviceMemory<f64>,
2046    dev_info: &mut DeviceMemory<i32>,
2047) -> Result<()> {
2048    ctx.bind()?;
2049    validate_square_matrix(n, a.len(), lda)?;
2050    if let Some(pivots) = pivots.as_ref() {
2051        require_pivot_buffer(pivots, n)?;
2052    }
2053    require_info_buffer(dev_info)?;
2054    let lwork = dsytrf_buffer_size(ctx, n, a, lda)?;
2055    require_workspace(workspace.len(), lwork)?;
2056    unsafe {
2057        try_ffi!(sys::cusolverDnDsytrf(
2058            ctx.as_raw(),
2059            fill_mode.into(),
2060            to_i32(n, "n")?,
2061            a.as_mut_ptr().cast(),
2062            to_i32(lda, "lda")?,
2063            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
2064            workspace.as_mut_ptr().cast(),
2065            to_i32(lwork, "lwork")?,
2066            dev_info.as_mut_ptr().cast(),
2067        ))?;
2068    }
2069    Ok(())
2070}
2071
2072/// Use the matching buffer-size helper to calculate the required workspace size.
2073///
2074/// The S and D data types are real valued single and double precision, respectively.
2075///
2076/// The C and Z data types are complex valued single and double precision, respectively.
2077///
2078/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
2079///
2080/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
2081/// `fill_mode` indicates which part of the matrix is used.
2082/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
2083///
2084/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
2085/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
2086///
2087/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
2088///
2089/// Provide workspace through `workspace`.
2090/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2091/// The workspace size in bytes is `size_of::<T>() * lwork`.
2092/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
2093///
2094/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
2095/// `dev_info = i` indicates `D(i, i) = 0`.
2096///
2097/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2098///
2099/// `pivots` contains the pivoting sequence.
2100/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
2101/// is interchanged with row/column `k`.
2102/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
2103/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
2104/// with row/column `m`.
2105/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
2106/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
2107/// with row/column `m`.
2108///
2109/// # Errors
2110///
2111/// Returns an error if cuSOLVER has not been initialized, if the
2112/// matrix dimensions or leading dimension are invalid, if the current GPU
2113/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2114pub fn csytrf(
2115    ctx: &Context,
2116    fill_mode: FillMode,
2117    n: usize,
2118    a: &mut DeviceMemory<Complex32>,
2119    lda: usize,
2120    pivots: Option<&mut DeviceMemory<i32>>,
2121    workspace: &mut DeviceMemory<Complex32>,
2122    dev_info: &mut DeviceMemory<i32>,
2123) -> Result<()> {
2124    ctx.bind()?;
2125    validate_square_matrix(n, a.len(), lda)?;
2126    if let Some(pivots) = pivots.as_ref() {
2127        require_pivot_buffer(pivots, n)?;
2128    }
2129    require_info_buffer(dev_info)?;
2130    let lwork = csytrf_buffer_size(ctx, n, a, lda)?;
2131    require_workspace(workspace.len(), lwork)?;
2132    unsafe {
2133        try_ffi!(sys::cusolverDnCsytrf(
2134            ctx.as_raw(),
2135            fill_mode.into(),
2136            to_i32(n, "n")?,
2137            a.as_mut_ptr().cast(),
2138            to_i32(lda, "lda")?,
2139            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
2140            workspace.as_mut_ptr().cast(),
2141            to_i32(lwork, "lwork")?,
2142            dev_info.as_mut_ptr().cast(),
2143        ))?;
2144    }
2145    Ok(())
2146}
2147
2148/// Use the matching buffer-size helper to calculate the required workspace size.
2149///
2150/// The S and D data types are real valued single and double precision, respectively.
2151///
2152/// The C and Z data types are complex valued single and double precision, respectively.
2153///
2154/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
2155///
2156/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
2157/// `fill_mode` indicates which part of the matrix is used.
2158/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
2159///
2160/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
2161/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
2162///
2163/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
2164///
2165/// Provide workspace through `workspace`.
2166/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2167/// The workspace size in bytes is `size_of::<T>() * lwork`.
2168/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
2169///
2170/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
2171/// `dev_info = i` indicates `D(i, i) = 0`.
2172///
2173/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2174///
2175/// `pivots` contains the pivoting sequence.
2176/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
2177/// is interchanged with row/column `k`.
2178/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
2179/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
2180/// with row/column `m`.
2181/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
2182/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
2183/// with row/column `m`.
2184///
2185/// # Errors
2186///
2187/// Returns an error if cuSOLVER has not been initialized, if the
2188/// matrix dimensions or leading dimension are invalid, if the current GPU
2189/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2190pub fn zsytrf(
2191    ctx: &Context,
2192    fill_mode: FillMode,
2193    n: usize,
2194    a: &mut DeviceMemory<Complex64>,
2195    lda: usize,
2196    pivots: Option<&mut DeviceMemory<i32>>,
2197    workspace: &mut DeviceMemory<Complex64>,
2198    dev_info: &mut DeviceMemory<i32>,
2199) -> Result<()> {
2200    ctx.bind()?;
2201    validate_square_matrix(n, a.len(), lda)?;
2202    if let Some(pivots) = pivots.as_ref() {
2203        require_pivot_buffer(pivots, n)?;
2204    }
2205    require_info_buffer(dev_info)?;
2206    let lwork = zsytrf_buffer_size(ctx, n, a, lda)?;
2207    require_workspace(workspace.len(), lwork)?;
2208    unsafe {
2209        try_ffi!(sys::cusolverDnZsytrf(
2210            ctx.as_raw(),
2211            fill_mode.into(),
2212            to_i32(n, "n")?,
2213            a.as_mut_ptr().cast(),
2214            to_i32(lda, "lda")?,
2215            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
2216            workspace.as_mut_ptr().cast(),
2217            to_i32(lwork, "lwork")?,
2218            dev_info.as_mut_ptr().cast(),
2219        ))?;
2220    }
2221    Ok(())
2222}
2223
2224pub fn sgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2225    ctx.bind()?;
2226    validate_bidiagonal_dims(m, n)?;
2227    let mut lwork = 0;
2228    unsafe {
2229        try_ffi!(sys::cusolverDnSgebrd_bufferSize(
2230            ctx.as_raw(),
2231            to_i32(m, "m")?,
2232            to_i32(n, "n")?,
2233            &raw mut lwork,
2234        ))?;
2235    }
2236    to_usize(lwork, "lwork")
2237}
2238
2239pub fn dgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2240    ctx.bind()?;
2241    validate_bidiagonal_dims(m, n)?;
2242    let mut lwork = 0;
2243    unsafe {
2244        try_ffi!(sys::cusolverDnDgebrd_bufferSize(
2245            ctx.as_raw(),
2246            to_i32(m, "m")?,
2247            to_i32(n, "n")?,
2248            &raw mut lwork,
2249        ))?;
2250    }
2251    to_usize(lwork, "lwork")
2252}
2253
2254pub fn cgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2255    ctx.bind()?;
2256    validate_bidiagonal_dims(m, n)?;
2257    let mut lwork = 0;
2258    unsafe {
2259        try_ffi!(sys::cusolverDnCgebrd_bufferSize(
2260            ctx.as_raw(),
2261            to_i32(m, "m")?,
2262            to_i32(n, "n")?,
2263            &raw mut lwork,
2264        ))?;
2265    }
2266    to_usize(lwork, "lwork")
2267}
2268
2269pub fn zgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2270    ctx.bind()?;
2271    validate_bidiagonal_dims(m, n)?;
2272    let mut lwork = 0;
2273    unsafe {
2274        try_ffi!(sys::cusolverDnZgebrd_bufferSize(
2275            ctx.as_raw(),
2276            to_i32(m, "m")?,
2277            to_i32(n, "n")?,
2278            &raw mut lwork,
2279        ))?;
2280    }
2281    to_usize(lwork, "lwork")
2282}
2283
2284/// Use the matching buffer-size helper to calculate the required workspace size.
2285///
2286/// The S and D data types are real valued single and double precision, respectively.
2287///
2288/// The C and Z data types are complex valued single and double precision, respectively.
2289///
2290/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2291/// bidiagonal form `B` by an orthogonal transformation:
2292/// $Q^{H}\cdot A\cdot P = B$.
2293///
2294/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2295/// bidiagonal.
2296///
2297/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2298///
2299/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2300///   the upper bidiagonal matrix `B`. Elements below the diagonal, together
2301///   with `tauq`, represent `Q`; elements above the first superdiagonal,
2302///   together with `taup`, represent `P`.
2303/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2304///   lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2305///   together with `tauq`, represent `Q`; elements above the diagonal,
2306///   together with `taup`, represent `P`.
2307///
2308/// Provide workspace through `workspace`.
2309/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2310///
2311/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2312///
2313/// `gebrd` only supports `m >= n`.
2314///
2315/// # Errors
2316///
2317/// Returns an error if cuSOLVER has not been initialized, if the
2318/// matrix dimensions or leading dimension are invalid, if the current GPU
2319/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2320pub fn sgebrd(
2321    ctx: &Context,
2322    m: usize,
2323    n: usize,
2324    a: &mut DeviceMemory<f32>,
2325    lda: usize,
2326    d: &mut DeviceMemory<f32>,
2327    e: &mut DeviceMemory<f32>,
2328    tauq: &mut DeviceMemory<f32>,
2329    taup: &mut DeviceMemory<f32>,
2330    workspace: &mut DeviceMemory<f32>,
2331    dev_info: &mut DeviceMemory<i32>,
2332) -> Result<()> {
2333    ctx.bind()?;
2334    validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2335    require_info_buffer(dev_info)?;
2336    let lwork = sgebrd_buffer_size(ctx, m, n)?;
2337    require_workspace(workspace.len(), lwork)?;
2338    unsafe {
2339        try_ffi!(sys::cusolverDnSgebrd(
2340            ctx.as_raw(),
2341            to_i32(m, "m")?,
2342            to_i32(n, "n")?,
2343            a.as_mut_ptr().cast(),
2344            to_i32(lda, "lda")?,
2345            d.as_mut_ptr().cast(),
2346            e.as_mut_ptr().cast(),
2347            tauq.as_mut_ptr().cast(),
2348            taup.as_mut_ptr().cast(),
2349            workspace.as_mut_ptr().cast(),
2350            to_i32(lwork, "lwork")?,
2351            dev_info.as_mut_ptr().cast(),
2352        ))?;
2353    }
2354    Ok(())
2355}
2356
2357/// Use the matching buffer-size helper to calculate the required workspace size.
2358///
2359/// The S and D data types are real valued single and double precision, respectively.
2360///
2361/// The C and Z data types are complex valued single and double precision, respectively.
2362///
2363/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2364/// bidiagonal form `B` by an orthogonal transformation:
2365/// $Q^{H}\cdot A\cdot P = B$.
2366///
2367/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2368/// bidiagonal.
2369///
2370/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2371///
2372/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2373///   the upper bidiagonal matrix `B`. Elements below the diagonal, together
2374///   with `tauq`, represent `Q`; elements above the first superdiagonal,
2375///   together with `taup`, represent `P`.
2376/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2377///   lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2378///   together with `tauq`, represent `Q`; elements above the diagonal,
2379///   together with `taup`, represent `P`.
2380///
2381/// Provide workspace through `workspace`.
2382/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2383///
2384/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2385///
2386/// `gebrd` only supports `m >= n`.
2387///
2388/// # Errors
2389///
2390/// Returns an error if cuSOLVER has not been initialized, if the
2391/// matrix dimensions or leading dimension are invalid, if the current GPU
2392/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2393pub fn dgebrd(
2394    ctx: &Context,
2395    m: usize,
2396    n: usize,
2397    a: &mut DeviceMemory<f64>,
2398    lda: usize,
2399    d: &mut DeviceMemory<f64>,
2400    e: &mut DeviceMemory<f64>,
2401    tauq: &mut DeviceMemory<f64>,
2402    taup: &mut DeviceMemory<f64>,
2403    workspace: &mut DeviceMemory<f64>,
2404    dev_info: &mut DeviceMemory<i32>,
2405) -> Result<()> {
2406    ctx.bind()?;
2407    validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2408    require_info_buffer(dev_info)?;
2409    let lwork = dgebrd_buffer_size(ctx, m, n)?;
2410    require_workspace(workspace.len(), lwork)?;
2411    unsafe {
2412        try_ffi!(sys::cusolverDnDgebrd(
2413            ctx.as_raw(),
2414            to_i32(m, "m")?,
2415            to_i32(n, "n")?,
2416            a.as_mut_ptr().cast(),
2417            to_i32(lda, "lda")?,
2418            d.as_mut_ptr().cast(),
2419            e.as_mut_ptr().cast(),
2420            tauq.as_mut_ptr().cast(),
2421            taup.as_mut_ptr().cast(),
2422            workspace.as_mut_ptr().cast(),
2423            to_i32(lwork, "lwork")?,
2424            dev_info.as_mut_ptr().cast(),
2425        ))?;
2426    }
2427    Ok(())
2428}
2429
2430/// Use the matching buffer-size helper to calculate the required workspace size.
2431///
2432/// The S and D data types are real valued single and double precision, respectively.
2433///
2434/// The C and Z data types are complex valued single and double precision, respectively.
2435///
2436/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2437/// bidiagonal form `B` by an orthogonal transformation:
2438/// $Q^{H}\cdot A\cdot P = B$.
2439///
2440/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2441/// bidiagonal.
2442///
2443/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2444///
2445/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2446///   the upper bidiagonal matrix `B`. Elements below the diagonal, together
2447///   with `tauq`, represent `Q`; elements above the first superdiagonal,
2448///   together with `taup`, represent `P`.
2449/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2450///   lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2451///   together with `tauq`, represent `Q`; elements above the diagonal,
2452///   together with `taup`, represent `P`.
2453///
2454/// Provide workspace through `workspace`.
2455/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2456///
2457/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2458///
2459/// `gebrd` only supports `m >= n`.
2460///
2461/// # Errors
2462///
2463/// Returns an error if cuSOLVER has not been initialized, if the
2464/// matrix dimensions or leading dimension are invalid, if the current GPU
2465/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2466pub fn cgebrd(
2467    ctx: &Context,
2468    m: usize,
2469    n: usize,
2470    a: &mut DeviceMemory<Complex32>,
2471    lda: usize,
2472    d: &mut DeviceMemory<f32>,
2473    e: &mut DeviceMemory<f32>,
2474    tauq: &mut DeviceMemory<Complex32>,
2475    taup: &mut DeviceMemory<Complex32>,
2476    workspace: &mut DeviceMemory<Complex32>,
2477    dev_info: &mut DeviceMemory<i32>,
2478) -> Result<()> {
2479    ctx.bind()?;
2480    validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2481    require_info_buffer(dev_info)?;
2482    let lwork = cgebrd_buffer_size(ctx, m, n)?;
2483    require_workspace(workspace.len(), lwork)?;
2484    unsafe {
2485        try_ffi!(sys::cusolverDnCgebrd(
2486            ctx.as_raw(),
2487            to_i32(m, "m")?,
2488            to_i32(n, "n")?,
2489            a.as_mut_ptr().cast(),
2490            to_i32(lda, "lda")?,
2491            d.as_mut_ptr().cast(),
2492            e.as_mut_ptr().cast(),
2493            tauq.as_mut_ptr().cast(),
2494            taup.as_mut_ptr().cast(),
2495            workspace.as_mut_ptr().cast(),
2496            to_i32(lwork, "lwork")?,
2497            dev_info.as_mut_ptr().cast(),
2498        ))?;
2499    }
2500    Ok(())
2501}
2502
2503/// Use the matching buffer-size helper to calculate the required workspace size.
2504///
2505/// The S and D data types are real valued single and double precision, respectively.
2506///
2507/// The C and Z data types are complex valued single and double precision, respectively.
2508///
2509/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2510/// bidiagonal form `B` by an orthogonal transformation:
2511/// $Q^{H}\cdot A\cdot P = B$.
2512///
2513/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2514/// bidiagonal.
2515///
2516/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2517///
2518/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2519///   the upper bidiagonal matrix `B`. Elements below the diagonal, together
2520///   with `tauq`, represent `Q`; elements above the first superdiagonal,
2521///   together with `taup`, represent `P`.
2522/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2523///   lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2524///   together with `tauq`, represent `Q`; elements above the diagonal,
2525///   together with `taup`, represent `P`.
2526///
2527/// Provide workspace through `workspace`.
2528/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2529///
2530/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2531///
2532/// `gebrd` only supports `m >= n`.
2533///
2534/// # Errors
2535///
2536/// Returns an error if cuSOLVER has not been initialized, if the
2537/// matrix dimensions or leading dimension are invalid, if the current GPU
2538/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2539pub fn zgebrd(
2540    ctx: &Context,
2541    m: usize,
2542    n: usize,
2543    a: &mut DeviceMemory<Complex64>,
2544    lda: usize,
2545    d: &mut DeviceMemory<f64>,
2546    e: &mut DeviceMemory<f64>,
2547    tauq: &mut DeviceMemory<Complex64>,
2548    taup: &mut DeviceMemory<Complex64>,
2549    workspace: &mut DeviceMemory<Complex64>,
2550    dev_info: &mut DeviceMemory<i32>,
2551) -> Result<()> {
2552    ctx.bind()?;
2553    validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2554    require_info_buffer(dev_info)?;
2555    let lwork = zgebrd_buffer_size(ctx, m, n)?;
2556    require_workspace(workspace.len(), lwork)?;
2557    unsafe {
2558        try_ffi!(sys::cusolverDnZgebrd(
2559            ctx.as_raw(),
2560            to_i32(m, "m")?,
2561            to_i32(n, "n")?,
2562            a.as_mut_ptr().cast(),
2563            to_i32(lda, "lda")?,
2564            d.as_mut_ptr().cast(),
2565            e.as_mut_ptr().cast(),
2566            tauq.as_mut_ptr().cast(),
2567            taup.as_mut_ptr().cast(),
2568            workspace.as_mut_ptr().cast(),
2569            to_i32(lwork, "lwork")?,
2570            dev_info.as_mut_ptr().cast(),
2571        ))?;
2572    }
2573    Ok(())
2574}
2575
2576pub fn sorgbr_buffer_size(
2577    ctx: &Context,
2578    side: SideMode,
2579    m: usize,
2580    n: usize,
2581    k: usize,
2582    a: &DeviceMemory<f32>,
2583    lda: usize,
2584    tau: &DeviceMemory<f32>,
2585) -> Result<usize> {
2586    ctx.bind()?;
2587    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2588    let mut lwork = 0;
2589    unsafe {
2590        try_ffi!(sys::cusolverDnSorgbr_bufferSize(
2591            ctx.as_raw(),
2592            side.into(),
2593            to_i32(m, "m")?,
2594            to_i32(n, "n")?,
2595            to_i32(k, "k")?,
2596            a.as_ptr().cast(),
2597            to_i32(lda, "lda")?,
2598            tau.as_ptr().cast(),
2599            &raw mut lwork,
2600        ))?;
2601    }
2602    to_usize(lwork, "lwork")
2603}
2604
2605pub fn dorgbr_buffer_size(
2606    ctx: &Context,
2607    side: SideMode,
2608    m: usize,
2609    n: usize,
2610    k: usize,
2611    a: &DeviceMemory<f64>,
2612    lda: usize,
2613    tau: &DeviceMemory<f64>,
2614) -> Result<usize> {
2615    ctx.bind()?;
2616    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2617    let mut lwork = 0;
2618    unsafe {
2619        try_ffi!(sys::cusolverDnDorgbr_bufferSize(
2620            ctx.as_raw(),
2621            side.into(),
2622            to_i32(m, "m")?,
2623            to_i32(n, "n")?,
2624            to_i32(k, "k")?,
2625            a.as_ptr().cast(),
2626            to_i32(lda, "lda")?,
2627            tau.as_ptr().cast(),
2628            &raw mut lwork,
2629        ))?;
2630    }
2631    to_usize(lwork, "lwork")
2632}
2633
2634pub fn cungbr_buffer_size(
2635    ctx: &Context,
2636    side: SideMode,
2637    m: usize,
2638    n: usize,
2639    k: usize,
2640    a: &DeviceMemory<Complex32>,
2641    lda: usize,
2642    tau: &DeviceMemory<Complex32>,
2643) -> Result<usize> {
2644    ctx.bind()?;
2645    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2646    let mut lwork = 0;
2647    unsafe {
2648        try_ffi!(sys::cusolverDnCungbr_bufferSize(
2649            ctx.as_raw(),
2650            side.into(),
2651            to_i32(m, "m")?,
2652            to_i32(n, "n")?,
2653            to_i32(k, "k")?,
2654            a.as_ptr().cast(),
2655            to_i32(lda, "lda")?,
2656            tau.as_ptr().cast(),
2657            &raw mut lwork,
2658        ))?;
2659    }
2660    to_usize(lwork, "lwork")
2661}
2662
2663pub fn zungbr_buffer_size(
2664    ctx: &Context,
2665    side: SideMode,
2666    m: usize,
2667    n: usize,
2668    k: usize,
2669    a: &DeviceMemory<Complex64>,
2670    lda: usize,
2671    tau: &DeviceMemory<Complex64>,
2672) -> Result<usize> {
2673    ctx.bind()?;
2674    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2675    let mut lwork = 0;
2676    unsafe {
2677        try_ffi!(sys::cusolverDnZungbr_bufferSize(
2678            ctx.as_raw(),
2679            side.into(),
2680            to_i32(m, "m")?,
2681            to_i32(n, "n")?,
2682            to_i32(k, "k")?,
2683            a.as_ptr().cast(),
2684            to_i32(lda, "lda")?,
2685            tau.as_ptr().cast(),
2686            &raw mut lwork,
2687        ))?;
2688    }
2689    to_usize(lwork, "lwork")
2690}
2691
2692/// Use the matching buffer-size helper to calculate the required workspace size.
2693///
2694/// The S and D data types are real valued single and double precision, respectively.
2695///
2696/// The C and Z data types are complex valued single and double precision, respectively.
2697///
2698/// Generates one of the unitary matrices `Q` or $P^{H}$ determined by `gebrd`
2699/// when reducing matrix `A` to bidiagonal form:
2700/// $Q^{H}\cdot A\cdot P = B$.
2701///
2702/// `Q` and $P^{H}$ are defined as products of elementary reflectors `H(i)`
2703/// or `G(i)`, respectively.
2704///
2705/// Provide workspace through `workspace`.
2706/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2707/// The workspace size in bytes is `size_of::<T>() * lwork`.
2708///
2709/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2710///
2711/// # Errors
2712///
2713/// Returns an error if cuSOLVER has not been initialized, if the
2714/// matrix dimensions or leading dimension are invalid, if the current GPU
2715/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2716pub fn sorgbr(
2717    ctx: &Context,
2718    side: SideMode,
2719    m: usize,
2720    n: usize,
2721    k: usize,
2722    a: &mut DeviceMemory<f32>,
2723    lda: usize,
2724    tau: &DeviceMemory<f32>,
2725    workspace: &mut DeviceMemory<f32>,
2726    dev_info: &mut DeviceMemory<i32>,
2727) -> Result<()> {
2728    ctx.bind()?;
2729    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2730    require_info_buffer(dev_info)?;
2731    let lwork = sorgbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2732    require_workspace(workspace.len(), lwork)?;
2733    unsafe {
2734        try_ffi!(sys::cusolverDnSorgbr(
2735            ctx.as_raw(),
2736            side.into(),
2737            to_i32(m, "m")?,
2738            to_i32(n, "n")?,
2739            to_i32(k, "k")?,
2740            a.as_mut_ptr().cast(),
2741            to_i32(lda, "lda")?,
2742            tau.as_ptr().cast(),
2743            workspace.as_mut_ptr().cast(),
2744            to_i32(lwork, "lwork")?,
2745            dev_info.as_mut_ptr().cast(),
2746        ))?;
2747    }
2748    Ok(())
2749}
2750
2751/// Use the matching buffer-size helper to calculate the required workspace size.
2752///
2753/// The S and D data types are real valued single and double precision, respectively.
2754///
2755/// The C and Z data types are complex valued single and double precision, respectively.
2756///
2757/// Generates one of the unitary matrices `Q` or $P^{H}$ determined by `gebrd`
2758/// when reducing matrix `A` to bidiagonal form:
2759/// $Q^{H}\cdot A\cdot P = B$.
2760///
2761/// `Q` and $P^{H}$ are defined as products of elementary reflectors `H(i)`
2762/// or `G(i)`, respectively.
2763///
2764/// Provide workspace through `workspace`.
2765/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2766/// The workspace size in bytes is `size_of::<T>() * lwork`.
2767///
2768/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2769///
2770/// # Errors
2771///
2772/// Returns an error if cuSOLVER has not been initialized, if the
2773/// matrix dimensions or leading dimension are invalid, if the current GPU
2774/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2775pub fn dorgbr(
2776    ctx: &Context,
2777    side: SideMode,
2778    m: usize,
2779    n: usize,
2780    k: usize,
2781    a: &mut DeviceMemory<f64>,
2782    lda: usize,
2783    tau: &DeviceMemory<f64>,
2784    workspace: &mut DeviceMemory<f64>,
2785    dev_info: &mut DeviceMemory<i32>,
2786) -> Result<()> {
2787    ctx.bind()?;
2788    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2789    require_info_buffer(dev_info)?;
2790    let lwork = dorgbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2791    require_workspace(workspace.len(), lwork)?;
2792    unsafe {
2793        try_ffi!(sys::cusolverDnDorgbr(
2794            ctx.as_raw(),
2795            side.into(),
2796            to_i32(m, "m")?,
2797            to_i32(n, "n")?,
2798            to_i32(k, "k")?,
2799            a.as_mut_ptr().cast(),
2800            to_i32(lda, "lda")?,
2801            tau.as_ptr().cast(),
2802            workspace.as_mut_ptr().cast(),
2803            to_i32(lwork, "lwork")?,
2804            dev_info.as_mut_ptr().cast(),
2805        ))?;
2806    }
2807    Ok(())
2808}
2809
2810pub fn cungbr(
2811    ctx: &Context,
2812    side: SideMode,
2813    m: usize,
2814    n: usize,
2815    k: usize,
2816    a: &mut DeviceMemory<Complex32>,
2817    lda: usize,
2818    tau: &DeviceMemory<Complex32>,
2819    workspace: &mut DeviceMemory<Complex32>,
2820    dev_info: &mut DeviceMemory<i32>,
2821) -> Result<()> {
2822    ctx.bind()?;
2823    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2824    require_info_buffer(dev_info)?;
2825    let lwork = cungbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2826    require_workspace(workspace.len(), lwork)?;
2827    unsafe {
2828        try_ffi!(sys::cusolverDnCungbr(
2829            ctx.as_raw(),
2830            side.into(),
2831            to_i32(m, "m")?,
2832            to_i32(n, "n")?,
2833            to_i32(k, "k")?,
2834            a.as_mut_ptr().cast(),
2835            to_i32(lda, "lda")?,
2836            tau.as_ptr().cast(),
2837            workspace.as_mut_ptr().cast(),
2838            to_i32(lwork, "lwork")?,
2839            dev_info.as_mut_ptr().cast(),
2840        ))?;
2841    }
2842    Ok(())
2843}
2844
2845pub fn zungbr(
2846    ctx: &Context,
2847    side: SideMode,
2848    m: usize,
2849    n: usize,
2850    k: usize,
2851    a: &mut DeviceMemory<Complex64>,
2852    lda: usize,
2853    tau: &DeviceMemory<Complex64>,
2854    workspace: &mut DeviceMemory<Complex64>,
2855    dev_info: &mut DeviceMemory<i32>,
2856) -> Result<()> {
2857    ctx.bind()?;
2858    validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2859    require_info_buffer(dev_info)?;
2860    let lwork = zungbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2861    require_workspace(workspace.len(), lwork)?;
2862    unsafe {
2863        try_ffi!(sys::cusolverDnZungbr(
2864            ctx.as_raw(),
2865            side.into(),
2866            to_i32(m, "m")?,
2867            to_i32(n, "n")?,
2868            to_i32(k, "k")?,
2869            a.as_mut_ptr().cast(),
2870            to_i32(lda, "lda")?,
2871            tau.as_ptr().cast(),
2872            workspace.as_mut_ptr().cast(),
2873            to_i32(lwork, "lwork")?,
2874            dev_info.as_mut_ptr().cast(),
2875        ))?;
2876    }
2877    Ok(())
2878}
2879
2880pub fn ssytrd_buffer_size(
2881    ctx: &Context,
2882    fill_mode: FillMode,
2883    n: usize,
2884    a: &DeviceMemory<f32>,
2885    lda: usize,
2886    d: &DeviceMemory<f32>,
2887    e: &DeviceMemory<f32>,
2888    tau: &DeviceMemory<f32>,
2889) -> Result<usize> {
2890    ctx.bind()?;
2891    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2892    let mut lwork = 0;
2893    unsafe {
2894        try_ffi!(sys::cusolverDnSsytrd_bufferSize(
2895            ctx.as_raw(),
2896            fill_mode.into(),
2897            to_i32(n, "n")?,
2898            a.as_ptr().cast(),
2899            to_i32(lda, "lda")?,
2900            d.as_ptr().cast(),
2901            e.as_ptr().cast(),
2902            tau.as_ptr().cast(),
2903            &raw mut lwork,
2904        ))?;
2905    }
2906    to_usize(lwork, "lwork")
2907}
2908
2909pub fn dsytrd_buffer_size(
2910    ctx: &Context,
2911    fill_mode: FillMode,
2912    n: usize,
2913    a: &DeviceMemory<f64>,
2914    lda: usize,
2915    d: &DeviceMemory<f64>,
2916    e: &DeviceMemory<f64>,
2917    tau: &DeviceMemory<f64>,
2918) -> Result<usize> {
2919    ctx.bind()?;
2920    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2921    let mut lwork = 0;
2922    unsafe {
2923        try_ffi!(sys::cusolverDnDsytrd_bufferSize(
2924            ctx.as_raw(),
2925            fill_mode.into(),
2926            to_i32(n, "n")?,
2927            a.as_ptr().cast(),
2928            to_i32(lda, "lda")?,
2929            d.as_ptr().cast(),
2930            e.as_ptr().cast(),
2931            tau.as_ptr().cast(),
2932            &raw mut lwork,
2933        ))?;
2934    }
2935    to_usize(lwork, "lwork")
2936}
2937
2938pub fn chetrd_buffer_size(
2939    ctx: &Context,
2940    fill_mode: FillMode,
2941    n: usize,
2942    a: &DeviceMemory<Complex32>,
2943    lda: usize,
2944    d: &DeviceMemory<f32>,
2945    e: &DeviceMemory<f32>,
2946    tau: &DeviceMemory<Complex32>,
2947) -> Result<usize> {
2948    ctx.bind()?;
2949    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2950    let mut lwork = 0;
2951    unsafe {
2952        try_ffi!(sys::cusolverDnChetrd_bufferSize(
2953            ctx.as_raw(),
2954            fill_mode.into(),
2955            to_i32(n, "n")?,
2956            a.as_ptr().cast(),
2957            to_i32(lda, "lda")?,
2958            d.as_ptr().cast(),
2959            e.as_ptr().cast(),
2960            tau.as_ptr().cast(),
2961            &raw mut lwork,
2962        ))?;
2963    }
2964    to_usize(lwork, "lwork")
2965}
2966
2967pub fn zhetrd_buffer_size(
2968    ctx: &Context,
2969    fill_mode: FillMode,
2970    n: usize,
2971    a: &DeviceMemory<Complex64>,
2972    lda: usize,
2973    d: &DeviceMemory<f64>,
2974    e: &DeviceMemory<f64>,
2975    tau: &DeviceMemory<Complex64>,
2976) -> Result<usize> {
2977    ctx.bind()?;
2978    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2979    let mut lwork = 0;
2980    unsafe {
2981        try_ffi!(sys::cusolverDnZhetrd_bufferSize(
2982            ctx.as_raw(),
2983            fill_mode.into(),
2984            to_i32(n, "n")?,
2985            a.as_ptr().cast(),
2986            to_i32(lda, "lda")?,
2987            d.as_ptr().cast(),
2988            e.as_ptr().cast(),
2989            tau.as_ptr().cast(),
2990            &raw mut lwork,
2991        ))?;
2992    }
2993    to_usize(lwork, "lwork")
2994}
2995
2996/// Use the matching buffer-size helper to calculate the required workspace size.
2997///
2998/// The S and D data types are real valued single and double precision, respectively.
2999///
3000/// The C and Z data types are complex valued single and double precision, respectively.
3001///
3002/// Reduces a general symmetric (Hermitian) $n \times n$ matrix `A` to the
3003/// real symmetric tridiagonal form `T` by an orthogonal transformation:
3004/// $Q^{H}\cdot A\cdot Q = T$.
3005///
3006/// On output, `A` contains `T` and Householder reflection vectors.
3007/// If `fill_mode` is [`FillMode::Upper`], the diagonal and first
3008/// superdiagonal of `A` are overwritten by `T`; elements above the first
3009/// superdiagonal, together with `tau`, represent `Q`.
3010/// If `fill_mode` is [`FillMode::Lower`], the diagonal and first subdiagonal
3011/// of `A` are overwritten by `T`; elements below the first subdiagonal,
3012/// together with `tau`, represent `Q`.
3013///
3014/// Provide workspace through `workspace`.
3015/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3016/// The workspace size in bytes is `size_of::<T>() * lwork`.
3017///
3018/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3019/// The problem size `n` is limited by `n * lda <= INT32_MAX` primarily due to the current implementation constraints.
3020///
3021/// # Errors
3022///
3023/// Returns an error if cuSOLVER has not been initialized, if the
3024/// matrix dimensions, leading dimension, or fill mode are invalid, if the
3025/// current GPU architecture is unsupported, or if cuSOLVER reports an
3026/// internal failure.
3027pub fn ssytrd(
3028    ctx: &Context,
3029    fill_mode: FillMode,
3030    n: usize,
3031    a: &mut DeviceMemory<f32>,
3032    lda: usize,
3033    d: &mut DeviceMemory<f32>,
3034    e: &mut DeviceMemory<f32>,
3035    tau: &mut DeviceMemory<f32>,
3036    workspace: &mut DeviceMemory<f32>,
3037    dev_info: &mut DeviceMemory<i32>,
3038) -> Result<()> {
3039    ctx.bind()?;
3040    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3041    require_info_buffer(dev_info)?;
3042    let lwork = ssytrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3043    require_workspace(workspace.len(), lwork)?;
3044    unsafe {
3045        try_ffi!(sys::cusolverDnSsytrd(
3046            ctx.as_raw(),
3047            fill_mode.into(),
3048            to_i32(n, "n")?,
3049            a.as_mut_ptr().cast(),
3050            to_i32(lda, "lda")?,
3051            d.as_mut_ptr().cast(),
3052            e.as_mut_ptr().cast(),
3053            tau.as_mut_ptr().cast(),
3054            workspace.as_mut_ptr().cast(),
3055            to_i32(lwork, "lwork")?,
3056            dev_info.as_mut_ptr().cast(),
3057        ))?;
3058    }
3059    Ok(())
3060}
3061
3062/// Use the matching buffer-size helper to calculate the required workspace size.
3063///
3064/// The S and D data types are real valued single and double precision, respectively.
3065///
3066/// The C and Z data types are complex valued single and double precision, respectively.
3067///
3068/// Reduces a general symmetric (Hermitian) $n \times n$ matrix `A` to the
3069/// real symmetric tridiagonal form `T` by an orthogonal transformation:
3070/// $Q^{H}\cdot A\cdot Q = T$.
3071///
3072/// On output, `A` contains `T` and Householder reflection vectors.
3073/// If `fill_mode` is [`FillMode::Upper`], the diagonal and first
3074/// superdiagonal of `A` are overwritten by `T`; elements above the first
3075/// superdiagonal, together with `tau`, represent `Q`.
3076/// If `fill_mode` is [`FillMode::Lower`], the diagonal and first subdiagonal
3077/// of `A` are overwritten by `T`; elements below the first subdiagonal,
3078/// together with `tau`, represent `Q`.
3079///
3080/// Provide workspace through `workspace`.
3081/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3082/// The workspace size in bytes is `size_of::<T>() * lwork`.
3083///
3084/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3085/// The problem size `n` is limited by `n * lda <= INT32_MAX` primarily due to the current implementation constraints.
3086///
3087/// # Errors
3088///
3089/// Returns an error if cuSOLVER has not been initialized, if the
3090/// matrix dimensions, leading dimension, or fill mode are invalid, if the
3091/// current GPU architecture is unsupported, or if cuSOLVER reports an
3092/// internal failure.
3093pub fn dsytrd(
3094    ctx: &Context,
3095    fill_mode: FillMode,
3096    n: usize,
3097    a: &mut DeviceMemory<f64>,
3098    lda: usize,
3099    d: &mut DeviceMemory<f64>,
3100    e: &mut DeviceMemory<f64>,
3101    tau: &mut DeviceMemory<f64>,
3102    workspace: &mut DeviceMemory<f64>,
3103    dev_info: &mut DeviceMemory<i32>,
3104) -> Result<()> {
3105    ctx.bind()?;
3106    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3107    require_info_buffer(dev_info)?;
3108    let lwork = dsytrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3109    require_workspace(workspace.len(), lwork)?;
3110    unsafe {
3111        try_ffi!(sys::cusolverDnDsytrd(
3112            ctx.as_raw(),
3113            fill_mode.into(),
3114            to_i32(n, "n")?,
3115            a.as_mut_ptr().cast(),
3116            to_i32(lda, "lda")?,
3117            d.as_mut_ptr().cast(),
3118            e.as_mut_ptr().cast(),
3119            tau.as_mut_ptr().cast(),
3120            workspace.as_mut_ptr().cast(),
3121            to_i32(lwork, "lwork")?,
3122            dev_info.as_mut_ptr().cast(),
3123        ))?;
3124    }
3125    Ok(())
3126}
3127
3128pub fn chetrd(
3129    ctx: &Context,
3130    fill_mode: FillMode,
3131    n: usize,
3132    a: &mut DeviceMemory<Complex32>,
3133    lda: usize,
3134    d: &mut DeviceMemory<f32>,
3135    e: &mut DeviceMemory<f32>,
3136    tau: &mut DeviceMemory<Complex32>,
3137    workspace: &mut DeviceMemory<Complex32>,
3138    dev_info: &mut DeviceMemory<i32>,
3139) -> Result<()> {
3140    ctx.bind()?;
3141    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3142    require_info_buffer(dev_info)?;
3143    let lwork = chetrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3144    require_workspace(workspace.len(), lwork)?;
3145    unsafe {
3146        try_ffi!(sys::cusolverDnChetrd(
3147            ctx.as_raw(),
3148            fill_mode.into(),
3149            to_i32(n, "n")?,
3150            a.as_mut_ptr().cast(),
3151            to_i32(lda, "lda")?,
3152            d.as_mut_ptr().cast(),
3153            e.as_mut_ptr().cast(),
3154            tau.as_mut_ptr().cast(),
3155            workspace.as_mut_ptr().cast(),
3156            to_i32(lwork, "lwork")?,
3157            dev_info.as_mut_ptr().cast(),
3158        ))?;
3159    }
3160    Ok(())
3161}
3162
3163pub fn zhetrd(
3164    ctx: &Context,
3165    fill_mode: FillMode,
3166    n: usize,
3167    a: &mut DeviceMemory<Complex64>,
3168    lda: usize,
3169    d: &mut DeviceMemory<f64>,
3170    e: &mut DeviceMemory<f64>,
3171    tau: &mut DeviceMemory<Complex64>,
3172    workspace: &mut DeviceMemory<Complex64>,
3173    dev_info: &mut DeviceMemory<i32>,
3174) -> Result<()> {
3175    ctx.bind()?;
3176    validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3177    require_info_buffer(dev_info)?;
3178    let lwork = zhetrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3179    require_workspace(workspace.len(), lwork)?;
3180    unsafe {
3181        try_ffi!(sys::cusolverDnZhetrd(
3182            ctx.as_raw(),
3183            fill_mode.into(),
3184            to_i32(n, "n")?,
3185            a.as_mut_ptr().cast(),
3186            to_i32(lda, "lda")?,
3187            d.as_mut_ptr().cast(),
3188            e.as_mut_ptr().cast(),
3189            tau.as_mut_ptr().cast(),
3190            workspace.as_mut_ptr().cast(),
3191            to_i32(lwork, "lwork")?,
3192            dev_info.as_mut_ptr().cast(),
3193        ))?;
3194    }
3195    Ok(())
3196}
3197
3198pub fn sorgtr_buffer_size(
3199    ctx: &Context,
3200    fill_mode: FillMode,
3201    n: usize,
3202    a: &DeviceMemory<f32>,
3203    lda: usize,
3204    tau: &DeviceMemory<f32>,
3205) -> Result<usize> {
3206    ctx.bind()?;
3207    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3208    let mut lwork = 0;
3209    unsafe {
3210        try_ffi!(sys::cusolverDnSorgtr_bufferSize(
3211            ctx.as_raw(),
3212            fill_mode.into(),
3213            to_i32(n, "n")?,
3214            a.as_ptr().cast(),
3215            to_i32(lda, "lda")?,
3216            tau.as_ptr().cast(),
3217            &raw mut lwork,
3218        ))?;
3219    }
3220    to_usize(lwork, "lwork")
3221}
3222
3223pub fn dorgtr_buffer_size(
3224    ctx: &Context,
3225    fill_mode: FillMode,
3226    n: usize,
3227    a: &DeviceMemory<f64>,
3228    lda: usize,
3229    tau: &DeviceMemory<f64>,
3230) -> Result<usize> {
3231    ctx.bind()?;
3232    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3233    let mut lwork = 0;
3234    unsafe {
3235        try_ffi!(sys::cusolverDnDorgtr_bufferSize(
3236            ctx.as_raw(),
3237            fill_mode.into(),
3238            to_i32(n, "n")?,
3239            a.as_ptr().cast(),
3240            to_i32(lda, "lda")?,
3241            tau.as_ptr().cast(),
3242            &raw mut lwork,
3243        ))?;
3244    }
3245    to_usize(lwork, "lwork")
3246}
3247
3248pub fn cungtr_buffer_size(
3249    ctx: &Context,
3250    fill_mode: FillMode,
3251    n: usize,
3252    a: &DeviceMemory<Complex32>,
3253    lda: usize,
3254    tau: &DeviceMemory<Complex32>,
3255) -> Result<usize> {
3256    ctx.bind()?;
3257    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3258    let mut lwork = 0;
3259    unsafe {
3260        try_ffi!(sys::cusolverDnCungtr_bufferSize(
3261            ctx.as_raw(),
3262            fill_mode.into(),
3263            to_i32(n, "n")?,
3264            a.as_ptr().cast(),
3265            to_i32(lda, "lda")?,
3266            tau.as_ptr().cast(),
3267            &raw mut lwork,
3268        ))?;
3269    }
3270    to_usize(lwork, "lwork")
3271}
3272
3273pub fn zungtr_buffer_size(
3274    ctx: &Context,
3275    fill_mode: FillMode,
3276    n: usize,
3277    a: &DeviceMemory<Complex64>,
3278    lda: usize,
3279    tau: &DeviceMemory<Complex64>,
3280) -> Result<usize> {
3281    ctx.bind()?;
3282    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3283    let mut lwork = 0;
3284    unsafe {
3285        try_ffi!(sys::cusolverDnZungtr_bufferSize(
3286            ctx.as_raw(),
3287            fill_mode.into(),
3288            to_i32(n, "n")?,
3289            a.as_ptr().cast(),
3290            to_i32(lda, "lda")?,
3291            tau.as_ptr().cast(),
3292            &raw mut lwork,
3293        ))?;
3294    }
3295    to_usize(lwork, "lwork")
3296}
3297
3298/// Use the matching buffer-size helper to calculate the required workspace size.
3299///
3300/// The S and D data types are real valued single and double precision, respectively.
3301///
3302/// The C and Z data types are complex valued single and double precision, respectively.
3303///
3304/// Generates the orthogonal matrix `Q` from the elementary reflectors returned
3305/// by `sytrd`.
3306///
3307/// Provide workspace through `workspace`.
3308/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3309/// The workspace size in bytes is `size_of::<T>() * lwork`.
3310///
3311/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3312///
3313/// # Errors
3314///
3315/// Returns an error if cuSOLVER has not been initialized, if the
3316/// matrix dimensions or leading dimension are invalid, if the current GPU
3317/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3318pub fn sorgtr(
3319    ctx: &Context,
3320    fill_mode: FillMode,
3321    n: usize,
3322    a: &mut DeviceMemory<f32>,
3323    lda: usize,
3324    tau: &DeviceMemory<f32>,
3325    workspace: &mut DeviceMemory<f32>,
3326    dev_info: &mut DeviceMemory<i32>,
3327) -> Result<()> {
3328    ctx.bind()?;
3329    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3330    require_info_buffer(dev_info)?;
3331    let lwork = sorgtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3332    require_workspace(workspace.len(), lwork)?;
3333    unsafe {
3334        try_ffi!(sys::cusolverDnSorgtr(
3335            ctx.as_raw(),
3336            fill_mode.into(),
3337            to_i32(n, "n")?,
3338            a.as_mut_ptr().cast(),
3339            to_i32(lda, "lda")?,
3340            tau.as_ptr().cast(),
3341            workspace.as_mut_ptr().cast(),
3342            to_i32(lwork, "lwork")?,
3343            dev_info.as_mut_ptr().cast(),
3344        ))?;
3345    }
3346    Ok(())
3347}
3348
3349/// Use the matching buffer-size helper to calculate the required workspace size.
3350///
3351/// The S and D data types are real valued single and double precision, respectively.
3352///
3353/// The C and Z data types are complex valued single and double precision, respectively.
3354///
3355/// Generates the orthogonal matrix `Q` from the elementary reflectors returned
3356/// by `sytrd`.
3357///
3358/// Provide workspace through `workspace`.
3359/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3360/// The workspace size in bytes is `size_of::<T>() * lwork`.
3361///
3362/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3363///
3364/// # Errors
3365///
3366/// Returns an error if cuSOLVER has not been initialized, if the
3367/// matrix dimensions or leading dimension are invalid, if the current GPU
3368/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3369pub fn dorgtr(
3370    ctx: &Context,
3371    fill_mode: FillMode,
3372    n: usize,
3373    a: &mut DeviceMemory<f64>,
3374    lda: usize,
3375    tau: &DeviceMemory<f64>,
3376    workspace: &mut DeviceMemory<f64>,
3377    dev_info: &mut DeviceMemory<i32>,
3378) -> Result<()> {
3379    ctx.bind()?;
3380    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3381    require_info_buffer(dev_info)?;
3382    let lwork = dorgtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3383    require_workspace(workspace.len(), lwork)?;
3384    unsafe {
3385        try_ffi!(sys::cusolverDnDorgtr(
3386            ctx.as_raw(),
3387            fill_mode.into(),
3388            to_i32(n, "n")?,
3389            a.as_mut_ptr().cast(),
3390            to_i32(lda, "lda")?,
3391            tau.as_ptr().cast(),
3392            workspace.as_mut_ptr().cast(),
3393            to_i32(lwork, "lwork")?,
3394            dev_info.as_mut_ptr().cast(),
3395        ))?;
3396    }
3397    Ok(())
3398}
3399
3400pub fn cungtr(
3401    ctx: &Context,
3402    fill_mode: FillMode,
3403    n: usize,
3404    a: &mut DeviceMemory<Complex32>,
3405    lda: usize,
3406    tau: &DeviceMemory<Complex32>,
3407    workspace: &mut DeviceMemory<Complex32>,
3408    dev_info: &mut DeviceMemory<i32>,
3409) -> Result<()> {
3410    ctx.bind()?;
3411    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3412    require_info_buffer(dev_info)?;
3413    let lwork = cungtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3414    require_workspace(workspace.len(), lwork)?;
3415    unsafe {
3416        try_ffi!(sys::cusolverDnCungtr(
3417            ctx.as_raw(),
3418            fill_mode.into(),
3419            to_i32(n, "n")?,
3420            a.as_mut_ptr().cast(),
3421            to_i32(lda, "lda")?,
3422            tau.as_ptr().cast(),
3423            workspace.as_mut_ptr().cast(),
3424            to_i32(lwork, "lwork")?,
3425            dev_info.as_mut_ptr().cast(),
3426        ))?;
3427    }
3428    Ok(())
3429}
3430
3431pub fn zungtr(
3432    ctx: &Context,
3433    fill_mode: FillMode,
3434    n: usize,
3435    a: &mut DeviceMemory<Complex64>,
3436    lda: usize,
3437    tau: &DeviceMemory<Complex64>,
3438    workspace: &mut DeviceMemory<Complex64>,
3439    dev_info: &mut DeviceMemory<i32>,
3440) -> Result<()> {
3441    ctx.bind()?;
3442    validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3443    require_info_buffer(dev_info)?;
3444    let lwork = zungtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3445    require_workspace(workspace.len(), lwork)?;
3446    unsafe {
3447        try_ffi!(sys::cusolverDnZungtr(
3448            ctx.as_raw(),
3449            fill_mode.into(),
3450            to_i32(n, "n")?,
3451            a.as_mut_ptr().cast(),
3452            to_i32(lda, "lda")?,
3453            tau.as_ptr().cast(),
3454            workspace.as_mut_ptr().cast(),
3455            to_i32(lwork, "lwork")?,
3456            dev_info.as_mut_ptr().cast(),
3457        ))?;
3458    }
3459    Ok(())
3460}
3461
3462pub fn sormtr_buffer_size(
3463    ctx: &Context,
3464    side: SideMode,
3465    fill_mode: FillMode,
3466    operation: Operation,
3467    m: usize,
3468    n: usize,
3469    a: &DeviceMemory<f32>,
3470    lda: usize,
3471    tau: &DeviceMemory<f32>,
3472    c: &DeviceMemory<f32>,
3473    ldc: usize,
3474) -> Result<usize> {
3475    ctx.bind()?;
3476    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3477    let mut lwork = 0;
3478    unsafe {
3479        try_ffi!(sys::cusolverDnSormtr_bufferSize(
3480            ctx.as_raw(),
3481            side.into(),
3482            fill_mode.into(),
3483            operation.into(),
3484            to_i32(m, "m")?,
3485            to_i32(n, "n")?,
3486            a.as_ptr().cast(),
3487            to_i32(lda, "lda")?,
3488            tau.as_ptr().cast(),
3489            c.as_ptr().cast(),
3490            to_i32(ldc, "ldc")?,
3491            &raw mut lwork,
3492        ))?;
3493    }
3494    to_usize(lwork, "lwork")
3495}
3496
3497pub fn dormtr_buffer_size(
3498    ctx: &Context,
3499    side: SideMode,
3500    fill_mode: FillMode,
3501    operation: Operation,
3502    m: usize,
3503    n: usize,
3504    a: &DeviceMemory<f64>,
3505    lda: usize,
3506    tau: &DeviceMemory<f64>,
3507    c: &DeviceMemory<f64>,
3508    ldc: usize,
3509) -> Result<usize> {
3510    ctx.bind()?;
3511    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3512    let mut lwork = 0;
3513    unsafe {
3514        try_ffi!(sys::cusolverDnDormtr_bufferSize(
3515            ctx.as_raw(),
3516            side.into(),
3517            fill_mode.into(),
3518            operation.into(),
3519            to_i32(m, "m")?,
3520            to_i32(n, "n")?,
3521            a.as_ptr().cast(),
3522            to_i32(lda, "lda")?,
3523            tau.as_ptr().cast(),
3524            c.as_ptr().cast(),
3525            to_i32(ldc, "ldc")?,
3526            &raw mut lwork,
3527        ))?;
3528    }
3529    to_usize(lwork, "lwork")
3530}
3531
3532pub fn cunmtr_buffer_size(
3533    ctx: &Context,
3534    side: SideMode,
3535    fill_mode: FillMode,
3536    operation: Operation,
3537    m: usize,
3538    n: usize,
3539    a: &DeviceMemory<Complex32>,
3540    lda: usize,
3541    tau: &DeviceMemory<Complex32>,
3542    c: &DeviceMemory<Complex32>,
3543    ldc: usize,
3544) -> Result<usize> {
3545    ctx.bind()?;
3546    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3547    let mut lwork = 0;
3548    unsafe {
3549        try_ffi!(sys::cusolverDnCunmtr_bufferSize(
3550            ctx.as_raw(),
3551            side.into(),
3552            fill_mode.into(),
3553            operation.into(),
3554            to_i32(m, "m")?,
3555            to_i32(n, "n")?,
3556            a.as_ptr().cast(),
3557            to_i32(lda, "lda")?,
3558            tau.as_ptr().cast(),
3559            c.as_ptr().cast(),
3560            to_i32(ldc, "ldc")?,
3561            &raw mut lwork,
3562        ))?;
3563    }
3564    to_usize(lwork, "lwork")
3565}
3566
3567pub fn zunmtr_buffer_size(
3568    ctx: &Context,
3569    side: SideMode,
3570    fill_mode: FillMode,
3571    operation: Operation,
3572    m: usize,
3573    n: usize,
3574    a: &DeviceMemory<Complex64>,
3575    lda: usize,
3576    tau: &DeviceMemory<Complex64>,
3577    c: &DeviceMemory<Complex64>,
3578    ldc: usize,
3579) -> Result<usize> {
3580    ctx.bind()?;
3581    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3582    let mut lwork = 0;
3583    unsafe {
3584        try_ffi!(sys::cusolverDnZunmtr_bufferSize(
3585            ctx.as_raw(),
3586            side.into(),
3587            fill_mode.into(),
3588            operation.into(),
3589            to_i32(m, "m")?,
3590            to_i32(n, "n")?,
3591            a.as_ptr().cast(),
3592            to_i32(lda, "lda")?,
3593            tau.as_ptr().cast(),
3594            c.as_ptr().cast(),
3595            to_i32(ldc, "ldc")?,
3596            &raw mut lwork,
3597        ))?;
3598    }
3599    to_usize(lwork, "lwork")
3600}
3601
3602/// Use the matching buffer-size helper to calculate the required workspace size.
3603///
3604/// The S and D data types are real valued single and double precision, respectively.
3605///
3606/// The C and Z data types are complex valued single and double precision, respectively.
3607///
3608/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
3609/// returned by `sytrd`, to `C` and stores the result in `C`.
3610///
3611/// `side` selects whether `Q` is applied from the left or right, and
3612/// `operation` selects whether `Q` is transposed.
3613///
3614/// Provide workspace through `workspace`.
3615/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3616/// The workspace size in bytes is `size_of::<T>() * lwork`.
3617///
3618/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3619///
3620/// # Errors
3621///
3622/// Returns an error if cuSOLVER has not been initialized, if the
3623/// matrix dimensions or leading dimensions are invalid, if the current GPU
3624/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3625pub fn sormtr(
3626    ctx: &Context,
3627    side: SideMode,
3628    fill_mode: FillMode,
3629    operation: Operation,
3630    m: usize,
3631    n: usize,
3632    a: &mut DeviceMemory<f32>,
3633    lda: usize,
3634    tau: &mut DeviceMemory<f32>,
3635    c: &mut DeviceMemory<f32>,
3636    ldc: usize,
3637    workspace: &mut DeviceMemory<f32>,
3638    dev_info: &mut DeviceMemory<i32>,
3639) -> Result<()> {
3640    ctx.bind()?;
3641    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3642    require_info_buffer(dev_info)?;
3643    let lwork = sormtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3644    require_workspace(workspace.len(), lwork)?;
3645    unsafe {
3646        try_ffi!(sys::cusolverDnSormtr(
3647            ctx.as_raw(),
3648            side.into(),
3649            fill_mode.into(),
3650            operation.into(),
3651            to_i32(m, "m")?,
3652            to_i32(n, "n")?,
3653            a.as_mut_ptr().cast(),
3654            to_i32(lda, "lda")?,
3655            tau.as_mut_ptr().cast(),
3656            c.as_mut_ptr().cast(),
3657            to_i32(ldc, "ldc")?,
3658            workspace.as_mut_ptr().cast(),
3659            to_i32(lwork, "lwork")?,
3660            dev_info.as_mut_ptr().cast(),
3661        ))?;
3662    }
3663    Ok(())
3664}
3665
3666/// Use the matching buffer-size helper to calculate the required workspace size.
3667///
3668/// The S and D data types are real valued single and double precision, respectively.
3669///
3670/// The C and Z data types are complex valued single and double precision, respectively.
3671///
3672/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
3673/// returned by `sytrd`, to `C` and stores the result in `C`.
3674///
3675/// `side` selects whether `Q` is applied from the left or right, and
3676/// `operation` selects whether `Q` is transposed.
3677///
3678/// Provide workspace through `workspace`.
3679/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3680/// The workspace size in bytes is `size_of::<T>() * lwork`.
3681///
3682/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3683///
3684/// # Errors
3685///
3686/// Returns an error if cuSOLVER has not been initialized, if the
3687/// matrix dimensions or leading dimensions are invalid, if the current GPU
3688/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3689pub fn dormtr(
3690    ctx: &Context,
3691    side: SideMode,
3692    fill_mode: FillMode,
3693    operation: Operation,
3694    m: usize,
3695    n: usize,
3696    a: &mut DeviceMemory<f64>,
3697    lda: usize,
3698    tau: &mut DeviceMemory<f64>,
3699    c: &mut DeviceMemory<f64>,
3700    ldc: usize,
3701    workspace: &mut DeviceMemory<f64>,
3702    dev_info: &mut DeviceMemory<i32>,
3703) -> Result<()> {
3704    ctx.bind()?;
3705    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3706    require_info_buffer(dev_info)?;
3707    let lwork = dormtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3708    require_workspace(workspace.len(), lwork)?;
3709    unsafe {
3710        try_ffi!(sys::cusolverDnDormtr(
3711            ctx.as_raw(),
3712            side.into(),
3713            fill_mode.into(),
3714            operation.into(),
3715            to_i32(m, "m")?,
3716            to_i32(n, "n")?,
3717            a.as_mut_ptr().cast(),
3718            to_i32(lda, "lda")?,
3719            tau.as_mut_ptr().cast(),
3720            c.as_mut_ptr().cast(),
3721            to_i32(ldc, "ldc")?,
3722            workspace.as_mut_ptr().cast(),
3723            to_i32(lwork, "lwork")?,
3724            dev_info.as_mut_ptr().cast(),
3725        ))?;
3726    }
3727    Ok(())
3728}
3729
3730pub fn cunmtr(
3731    ctx: &Context,
3732    side: SideMode,
3733    fill_mode: FillMode,
3734    operation: Operation,
3735    m: usize,
3736    n: usize,
3737    a: &mut DeviceMemory<Complex32>,
3738    lda: usize,
3739    tau: &mut DeviceMemory<Complex32>,
3740    c: &mut DeviceMemory<Complex32>,
3741    ldc: usize,
3742    workspace: &mut DeviceMemory<Complex32>,
3743    dev_info: &mut DeviceMemory<i32>,
3744) -> Result<()> {
3745    ctx.bind()?;
3746    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3747    require_info_buffer(dev_info)?;
3748    let lwork = cunmtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3749    require_workspace(workspace.len(), lwork)?;
3750    unsafe {
3751        try_ffi!(sys::cusolverDnCunmtr(
3752            ctx.as_raw(),
3753            side.into(),
3754            fill_mode.into(),
3755            operation.into(),
3756            to_i32(m, "m")?,
3757            to_i32(n, "n")?,
3758            a.as_mut_ptr().cast(),
3759            to_i32(lda, "lda")?,
3760            tau.as_mut_ptr().cast(),
3761            c.as_mut_ptr().cast(),
3762            to_i32(ldc, "ldc")?,
3763            workspace.as_mut_ptr().cast(),
3764            to_i32(lwork, "lwork")?,
3765            dev_info.as_mut_ptr().cast(),
3766        ))?;
3767    }
3768    Ok(())
3769}
3770
3771pub fn zunmtr(
3772    ctx: &Context,
3773    side: SideMode,
3774    fill_mode: FillMode,
3775    operation: Operation,
3776    m: usize,
3777    n: usize,
3778    a: &mut DeviceMemory<Complex64>,
3779    lda: usize,
3780    tau: &mut DeviceMemory<Complex64>,
3781    c: &mut DeviceMemory<Complex64>,
3782    ldc: usize,
3783    workspace: &mut DeviceMemory<Complex64>,
3784    dev_info: &mut DeviceMemory<i32>,
3785) -> Result<()> {
3786    ctx.bind()?;
3787    validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3788    require_info_buffer(dev_info)?;
3789    let lwork = zunmtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3790    require_workspace(workspace.len(), lwork)?;
3791    unsafe {
3792        try_ffi!(sys::cusolverDnZunmtr(
3793            ctx.as_raw(),
3794            side.into(),
3795            fill_mode.into(),
3796            operation.into(),
3797            to_i32(m, "m")?,
3798            to_i32(n, "n")?,
3799            a.as_mut_ptr().cast(),
3800            to_i32(lda, "lda")?,
3801            tau.as_mut_ptr().cast(),
3802            c.as_mut_ptr().cast(),
3803            to_i32(ldc, "ldc")?,
3804            workspace.as_mut_ptr().cast(),
3805            to_i32(lwork, "lwork")?,
3806            dev_info.as_mut_ptr().cast(),
3807        ))?;
3808    }
3809    Ok(())
3810}
3811
3812pub fn sgeqrf_buffer_size(
3813    ctx: &Context,
3814    m: usize,
3815    n: usize,
3816    a: &mut DeviceMemory<f32>,
3817    lda: usize,
3818) -> Result<usize> {
3819    ctx.bind()?;
3820    validate_matrix(m, n, a.len(), lda)?;
3821    let mut lwork = 0;
3822    unsafe {
3823        try_ffi!(sys::cusolverDnSgeqrf_bufferSize(
3824            ctx.as_raw(),
3825            to_i32(m, "m")?,
3826            to_i32(n, "n")?,
3827            a.as_mut_ptr().cast(),
3828            to_i32(lda, "lda")?,
3829            &raw mut lwork,
3830        ))?;
3831    }
3832    to_usize(lwork, "lwork")
3833}
3834
3835pub fn dgeqrf_buffer_size(
3836    ctx: &Context,
3837    m: usize,
3838    n: usize,
3839    a: &mut DeviceMemory<f64>,
3840    lda: usize,
3841) -> Result<usize> {
3842    ctx.bind()?;
3843    validate_matrix(m, n, a.len(), lda)?;
3844    let mut lwork = 0;
3845    unsafe {
3846        try_ffi!(sys::cusolverDnDgeqrf_bufferSize(
3847            ctx.as_raw(),
3848            to_i32(m, "m")?,
3849            to_i32(n, "n")?,
3850            a.as_mut_ptr().cast(),
3851            to_i32(lda, "lda")?,
3852            &raw mut lwork,
3853        ))?;
3854    }
3855    to_usize(lwork, "lwork")
3856}
3857
3858pub fn cgeqrf_buffer_size(
3859    ctx: &Context,
3860    m: usize,
3861    n: usize,
3862    a: &mut DeviceMemory<Complex32>,
3863    lda: usize,
3864) -> Result<usize> {
3865    ctx.bind()?;
3866    validate_matrix(m, n, a.len(), lda)?;
3867    let mut lwork = 0;
3868    unsafe {
3869        try_ffi!(sys::cusolverDnCgeqrf_bufferSize(
3870            ctx.as_raw(),
3871            to_i32(m, "m")?,
3872            to_i32(n, "n")?,
3873            a.as_mut_ptr().cast(),
3874            to_i32(lda, "lda")?,
3875            &raw mut lwork,
3876        ))?;
3877    }
3878    to_usize(lwork, "lwork")
3879}
3880
3881pub fn zgeqrf_buffer_size(
3882    ctx: &Context,
3883    m: usize,
3884    n: usize,
3885    a: &mut DeviceMemory<Complex64>,
3886    lda: usize,
3887) -> Result<usize> {
3888    ctx.bind()?;
3889    validate_matrix(m, n, a.len(), lda)?;
3890    let mut lwork = 0;
3891    unsafe {
3892        try_ffi!(sys::cusolverDnZgeqrf_bufferSize(
3893            ctx.as_raw(),
3894            to_i32(m, "m")?,
3895            to_i32(n, "n")?,
3896            a.as_mut_ptr().cast(),
3897            to_i32(lda, "lda")?,
3898            &raw mut lwork,
3899        ))?;
3900    }
3901    to_usize(lwork, "lwork")
3902}
3903
3904/// Use the matching buffer-size helper to calculate the required workspace size.
3905///
3906/// The S and D data types are real valued single and double precision, respectively.
3907///
3908/// The C and Z data types are complex valued single and double precision, respectively.
3909///
3910/// Computes the QR factorization of an $m \times n$ matrix
3911///
3912/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
3913///
3914/// Provide workspace through `workspace`.
3915/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3916///
3917/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
3918///
3919/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
3920/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `τ`.
3921/// If `v` is original householder vector, `q` is the new householder vector corresponding to `τ`, satisfying the following relation
3922///
3923/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3924///
3925/// # Errors
3926///
3927/// Returns an error if cuSOLVER has not been initialized, if the
3928/// matrix dimensions or leading dimension are invalid, if the current GPU
3929/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3930pub fn sgeqrf(
3931    ctx: &Context,
3932    m: usize,
3933    n: usize,
3934    a: &mut DeviceMemory<f32>,
3935    lda: usize,
3936    tau: &mut DeviceMemory<f32>,
3937    workspace: &mut DeviceMemory<f32>,
3938    dev_info: &mut DeviceMemory<i32>,
3939) -> Result<()> {
3940    ctx.bind()?;
3941    validate_matrix(m, n, a.len(), lda)?;
3942    require_tau_buffer(tau, m.min(n))?;
3943    require_info_buffer(dev_info)?;
3944    let lwork = sgeqrf_buffer_size(ctx, m, n, a, lda)?;
3945    require_workspace(workspace.len(), lwork)?;
3946    unsafe {
3947        try_ffi!(sys::cusolverDnSgeqrf(
3948            ctx.as_raw(),
3949            to_i32(m, "m")?,
3950            to_i32(n, "n")?,
3951            a.as_mut_ptr().cast(),
3952            to_i32(lda, "lda")?,
3953            tau.as_mut_ptr().cast(),
3954            workspace.as_mut_ptr().cast(),
3955            to_i32(lwork, "lwork")?,
3956            dev_info.as_mut_ptr().cast(),
3957        ))?;
3958    }
3959    Ok(())
3960}
3961
3962/// Use the matching buffer-size helper to calculate the required workspace size.
3963///
3964/// The S and D data types are real valued single and double precision, respectively.
3965///
3966/// The C and Z data types are complex valued single and double precision, respectively.
3967///
3968/// Computes the QR factorization of an $m \times n$ matrix
3969///
3970/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
3971///
3972/// Provide workspace through `workspace`.
3973/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3974///
3975/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
3976///
3977/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
3978/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `τ`.
3979/// If `v` is original householder vector, `q` is the new householder vector corresponding to `τ`, satisfying the following relation
3980///
3981/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3982///
3983/// # Errors
3984///
3985/// Returns an error if cuSOLVER has not been initialized, if the
3986/// matrix dimensions or leading dimension are invalid, if the current GPU
3987/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3988pub fn dgeqrf(
3989    ctx: &Context,
3990    m: usize,
3991    n: usize,
3992    a: &mut DeviceMemory<f64>,
3993    lda: usize,
3994    tau: &mut DeviceMemory<f64>,
3995    workspace: &mut DeviceMemory<f64>,
3996    dev_info: &mut DeviceMemory<i32>,
3997) -> Result<()> {
3998    ctx.bind()?;
3999    validate_matrix(m, n, a.len(), lda)?;
4000    require_tau_buffer(tau, m.min(n))?;
4001    require_info_buffer(dev_info)?;
4002    let lwork = dgeqrf_buffer_size(ctx, m, n, a, lda)?;
4003    require_workspace(workspace.len(), lwork)?;
4004    unsafe {
4005        try_ffi!(sys::cusolverDnDgeqrf(
4006            ctx.as_raw(),
4007            to_i32(m, "m")?,
4008            to_i32(n, "n")?,
4009            a.as_mut_ptr().cast(),
4010            to_i32(lda, "lda")?,
4011            tau.as_mut_ptr().cast(),
4012            workspace.as_mut_ptr().cast(),
4013            to_i32(lwork, "lwork")?,
4014            dev_info.as_mut_ptr().cast(),
4015        ))?;
4016    }
4017    Ok(())
4018}
4019
4020/// Use the matching buffer-size helper to calculate the required workspace size.
4021///
4022/// The S and D data types are real valued single and double precision, respectively.
4023///
4024/// The C and Z data types are complex valued single and double precision, respectively.
4025///
4026/// Computes the QR factorization of an $m \times n$ matrix
4027///
4028/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
4029///
4030/// Provide workspace through `workspace`.
4031/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4032///
4033/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
4034///
4035/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
4036/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `τ`.
4037/// If `v` is original householder vector, `q` is the new householder vector corresponding to `τ`, satisfying the following relation
4038///
4039/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4040///
4041/// # Errors
4042///
4043/// Returns an error if cuSOLVER has not been initialized, if the
4044/// matrix dimensions or leading dimension are invalid, if the current GPU
4045/// architecture is unsupported, or if cuSOLVER reports an internal failure.
4046pub fn cgeqrf(
4047    ctx: &Context,
4048    m: usize,
4049    n: usize,
4050    a: &mut DeviceMemory<Complex32>,
4051    lda: usize,
4052    tau: &mut DeviceMemory<Complex32>,
4053    workspace: &mut DeviceMemory<Complex32>,
4054    dev_info: &mut DeviceMemory<i32>,
4055) -> Result<()> {
4056    ctx.bind()?;
4057    validate_matrix(m, n, a.len(), lda)?;
4058    require_tau_buffer(tau, m.min(n))?;
4059    require_info_buffer(dev_info)?;
4060    let lwork = cgeqrf_buffer_size(ctx, m, n, a, lda)?;
4061    require_workspace(workspace.len(), lwork)?;
4062    unsafe {
4063        try_ffi!(sys::cusolverDnCgeqrf(
4064            ctx.as_raw(),
4065            to_i32(m, "m")?,
4066            to_i32(n, "n")?,
4067            a.as_mut_ptr().cast(),
4068            to_i32(lda, "lda")?,
4069            tau.as_mut_ptr().cast(),
4070            workspace.as_mut_ptr().cast(),
4071            to_i32(lwork, "lwork")?,
4072            dev_info.as_mut_ptr().cast(),
4073        ))?;
4074    }
4075    Ok(())
4076}
4077
4078/// Use the matching buffer-size helper to calculate the required workspace size.
4079///
4080/// The S and D data types are real valued single and double precision, respectively.
4081///
4082/// The C and Z data types are complex valued single and double precision, respectively.
4083///
4084/// Computes the QR factorization of an $m \times n$ matrix
4085///
4086/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
4087///
4088/// Provide workspace through `workspace`.
4089/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4090///
4091/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
4092///
4093/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
4094/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `τ`.
4095/// If `v` is original householder vector, `q` is the new householder vector corresponding to `τ`, satisfying the following relation
4096///
4097/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4098///
4099/// # Errors
4100///
4101/// Returns an error if cuSOLVER has not been initialized, if the
4102/// matrix dimensions or leading dimension are invalid, if the current GPU
4103/// architecture is unsupported, or if cuSOLVER reports an internal failure.
4104pub fn zgeqrf(
4105    ctx: &Context,
4106    m: usize,
4107    n: usize,
4108    a: &mut DeviceMemory<Complex64>,
4109    lda: usize,
4110    tau: &mut DeviceMemory<Complex64>,
4111    workspace: &mut DeviceMemory<Complex64>,
4112    dev_info: &mut DeviceMemory<i32>,
4113) -> Result<()> {
4114    ctx.bind()?;
4115    validate_matrix(m, n, a.len(), lda)?;
4116    require_tau_buffer(tau, m.min(n))?;
4117    require_info_buffer(dev_info)?;
4118    let lwork = zgeqrf_buffer_size(ctx, m, n, a, lda)?;
4119    require_workspace(workspace.len(), lwork)?;
4120    unsafe {
4121        try_ffi!(sys::cusolverDnZgeqrf(
4122            ctx.as_raw(),
4123            to_i32(m, "m")?,
4124            to_i32(n, "n")?,
4125            a.as_mut_ptr().cast(),
4126            to_i32(lda, "lda")?,
4127            tau.as_mut_ptr().cast(),
4128            workspace.as_mut_ptr().cast(),
4129            to_i32(lwork, "lwork")?,
4130            dev_info.as_mut_ptr().cast(),
4131        ))?;
4132    }
4133    Ok(())
4134}
4135
4136pub fn sorgqr_buffer_size(
4137    ctx: &Context,
4138    m: usize,
4139    n: usize,
4140    k: usize,
4141    a: &DeviceMemory<f32>,
4142    lda: usize,
4143    tau: &DeviceMemory<f32>,
4144) -> Result<usize> {
4145    ctx.bind()?;
4146    validate_matrix(m, n, a.len(), lda)?;
4147    require_tau_buffer(tau, k)?;
4148    let mut lwork = 0;
4149    unsafe {
4150        try_ffi!(sys::cusolverDnSorgqr_bufferSize(
4151            ctx.as_raw(),
4152            to_i32(m, "m")?,
4153            to_i32(n, "n")?,
4154            to_i32(k, "k")?,
4155            a.as_ptr().cast(),
4156            to_i32(lda, "lda")?,
4157            tau.as_ptr().cast(),
4158            &raw mut lwork,
4159        ))?;
4160    }
4161    to_usize(lwork, "lwork")
4162}
4163
4164pub fn dorgqr_buffer_size(
4165    ctx: &Context,
4166    m: usize,
4167    n: usize,
4168    k: usize,
4169    a: &DeviceMemory<f64>,
4170    lda: usize,
4171    tau: &DeviceMemory<f64>,
4172) -> Result<usize> {
4173    ctx.bind()?;
4174    validate_matrix(m, n, a.len(), lda)?;
4175    require_tau_buffer(tau, k)?;
4176    let mut lwork = 0;
4177    unsafe {
4178        try_ffi!(sys::cusolverDnDorgqr_bufferSize(
4179            ctx.as_raw(),
4180            to_i32(m, "m")?,
4181            to_i32(n, "n")?,
4182            to_i32(k, "k")?,
4183            a.as_ptr().cast(),
4184            to_i32(lda, "lda")?,
4185            tau.as_ptr().cast(),
4186            &raw mut lwork,
4187        ))?;
4188    }
4189    to_usize(lwork, "lwork")
4190}
4191
4192pub fn cungqr_buffer_size(
4193    ctx: &Context,
4194    m: usize,
4195    n: usize,
4196    k: usize,
4197    a: &DeviceMemory<Complex32>,
4198    lda: usize,
4199    tau: &DeviceMemory<Complex32>,
4200) -> Result<usize> {
4201    ctx.bind()?;
4202    validate_matrix(m, n, a.len(), lda)?;
4203    require_tau_buffer(tau, k)?;
4204    let mut lwork = 0;
4205    unsafe {
4206        try_ffi!(sys::cusolverDnCungqr_bufferSize(
4207            ctx.as_raw(),
4208            to_i32(m, "m")?,
4209            to_i32(n, "n")?,
4210            to_i32(k, "k")?,
4211            a.as_ptr().cast(),
4212            to_i32(lda, "lda")?,
4213            tau.as_ptr().cast(),
4214            &raw mut lwork,
4215        ))?;
4216    }
4217    to_usize(lwork, "lwork")
4218}
4219
4220pub fn zungqr_buffer_size(
4221    ctx: &Context,
4222    m: usize,
4223    n: usize,
4224    k: usize,
4225    a: &DeviceMemory<Complex64>,
4226    lda: usize,
4227    tau: &DeviceMemory<Complex64>,
4228) -> Result<usize> {
4229    ctx.bind()?;
4230    validate_matrix(m, n, a.len(), lda)?;
4231    require_tau_buffer(tau, k)?;
4232    let mut lwork = 0;
4233    unsafe {
4234        try_ffi!(sys::cusolverDnZungqr_bufferSize(
4235            ctx.as_raw(),
4236            to_i32(m, "m")?,
4237            to_i32(n, "n")?,
4238            to_i32(k, "k")?,
4239            a.as_ptr().cast(),
4240            to_i32(lda, "lda")?,
4241            tau.as_ptr().cast(),
4242            &raw mut lwork,
4243        ))?;
4244    }
4245    to_usize(lwork, "lwork")
4246}
4247
4248/// Use the matching buffer-size helper to calculate the required workspace size.
4249///
4250/// The S and D data types are real valued single and double precision, respectively.
4251///
4252/// The C and Z data types are complex valued single and double precision, respectively.
4253///
4254/// Generates the first `n` columns of the orthogonal matrix `Q` from the
4255/// elementary reflectors returned by `geqrf` and stores them in `A`.
4256///
4257/// Provide workspace through `workspace`.
4258/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4259/// The workspace size in bytes is `size_of::<T>() * lwork`.
4260///
4261/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4262///
4263/// Callers can combine `geqrf` and `orgqr` to complete orthogonalization.
4264///
4265/// # Errors
4266///
4267/// Returns an error if cuSOLVER has not been initialized, if the
4268/// matrix dimensions, reflector count, or leading dimension are invalid, if
4269/// the current GPU architecture is unsupported, or if cuSOLVER reports an
4270/// internal failure.
4271pub fn sorgqr(
4272    ctx: &Context,
4273    m: usize,
4274    n: usize,
4275    k: usize,
4276    a: &mut DeviceMemory<f32>,
4277    lda: usize,
4278    tau: &DeviceMemory<f32>,
4279    workspace: &mut DeviceMemory<f32>,
4280    dev_info: &mut DeviceMemory<i32>,
4281) -> Result<()> {
4282    ctx.bind()?;
4283    validate_matrix(m, n, a.len(), lda)?;
4284    require_tau_buffer(tau, k)?;
4285    require_info_buffer(dev_info)?;
4286    let lwork = sorgqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4287    require_workspace(workspace.len(), lwork)?;
4288    unsafe {
4289        try_ffi!(sys::cusolverDnSorgqr(
4290            ctx.as_raw(),
4291            to_i32(m, "m")?,
4292            to_i32(n, "n")?,
4293            to_i32(k, "k")?,
4294            a.as_mut_ptr().cast(),
4295            to_i32(lda, "lda")?,
4296            tau.as_ptr().cast(),
4297            workspace.as_mut_ptr().cast(),
4298            to_i32(lwork, "lwork")?,
4299            dev_info.as_mut_ptr().cast(),
4300        ))?;
4301    }
4302    Ok(())
4303}
4304
4305/// Use the matching buffer-size helper to calculate the required workspace size.
4306///
4307/// The S and D data types are real valued single and double precision, respectively.
4308///
4309/// The C and Z data types are complex valued single and double precision, respectively.
4310///
4311/// Generates the first `n` columns of the orthogonal matrix `Q` from the
4312/// elementary reflectors returned by `geqrf` and stores them in `A`.
4313///
4314/// Provide workspace through `workspace`.
4315/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4316/// The workspace size in bytes is `size_of::<T>() * lwork`.
4317///
4318/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4319///
4320/// Callers can combine `geqrf` and `orgqr` to complete orthogonalization.
4321///
4322/// # Errors
4323///
4324/// Returns an error if cuSOLVER has not been initialized, if the
4325/// matrix dimensions, reflector count, or leading dimension are invalid, if
4326/// the current GPU architecture is unsupported, or if cuSOLVER reports an
4327/// internal failure.
4328pub fn dorgqr(
4329    ctx: &Context,
4330    m: usize,
4331    n: usize,
4332    k: usize,
4333    a: &mut DeviceMemory<f64>,
4334    lda: usize,
4335    tau: &DeviceMemory<f64>,
4336    workspace: &mut DeviceMemory<f64>,
4337    dev_info: &mut DeviceMemory<i32>,
4338) -> Result<()> {
4339    ctx.bind()?;
4340    validate_matrix(m, n, a.len(), lda)?;
4341    require_tau_buffer(tau, k)?;
4342    require_info_buffer(dev_info)?;
4343    let lwork = dorgqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4344    require_workspace(workspace.len(), lwork)?;
4345    unsafe {
4346        try_ffi!(sys::cusolverDnDorgqr(
4347            ctx.as_raw(),
4348            to_i32(m, "m")?,
4349            to_i32(n, "n")?,
4350            to_i32(k, "k")?,
4351            a.as_mut_ptr().cast(),
4352            to_i32(lda, "lda")?,
4353            tau.as_ptr().cast(),
4354            workspace.as_mut_ptr().cast(),
4355            to_i32(lwork, "lwork")?,
4356            dev_info.as_mut_ptr().cast(),
4357        ))?;
4358    }
4359    Ok(())
4360}
4361
4362pub fn cungqr(
4363    ctx: &Context,
4364    m: usize,
4365    n: usize,
4366    k: usize,
4367    a: &mut DeviceMemory<Complex32>,
4368    lda: usize,
4369    tau: &DeviceMemory<Complex32>,
4370    workspace: &mut DeviceMemory<Complex32>,
4371    dev_info: &mut DeviceMemory<i32>,
4372) -> Result<()> {
4373    ctx.bind()?;
4374    validate_matrix(m, n, a.len(), lda)?;
4375    require_tau_buffer(tau, k)?;
4376    require_info_buffer(dev_info)?;
4377    let lwork = cungqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4378    require_workspace(workspace.len(), lwork)?;
4379    unsafe {
4380        try_ffi!(sys::cusolverDnCungqr(
4381            ctx.as_raw(),
4382            to_i32(m, "m")?,
4383            to_i32(n, "n")?,
4384            to_i32(k, "k")?,
4385            a.as_mut_ptr().cast(),
4386            to_i32(lda, "lda")?,
4387            tau.as_ptr().cast(),
4388            workspace.as_mut_ptr().cast(),
4389            to_i32(lwork, "lwork")?,
4390            dev_info.as_mut_ptr().cast(),
4391        ))?;
4392    }
4393    Ok(())
4394}
4395
4396pub fn zungqr(
4397    ctx: &Context,
4398    m: usize,
4399    n: usize,
4400    k: usize,
4401    a: &mut DeviceMemory<Complex64>,
4402    lda: usize,
4403    tau: &DeviceMemory<Complex64>,
4404    workspace: &mut DeviceMemory<Complex64>,
4405    dev_info: &mut DeviceMemory<i32>,
4406) -> Result<()> {
4407    ctx.bind()?;
4408    validate_matrix(m, n, a.len(), lda)?;
4409    require_tau_buffer(tau, k)?;
4410    require_info_buffer(dev_info)?;
4411    let lwork = zungqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4412    require_workspace(workspace.len(), lwork)?;
4413    unsafe {
4414        try_ffi!(sys::cusolverDnZungqr(
4415            ctx.as_raw(),
4416            to_i32(m, "m")?,
4417            to_i32(n, "n")?,
4418            to_i32(k, "k")?,
4419            a.as_mut_ptr().cast(),
4420            to_i32(lda, "lda")?,
4421            tau.as_ptr().cast(),
4422            workspace.as_mut_ptr().cast(),
4423            to_i32(lwork, "lwork")?,
4424            dev_info.as_mut_ptr().cast(),
4425        ))?;
4426    }
4427    Ok(())
4428}
4429
4430pub fn sormqr_buffer_size(
4431    ctx: &Context,
4432    side: SideMode,
4433    operation: Operation,
4434    m: usize,
4435    n: usize,
4436    k: usize,
4437    a: &DeviceMemory<f32>,
4438    lda: usize,
4439    tau: &DeviceMemory<f32>,
4440    c: &DeviceMemory<f32>,
4441    ldc: usize,
4442) -> Result<usize> {
4443    ctx.bind()?;
4444    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4445    require_tau_buffer(tau, k)?;
4446    validate_matrix(m, n, c.len(), ldc)?;
4447    let mut lwork = 0;
4448    unsafe {
4449        try_ffi!(sys::cusolverDnSormqr_bufferSize(
4450            ctx.as_raw(),
4451            side.into(),
4452            operation.into(),
4453            to_i32(m, "m")?,
4454            to_i32(n, "n")?,
4455            to_i32(k, "k")?,
4456            a.as_ptr().cast(),
4457            to_i32(lda, "lda")?,
4458            tau.as_ptr().cast(),
4459            c.as_ptr().cast(),
4460            to_i32(ldc, "ldc")?,
4461            &raw mut lwork,
4462        ))?;
4463    }
4464    to_usize(lwork, "lwork")
4465}
4466
4467pub fn dormqr_buffer_size(
4468    ctx: &Context,
4469    side: SideMode,
4470    operation: Operation,
4471    m: usize,
4472    n: usize,
4473    k: usize,
4474    a: &DeviceMemory<f64>,
4475    lda: usize,
4476    tau: &DeviceMemory<f64>,
4477    c: &DeviceMemory<f64>,
4478    ldc: usize,
4479) -> Result<usize> {
4480    ctx.bind()?;
4481    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4482    require_tau_buffer(tau, k)?;
4483    validate_matrix(m, n, c.len(), ldc)?;
4484    let mut lwork = 0;
4485    unsafe {
4486        try_ffi!(sys::cusolverDnDormqr_bufferSize(
4487            ctx.as_raw(),
4488            side.into(),
4489            operation.into(),
4490            to_i32(m, "m")?,
4491            to_i32(n, "n")?,
4492            to_i32(k, "k")?,
4493            a.as_ptr().cast(),
4494            to_i32(lda, "lda")?,
4495            tau.as_ptr().cast(),
4496            c.as_ptr().cast(),
4497            to_i32(ldc, "ldc")?,
4498            &raw mut lwork,
4499        ))?;
4500    }
4501    to_usize(lwork, "lwork")
4502}
4503
4504pub fn cunmqr_buffer_size(
4505    ctx: &Context,
4506    side: SideMode,
4507    operation: Operation,
4508    m: usize,
4509    n: usize,
4510    k: usize,
4511    a: &DeviceMemory<Complex32>,
4512    lda: usize,
4513    tau: &DeviceMemory<Complex32>,
4514    c: &DeviceMemory<Complex32>,
4515    ldc: usize,
4516) -> Result<usize> {
4517    ctx.bind()?;
4518    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4519    require_tau_buffer(tau, k)?;
4520    validate_matrix(m, n, c.len(), ldc)?;
4521    let mut lwork = 0;
4522    unsafe {
4523        try_ffi!(sys::cusolverDnCunmqr_bufferSize(
4524            ctx.as_raw(),
4525            side.into(),
4526            operation.into(),
4527            to_i32(m, "m")?,
4528            to_i32(n, "n")?,
4529            to_i32(k, "k")?,
4530            a.as_ptr().cast(),
4531            to_i32(lda, "lda")?,
4532            tau.as_ptr().cast(),
4533            c.as_ptr().cast(),
4534            to_i32(ldc, "ldc")?,
4535            &raw mut lwork,
4536        ))?;
4537    }
4538    to_usize(lwork, "lwork")
4539}
4540
4541pub fn zunmqr_buffer_size(
4542    ctx: &Context,
4543    side: SideMode,
4544    operation: Operation,
4545    m: usize,
4546    n: usize,
4547    k: usize,
4548    a: &DeviceMemory<Complex64>,
4549    lda: usize,
4550    tau: &DeviceMemory<Complex64>,
4551    c: &DeviceMemory<Complex64>,
4552    ldc: usize,
4553) -> Result<usize> {
4554    ctx.bind()?;
4555    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4556    require_tau_buffer(tau, k)?;
4557    validate_matrix(m, n, c.len(), ldc)?;
4558    let mut lwork = 0;
4559    unsafe {
4560        try_ffi!(sys::cusolverDnZunmqr_bufferSize(
4561            ctx.as_raw(),
4562            side.into(),
4563            operation.into(),
4564            to_i32(m, "m")?,
4565            to_i32(n, "n")?,
4566            to_i32(k, "k")?,
4567            a.as_ptr().cast(),
4568            to_i32(lda, "lda")?,
4569            tau.as_ptr().cast(),
4570            c.as_ptr().cast(),
4571            to_i32(ldc, "ldc")?,
4572            &raw mut lwork,
4573        ))?;
4574    }
4575    to_usize(lwork, "lwork")
4576}
4577
4578/// Use the matching buffer-size helper to calculate the required workspace size.
4579///
4580/// The S and D data types are real valued single and double precision, respectively.
4581///
4582/// The C and Z data types are complex valued single and double precision, respectively.
4583///
4584/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
4585/// returned by `geqrf`, to `C` and stores the result in `C`.
4586///
4587/// `operation` selects whether `Q` is transposed.
4588///
4589/// `Q` is of order `m` if `side` = [`SideMode::Left`] and of order `n` if `side` = [`SideMode::Right`].
4590///
4591/// Provide workspace through `workspace`.
4592/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4593/// The workspace size in bytes is `size_of::<T>() * lwork`.
4594///
4595/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4596///
4597/// Callers can combine `geqrf`, `ormqr`, and `trsm` to complete a linear solver or a least-square solver.
4598///
4599/// # Errors
4600///
4601/// Returns an error if cuSOLVER has not been initialized, if the
4602/// matrix dimensions, reflector count, side/operation mode, or leading
4603/// dimensions are invalid, if the current GPU architecture is unsupported, or
4604/// if cuSOLVER reports an internal failure.
4605pub fn sormqr(
4606    ctx: &Context,
4607    side: SideMode,
4608    operation: Operation,
4609    m: usize,
4610    n: usize,
4611    k: usize,
4612    a: &DeviceMemory<f32>,
4613    lda: usize,
4614    tau: &DeviceMemory<f32>,
4615    c: &mut DeviceMemory<f32>,
4616    ldc: usize,
4617    workspace: &mut DeviceMemory<f32>,
4618    dev_info: &mut DeviceMemory<i32>,
4619) -> Result<()> {
4620    ctx.bind()?;
4621    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4622    require_tau_buffer(tau, k)?;
4623    validate_matrix(m, n, c.len(), ldc)?;
4624    require_info_buffer(dev_info)?;
4625    let lwork = sormqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4626    require_workspace(workspace.len(), lwork)?;
4627    unsafe {
4628        try_ffi!(sys::cusolverDnSormqr(
4629            ctx.as_raw(),
4630            side.into(),
4631            operation.into(),
4632            to_i32(m, "m")?,
4633            to_i32(n, "n")?,
4634            to_i32(k, "k")?,
4635            a.as_ptr().cast(),
4636            to_i32(lda, "lda")?,
4637            tau.as_ptr().cast(),
4638            c.as_mut_ptr().cast(),
4639            to_i32(ldc, "ldc")?,
4640            workspace.as_mut_ptr().cast(),
4641            to_i32(lwork, "lwork")?,
4642            dev_info.as_mut_ptr().cast(),
4643        ))?;
4644    }
4645    Ok(())
4646}
4647
4648/// Use the matching buffer-size helper to calculate the required workspace size.
4649///
4650/// The S and D data types are real valued single and double precision, respectively.
4651///
4652/// The C and Z data types are complex valued single and double precision, respectively.
4653///
4654/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
4655/// returned by `geqrf`, to `C` and stores the result in `C`.
4656///
4657/// `operation` selects whether `Q` is transposed.
4658///
4659/// `Q` is of order `m` if `side` = [`SideMode::Left`] and of order `n` if `side` = [`SideMode::Right`].
4660///
4661/// Provide workspace through `workspace`.
4662/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4663/// The workspace size in bytes is `size_of::<T>() * lwork`.
4664///
4665/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4666///
4667/// Callers can combine `geqrf`, `ormqr`, and `trsm` to complete a linear solver or a least-square solver.
4668///
4669/// # Errors
4670///
4671/// Returns an error if cuSOLVER has not been initialized, if the
4672/// matrix dimensions, reflector count, side/operation mode, or leading
4673/// dimensions are invalid, if the current GPU architecture is unsupported, or
4674/// if cuSOLVER reports an internal failure.
4675pub fn dormqr(
4676    ctx: &Context,
4677    side: SideMode,
4678    operation: Operation,
4679    m: usize,
4680    n: usize,
4681    k: usize,
4682    a: &DeviceMemory<f64>,
4683    lda: usize,
4684    tau: &DeviceMemory<f64>,
4685    c: &mut DeviceMemory<f64>,
4686    ldc: usize,
4687    workspace: &mut DeviceMemory<f64>,
4688    dev_info: &mut DeviceMemory<i32>,
4689) -> Result<()> {
4690    ctx.bind()?;
4691    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4692    require_tau_buffer(tau, k)?;
4693    validate_matrix(m, n, c.len(), ldc)?;
4694    require_info_buffer(dev_info)?;
4695    let lwork = dormqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4696    require_workspace(workspace.len(), lwork)?;
4697    unsafe {
4698        try_ffi!(sys::cusolverDnDormqr(
4699            ctx.as_raw(),
4700            side.into(),
4701            operation.into(),
4702            to_i32(m, "m")?,
4703            to_i32(n, "n")?,
4704            to_i32(k, "k")?,
4705            a.as_ptr().cast(),
4706            to_i32(lda, "lda")?,
4707            tau.as_ptr().cast(),
4708            c.as_mut_ptr().cast(),
4709            to_i32(ldc, "ldc")?,
4710            workspace.as_mut_ptr().cast(),
4711            to_i32(lwork, "lwork")?,
4712            dev_info.as_mut_ptr().cast(),
4713        ))?;
4714    }
4715    Ok(())
4716}
4717
4718pub fn cunmqr(
4719    ctx: &Context,
4720    side: SideMode,
4721    operation: Operation,
4722    m: usize,
4723    n: usize,
4724    k: usize,
4725    a: &DeviceMemory<Complex32>,
4726    lda: usize,
4727    tau: &DeviceMemory<Complex32>,
4728    c: &mut DeviceMemory<Complex32>,
4729    ldc: usize,
4730    workspace: &mut DeviceMemory<Complex32>,
4731    dev_info: &mut DeviceMemory<i32>,
4732) -> Result<()> {
4733    ctx.bind()?;
4734    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4735    require_tau_buffer(tau, k)?;
4736    validate_matrix(m, n, c.len(), ldc)?;
4737    require_info_buffer(dev_info)?;
4738    let lwork = cunmqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4739    require_workspace(workspace.len(), lwork)?;
4740    unsafe {
4741        try_ffi!(sys::cusolverDnCunmqr(
4742            ctx.as_raw(),
4743            side.into(),
4744            operation.into(),
4745            to_i32(m, "m")?,
4746            to_i32(n, "n")?,
4747            to_i32(k, "k")?,
4748            a.as_ptr().cast(),
4749            to_i32(lda, "lda")?,
4750            tau.as_ptr().cast(),
4751            c.as_mut_ptr().cast(),
4752            to_i32(ldc, "ldc")?,
4753            workspace.as_mut_ptr().cast(),
4754            to_i32(lwork, "lwork")?,
4755            dev_info.as_mut_ptr().cast(),
4756        ))?;
4757    }
4758    Ok(())
4759}
4760
4761pub fn zunmqr(
4762    ctx: &Context,
4763    side: SideMode,
4764    operation: Operation,
4765    m: usize,
4766    n: usize,
4767    k: usize,
4768    a: &DeviceMemory<Complex64>,
4769    lda: usize,
4770    tau: &DeviceMemory<Complex64>,
4771    c: &mut DeviceMemory<Complex64>,
4772    ldc: usize,
4773    workspace: &mut DeviceMemory<Complex64>,
4774    dev_info: &mut DeviceMemory<i32>,
4775) -> Result<()> {
4776    ctx.bind()?;
4777    validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4778    require_tau_buffer(tau, k)?;
4779    validate_matrix(m, n, c.len(), ldc)?;
4780    require_info_buffer(dev_info)?;
4781    let lwork = zunmqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4782    require_workspace(workspace.len(), lwork)?;
4783    unsafe {
4784        try_ffi!(sys::cusolverDnZunmqr(
4785            ctx.as_raw(),
4786            side.into(),
4787            operation.into(),
4788            to_i32(m, "m")?,
4789            to_i32(n, "n")?,
4790            to_i32(k, "k")?,
4791            a.as_ptr().cast(),
4792            to_i32(lda, "lda")?,
4793            tau.as_ptr().cast(),
4794            c.as_mut_ptr().cast(),
4795            to_i32(ldc, "ldc")?,
4796            workspace.as_mut_ptr().cast(),
4797            to_i32(lwork, "lwork")?,
4798            dev_info.as_mut_ptr().cast(),
4799        ))?;
4800    }
4801    Ok(())
4802}
4803
4804pub fn xgeqrf_buffer_size<TA: DataTypeLike, TTau: DataTypeLike>(
4805    ctx: &Context,
4806    params: &Params,
4807    m: usize,
4808    n: usize,
4809    a: MatrixRef<'_, TA>,
4810    tau: VectorRef<'_, TTau>,
4811    compute_type: DataType,
4812) -> Result<WorkspaceSizes> {
4813    ctx.bind()?;
4814    let a_type = TA::data_type();
4815    let tau_type = TTau::data_type();
4816    validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
4817    validate_x_vector(m.min(n), tau.data.byte_len(), tau_type)?;
4818    let mut device_bytes = 0;
4819    let mut host_bytes = 0;
4820    unsafe {
4821        try_ffi!(sys::cusolverDnXgeqrf_bufferSize(
4822            ctx.as_raw(),
4823            params.as_raw(),
4824            to_i64(m, "m")?,
4825            to_i64(n, "n")?,
4826            a_type.into(),
4827            a.data.as_ptr().cast(),
4828            to_i64(a.leading_dimension, "lda")?,
4829            tau_type.into(),
4830            tau.data.as_ptr().cast(),
4831            compute_type.into(),
4832            &raw mut device_bytes,
4833            &raw mut host_bytes,
4834        ))?;
4835    }
4836    Ok(WorkspaceSizes::new(
4837        device_bytes as usize,
4838        host_bytes as usize,
4839    ))
4840}
4841
4842/// Use [`xgeqrf_buffer_size`] to calculate the sizes needed for pre-allocated
4843/// workspace.
4844///
4845/// Computes the QR factorization of an $m \times n$ matrix.
4846///
4847/// Here `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and
4848/// `R` is an $n \times n$ upper triangular matrix.
4849///
4850/// Provide device and host workspace through `workspace`.
4851/// Use [`xgeqrf_buffer_size`] to determine the required sizes for
4852/// `workspace.device` and `workspace.host`.
4853///
4854/// The matrix `R` overwrites the upper triangular part of `A`, including the
4855/// diagonal elements.
4856///
4857/// The matrix `Q` is not formed explicitly. Instead, a sequence of Householder
4858/// vectors is stored in the lower triangular part of `A`.
4859/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `τ`.
4860/// If `v` is the original Householder vector, `q` is the new Householder vector
4861/// corresponding to `τ`.
4862///
4863/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
4864///
4865/// Currently, [`xgeqrf`] supports only the default algorithm.
4866///
4867/// **Algorithms supported by [`xgeqrf`]**
4868///
4869/// | Algorithm | Notes |
4870/// | --- | --- |
4871/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
4872///
4873/// List of input arguments for [`xgeqrf_buffer_size`] and [`xgeqrf`]:
4874///
4875/// The generic cuSOLVER routine separates matrix, tau-vector, and compute data types:
4876/// `data_type_a` is the data type of matrix `A`, `data_type_tau` is the data
4877/// type of `tau`, and `compute_type` is the operation's compute type.
4878/// [`xgeqrf`] only supports the following four combinations.
4879///
4880/// **Valid combination of data type and compute type**
4881///
4882/// | **data_type_a** | **compute_type** | **Meaning** |
4883/// | --- | --- | --- |
4884/// | [`DataType::F32`] | [`DataType::F32`] | `SGEQRF` |
4885/// | [`DataType::F64`] | [`DataType::F64`] | `DGEQRF` |
4886/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CGEQRF` |
4887/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZGEQRF` |
4888///
4889/// # Errors
4890///
4891/// Returns an error if cuSOLVER has not been initialized, if the
4892/// matrix dimensions or leading dimension are invalid, or if cuSOLVER reports
4893/// an internal failure.
4894pub fn xgeqrf<TA: DataTypeLike, TTau: DataTypeLike>(
4895    ctx: &Context,
4896    params: &Params,
4897    m: usize,
4898    n: usize,
4899    a: MatrixMut<'_, TA>,
4900    tau: VectorMut<'_, TTau>,
4901    compute_type: DataType,
4902    workspace: ByteWorkspaceMut<'_>,
4903    dev_info: &mut DeviceMemory<i32>,
4904) -> Result<()> {
4905    ctx.bind()?;
4906    let a_type = TA::data_type();
4907    let tau_type = TTau::data_type();
4908    validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
4909    validate_x_vector(m.min(n), tau.data.byte_len(), tau_type)?;
4910    require_info_buffer(dev_info)?;
4911    let workspace_sizes =
4912        xgeqrf_buffer_size(ctx, params, m, n, a.as_ref(), tau.as_ref(), compute_type)?;
4913    require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
4914    require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
4915    unsafe {
4916        try_ffi!(sys::cusolverDnXgeqrf(
4917            ctx.as_raw(),
4918            params.as_raw(),
4919            to_i64(m, "m")?,
4920            to_i64(n, "n")?,
4921            a_type.into(),
4922            a.data.as_mut_ptr().cast(),
4923            to_i64(a.leading_dimension, "lda")?,
4924            tau_type.into(),
4925            tau.data.as_mut_ptr().cast(),
4926            compute_type.into(),
4927            workspace.device.as_mut_ptr().cast(),
4928            workspace_sizes.device_bytes as _,
4929            workspace.host.as_mut_ptr().cast(),
4930            workspace_sizes.host_bytes as _,
4931            dev_info.as_mut_ptr().cast(),
4932        ))?;
4933    }
4934    Ok(())
4935}
4936
4937pub fn xpotrf_buffer_size<TA: DataTypeLike>(
4938    ctx: &Context,
4939    params: &Params,
4940    fill_mode: FillMode,
4941    n: usize,
4942    a: MatrixRef<'_, TA>,
4943    compute_type: DataType,
4944) -> Result<WorkspaceSizes> {
4945    ctx.bind()?;
4946    let a_type = TA::data_type();
4947    validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
4948    let mut device_bytes = 0;
4949    let mut host_bytes = 0;
4950    unsafe {
4951        try_ffi!(sys::cusolverDnXpotrf_bufferSize(
4952            ctx.as_raw(),
4953            params.as_raw(),
4954            fill_mode.into(),
4955            to_i64(n, "n")?,
4956            a_type.into(),
4957            a.data.as_ptr().cast(),
4958            to_i64(a.leading_dimension, "lda")?,
4959            compute_type.into(),
4960            &raw mut device_bytes,
4961            &raw mut host_bytes,
4962        ))?;
4963    }
4964    Ok(WorkspaceSizes::new(
4965        device_bytes as usize,
4966        host_bytes as usize,
4967    ))
4968}
4969
4970/// Use [`xpotrf_buffer_size`] to calculate the sizes needed for pre-allocated
4971/// workspace.
4972///
4973/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
4974///
4975/// `A` is an $n \times n$ Hermitian matrix; only its lower or upper triangular
4976/// part is meaningful.
4977/// `fill_mode` indicates which part of the matrix is used.
4978/// The operation leaves the other part untouched.
4979///
4980/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
4981///
4982/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
4983///
4984/// Provide device and host workspace through `workspace`.
4985/// Use [`xpotrf_buffer_size`] to determine the required sizes for
4986/// `workspace.device` and `workspace.host`.
4987///
4988/// If Cholesky factorization fails, some leading minor of `A` is not positive
4989/// definite, or equivalently some diagonal element of `L` or `U` is not a real
4990/// number.
4991/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
4992///
4993/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
4994///
4995/// Currently, [`xpotrf`] supports only the default algorithm.
4996///
4997/// **Algorithms supported by [`xpotrf`]**
4998///
4999/// | Algorithm | Notes |
5000/// | --- | --- |
5001/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
5002///
5003/// List of input arguments for [`xpotrf_buffer_size`] and [`xpotrf`]:
5004///
5005/// The generic cuSOLVER routine separates matrix and compute data types: `data_type_a` is
5006/// the data type of matrix `A`, and `compute_type` is the operation's compute
5007/// type.
5008/// [`xpotrf`] only supports the following four combinations.
5009///
5010/// **Valid combination of data type and compute type**
5011///
5012/// | **data_type_a** | **compute_type** | **Meaning** |
5013/// | --- | --- | --- |
5014/// | [`DataType::F32`] | [`DataType::F32`] | `SPOTRF` |
5015/// | [`DataType::F64`] | [`DataType::F64`] | `DPOTRF` |
5016/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CPOTRF` |
5017/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZPOTRF` |
5018///
5019/// # Errors
5020///
5021/// Returns an error if cuSOLVER has not been initialized, if the
5022/// matrix dimensions or leading dimension are invalid, or if cuSOLVER reports
5023/// an internal failure.
5024pub fn xpotrf<TA: DataTypeLike>(
5025    ctx: &Context,
5026    params: &Params,
5027    fill_mode: FillMode,
5028    n: usize,
5029    a: MatrixMut<'_, TA>,
5030    compute_type: DataType,
5031    workspace: ByteWorkspaceMut<'_>,
5032    dev_info: &mut DeviceMemory<i32>,
5033) -> Result<()> {
5034    ctx.bind()?;
5035    let a_type = TA::data_type();
5036    validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5037    require_info_buffer(dev_info)?;
5038    let workspace_sizes = xpotrf_buffer_size(ctx, params, fill_mode, n, a.as_ref(), compute_type)?;
5039    require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5040    require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5041    unsafe {
5042        try_ffi!(sys::cusolverDnXpotrf(
5043            ctx.as_raw(),
5044            params.as_raw(),
5045            fill_mode.into(),
5046            to_i64(n, "n")?,
5047            a_type.into(),
5048            a.data.as_mut_ptr().cast(),
5049            to_i64(a.leading_dimension, "lda")?,
5050            compute_type.into(),
5051            workspace.device.as_mut_ptr().cast(),
5052            workspace_sizes.device_bytes as _,
5053            workspace.host.as_mut_ptr().cast(),
5054            workspace_sizes.host_bytes as _,
5055            dev_info.as_mut_ptr().cast(),
5056        ))?;
5057    }
5058    Ok(())
5059}
5060
5061/// Solves a system of linear equations.
5062///
5063/// Here `A` is an $n \times n$ Hermitian matrix; only its lower or upper
5064/// triangular part is meaningful.
5065/// `fill_mode` indicates which part of the matrix is used.
5066/// The operation leaves the other part untouched.
5067///
5068/// Call [`xpotrf`] first to factorize matrix `A`.
5069/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
5070/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
5071///
5072/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
5073///
5074/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5075///
5076/// Currently, [`xpotrs`] supports only the default algorithm.
5077///
5078/// **Algorithms supported by [`xpotrs`]**
5079///
5080/// | Algorithm | Notes |
5081/// | --- | --- |
5082/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
5083///
5084/// List of input arguments for [`xpotrs`]:
5085///
5086/// The generic cuSOLVER routine separates matrix data types: `data_type_a` is the data type
5087/// of matrix `A`, and `data_type_b` is the data type of matrix `B`.
5088/// [`xpotrs`] only supports the following four combinations.
5089///
5090/// **Valid combination of data type and compute type**
5091///
5092/// | **data_type_a** | **data_type_b** | **Meaning** |
5093/// | --- | --- | --- |
5094/// | [`DataType::F32`] | [`DataType::F32`] | `SPOTRS` |
5095/// | [`DataType::F64`] | [`DataType::F64`] | `DPOTRS` |
5096/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CPOTRS` |
5097/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZPOTRS` |
5098///
5099/// # Errors
5100///
5101/// Returns an error if cuSOLVER has not been initialized, if the
5102/// matrix dimensions, right-hand-side count, or leading dimensions are
5103/// invalid, or if cuSOLVER reports an internal failure.
5104pub fn xpotrs<TA: DataTypeLike, TB: DataTypeLike>(
5105    ctx: &Context,
5106    params: &Params,
5107    fill_mode: FillMode,
5108    n: usize,
5109    nrhs: usize,
5110    a: MatrixRef<'_, TA>,
5111    b: MatrixMut<'_, TB>,
5112    dev_info: &mut DeviceMemory<i32>,
5113) -> Result<()> {
5114    ctx.bind()?;
5115    let a_type = TA::data_type();
5116    let b_type = TB::data_type();
5117    validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5118    validate_x_matrix(n, nrhs, b.data.byte_len(), b.leading_dimension, b_type)?;
5119    require_info_buffer(dev_info)?;
5120    unsafe {
5121        try_ffi!(sys::cusolverDnXpotrs(
5122            ctx.as_raw(),
5123            params.as_raw(),
5124            fill_mode.into(),
5125            to_i64(n, "n")?,
5126            to_i64(nrhs, "nrhs")?,
5127            a_type.into(),
5128            a.data.as_ptr().cast(),
5129            to_i64(a.leading_dimension, "lda")?,
5130            b_type.into(),
5131            b.data.as_mut_ptr().cast(),
5132            to_i64(b.leading_dimension, "ldb")?,
5133            dev_info.as_mut_ptr().cast(),
5134        ))?;
5135    }
5136    Ok(())
5137}
5138
5139pub fn xtrtri_buffer_size<TA: DataTypeLike>(
5140    ctx: &Context,
5141    fill_mode: FillMode,
5142    diagonal_type: DiagonalType,
5143    n: usize,
5144    a: MatrixRef<'_, TA>,
5145) -> Result<WorkspaceSizes> {
5146    ctx.bind()?;
5147    validate_x_matrix(
5148        n,
5149        n,
5150        a.data.byte_len(),
5151        a.leading_dimension,
5152        TA::data_type(),
5153    )?;
5154    let mut device_bytes = 0;
5155    let mut host_bytes = 0;
5156    unsafe {
5157        try_ffi!(sys::cusolverDnXtrtri_bufferSize(
5158            ctx.as_raw(),
5159            fill_mode.into(),
5160            diagonal_type.into(),
5161            to_i64(n, "n")?,
5162            TA::data_type().into(),
5163            a.data.as_ptr().cast_mut().cast(),
5164            to_i64(a.leading_dimension, "lda")?,
5165            &raw mut device_bytes,
5166            &raw mut host_bytes,
5167        ))?;
5168    }
5169    Ok(WorkspaceSizes::new(
5170        device_bytes as usize,
5171        host_bytes as usize,
5172    ))
5173}
5174
5175/// Use the matching buffer-size helper to calculate the sizes needed for pre-allocated workspace.
5176///
5177/// Computes the inverse of a triangular matrix through the generic cuSOLVER routine.
5178///
5179/// `A` is an $n \times n$ triangular matrix, only lower or upper part is meaningful.
5180/// `fill_mode` indicates which part of the matrix is used.
5181/// The other triangular part is left unchanged.
5182///
5183/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular inverse.
5184///
5185/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular inverse.
5186///
5187/// Provide device and host workspace through `workspace`.
5188/// Use [`xtrtri_buffer_size`] to determine the required sizes for
5189/// `workspace.device` and `workspace.host`.
5190///
5191/// If matrix inversion fails, `dev_info = i` shows `A(i, i) = 0`.
5192///
5193/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5194///
5195/// List of input arguments for [`xtrtri_buffer_size`] and [`xtrtri`]:
5196///
5197/// **Valid data types**
5198///
5199/// | Algorithm | Notes |
5200/// | --- | --- |
5201/// | data type | Meaning |
5202/// | [`DataType::F32`] | `STRTRI` |
5203/// | [`DataType::F64`] | `DTRTRI` |
5204/// | [`DataType::ComplexF32`] | `CTRTRI` |
5205/// | [`DataType::ComplexF64`] | `ZTRTRI` |
5206///
5207/// # Errors
5208///
5209/// Returns an error if cuSOLVER has not been initialized, if the
5210/// matrix dimensions or leading dimension are invalid, if the data type is not
5211/// supported, or if cuSOLVER reports an internal failure.
5212pub fn xtrtri<TA: DataTypeLike>(
5213    ctx: &Context,
5214    fill_mode: FillMode,
5215    diagonal_type: DiagonalType,
5216    n: usize,
5217    a: MatrixMut<'_, TA>,
5218    workspace: ByteWorkspaceMut<'_>,
5219    dev_info: &mut DeviceMemory<i32>,
5220) -> Result<()> {
5221    ctx.bind()?;
5222    validate_x_matrix(
5223        n,
5224        n,
5225        a.data.byte_len(),
5226        a.leading_dimension,
5227        TA::data_type(),
5228    )?;
5229    require_info_buffer(dev_info)?;
5230    let workspace_sizes = xtrtri_buffer_size(ctx, fill_mode, diagonal_type, n, a.as_ref())?;
5231    require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5232    require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5233    unsafe {
5234        try_ffi!(sys::cusolverDnXtrtri(
5235            ctx.as_raw(),
5236            fill_mode.into(),
5237            diagonal_type.into(),
5238            to_i64(n, "n")?,
5239            TA::data_type().into(),
5240            a.data.as_mut_ptr().cast(),
5241            to_i64(a.leading_dimension, "lda")?,
5242            workspace.device.as_mut_ptr().cast(),
5243            workspace_sizes.device_bytes as _,
5244            workspace.host.as_mut_ptr().cast(),
5245            workspace_sizes.host_bytes as _,
5246            dev_info.as_mut_ptr().cast(),
5247        ))?;
5248    }
5249    Ok(())
5250}
5251
5252pub fn xgetrf_buffer_size<TA: DataTypeLike>(
5253    ctx: &Context,
5254    params: &Params,
5255    m: usize,
5256    n: usize,
5257    a: MatrixRef<'_, TA>,
5258    compute_type: DataType,
5259) -> Result<WorkspaceSizes> {
5260    ctx.bind()?;
5261    let a_type = TA::data_type();
5262    validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5263    let mut device_bytes = 0;
5264    let mut host_bytes = 0;
5265    unsafe {
5266        try_ffi!(sys::cusolverDnXgetrf_bufferSize(
5267            ctx.as_raw(),
5268            params.as_raw(),
5269            to_i64(m, "m")?,
5270            to_i64(n, "n")?,
5271            a_type.into(),
5272            a.data.as_ptr().cast(),
5273            to_i64(a.leading_dimension, "lda")?,
5274            compute_type.into(),
5275            &raw mut device_bytes,
5276            &raw mut host_bytes,
5277        ))?;
5278    }
5279    Ok(WorkspaceSizes::new(
5280        device_bytes as usize,
5281        host_bytes as usize,
5282    ))
5283}
5284
5285/// Computes the LU factorization of an $m \times n$ matrix
5286///
5287/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
5288///
5289/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
5290///
5291/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5292///
5293/// If `pivots` is `None`, no pivoting is performed.
5294/// The factorization is `A=L*U`, which is not numerically stable.
5295///
5296/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
5297/// sequence. Row `i` is interchanged with row `pivots[i]`.
5298///
5299/// Provide device and host workspace through `workspace`.
5300/// Use [`xgetrf_buffer_size`] to determine the required sizes for
5301/// `workspace.device` and `workspace.host`.
5302///
5303/// Callers can combine [`xgetrf`] and [`xgetrs`] to complete a linear solver.
5304///
5305/// Currently, [`xgetrf`] supports two algorithms.
5306/// To select the legacy implementation, call [`Params::set_adv_options`].
5307///
5308/// **Algorithms supported by [`xgetrf`]**
5309///
5310/// | Algorithm | Notes |
5311/// | --- | --- |
5312/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Fastest algorithm; requires a large workspace of `m*n` elements. |
5313/// | [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1) | Legacy implementation. |
5314///
5315/// List of input arguments for [`xgetrf_buffer_size`] and [`xgetrf`]:
5316///
5317/// The generic cuSOLVER routine has two data types: `data_type_a` is the data type of matrix `A`, and `compute_type` is the operation's compute type.
5318/// [`xgetrf`] only supports the following four combinations.
5319///
5320/// **Valid combination of data type and compute type**
5321///
5322/// | **data_type_a** | **compute_type** | **Meaning** |
5323/// | --- | --- | --- |
5324/// | [`DataType::F32`] | [`DataType::F32`] | `SGETRF` |
5325/// | [`DataType::F64`] | [`DataType::F64`] | `DGETRF` |
5326/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CGETRF` |
5327/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZGETRF` |
5328///
5329/// # Errors
5330///
5331/// Returns an error if cuSOLVER has not been initialized, if the
5332/// matrix dimensions or leading dimension are invalid, or if cuSOLVER reports
5333/// an internal failure.
5334pub fn xgetrf<TA: DataTypeLike>(
5335    ctx: &Context,
5336    params: &Params,
5337    m: usize,
5338    n: usize,
5339    a: MatrixMut<'_, TA>,
5340    pivots: Option<&mut DeviceMemory<i64>>,
5341    compute_type: DataType,
5342    workspace: ByteWorkspaceMut<'_>,
5343    dev_info: &mut DeviceMemory<i32>,
5344) -> Result<()> {
5345    ctx.bind()?;
5346    let a_type = TA::data_type();
5347    validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5348    if let Some(pivots) = pivots.as_ref() {
5349        require_pivot64_buffer(pivots, m.min(n))?;
5350    }
5351    require_info_buffer(dev_info)?;
5352    let workspace_sizes = xgetrf_buffer_size(ctx, params, m, n, a.as_ref(), compute_type)?;
5353    require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5354    require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5355    unsafe {
5356        try_ffi!(sys::cusolverDnXgetrf(
5357            ctx.as_raw(),
5358            params.as_raw(),
5359            to_i64(m, "m")?,
5360            to_i64(n, "n")?,
5361            a_type.into(),
5362            a.data.as_mut_ptr().cast(),
5363            to_i64(a.leading_dimension, "lda")?,
5364            pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
5365            compute_type.into(),
5366            workspace.device.as_mut_ptr().cast(),
5367            workspace_sizes.device_bytes as _,
5368            workspace.host.as_mut_ptr().cast(),
5369            workspace_sizes.host_bytes as _,
5370            dev_info.as_mut_ptr().cast(),
5371        ))?;
5372    }
5373    Ok(())
5374}
5375
5376/// Solves a linear system of multiple right-hand sides
5377///
5378/// where `A` is an $n \times n$ matrix, and was LU-factored by [`xgetrf`], that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
5379/// `B` is an $n \times {nrhs}$ right-hand side matrix.
5380///
5381/// The `operation` argument is described by [`Operation`].
5382///
5383/// `pivots` is an output of [`xgetrf`].
5384/// It contains the pivot indices used to permute the right-hand sides.
5385///
5386/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5387///
5388/// Callers can combine [`xgetrf`] and [`xgetrs`] to complete a linear solver.
5389///
5390/// Currently, [`xgetrs`] supports only the default algorithm.
5391///
5392/// **Algorithms supported by [`xgetrs`]**
5393///
5394/// | Algorithm | Notes |
5395/// | --- | --- |
5396/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
5397///
5398/// List of input arguments for [`xgetrs`]:
5399///
5400/// The generic cuSOLVER routine has two data types: `data_type_a` is the data type of matrix `A`, and `data_type_b` is the data type of matrix `B`.
5401/// [`xgetrs`] only supports the following four combinations:
5402///
5403/// **Valid combination of data type and compute type**
5404///
5405/// | **data_type_a** | **data_type_b** | **Meaning** |
5406/// | --- | --- | --- |
5407/// | [`DataType::F32`] | [`DataType::F32`] | `SGETRS` |
5408/// | [`DataType::F64`] | [`DataType::F64`] | `DGETRS` |
5409/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CGETRS` |
5410/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZGETRS` |
5411///
5412/// # Errors
5413///
5414/// Returns an error if cuSOLVER has not been initialized, if the
5415/// matrix dimensions or leading dimensions are invalid, or if cuSOLVER reports
5416/// an internal failure.
5417pub fn xgetrs<TA: DataTypeLike, TB: DataTypeLike>(
5418    ctx: &Context,
5419    params: &Params,
5420    operation: Operation,
5421    n: usize,
5422    nrhs: usize,
5423    a: MatrixRef<'_, TA>,
5424    pivots: &DeviceMemory<i64>,
5425    b: MatrixMut<'_, TB>,
5426    dev_info: &mut DeviceMemory<i32>,
5427) -> Result<()> {
5428    ctx.bind()?;
5429    let a_type = TA::data_type();
5430    let b_type = TB::data_type();
5431    validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5432    require_pivot64_buffer(pivots, n)?;
5433    validate_x_matrix(n, nrhs, b.data.byte_len(), b.leading_dimension, b_type)?;
5434    require_info_buffer(dev_info)?;
5435    unsafe {
5436        try_ffi!(sys::cusolverDnXgetrs(
5437            ctx.as_raw(),
5438            params.as_raw(),
5439            operation.into(),
5440            to_i64(n, "n")?,
5441            to_i64(nrhs, "nrhs")?,
5442            a_type.into(),
5443            a.data.as_ptr().cast(),
5444            to_i64(a.leading_dimension, "lda")?,
5445            pivots.as_ptr().cast(),
5446            b_type.into(),
5447            b.data.as_mut_ptr().cast(),
5448            to_i64(b.leading_dimension, "ldb")?,
5449            dev_info.as_mut_ptr().cast(),
5450        ))?;
5451    }
5452    Ok(())
5453}
5454
5455pub fn xsytrs_buffer_size<TA: DataTypeLike, TB: DataTypeLike>(
5456    ctx: &Context,
5457    fill_mode: FillMode,
5458    n: usize,
5459    nrhs: usize,
5460    a: MatrixRef<'_, TA>,
5461    pivots: Option<&DeviceMemory<i64>>,
5462    b: MatrixRef<'_, TB>,
5463) -> Result<WorkspaceSizes> {
5464    ctx.bind()?;
5465    validate_x_matrix(
5466        n,
5467        n,
5468        a.data.byte_len(),
5469        a.leading_dimension,
5470        TA::data_type(),
5471    )?;
5472    validate_x_matrix(
5473        n,
5474        nrhs,
5475        b.data.byte_len(),
5476        b.leading_dimension,
5477        TB::data_type(),
5478    )?;
5479    if let Some(pivots) = pivots {
5480        require_pivot64_buffer(pivots, n)?;
5481    }
5482
5483    let mut device_bytes = 0;
5484    let mut host_bytes = 0;
5485    unsafe {
5486        try_ffi!(sys::cusolverDnXsytrs_bufferSize(
5487            ctx.as_raw(),
5488            fill_mode.into(),
5489            to_i64(n, "n")?,
5490            to_i64(nrhs, "nrhs")?,
5491            TA::data_type().into(),
5492            a.data.as_ptr().cast(),
5493            to_i64(a.leading_dimension, "lda")?,
5494            pivots.map_or(std::ptr::null(), DeviceMemory::as_ptr),
5495            TB::data_type().into(),
5496            b.data.as_ptr().cast_mut().cast(),
5497            to_i64(b.leading_dimension, "ldb")?,
5498            &raw mut device_bytes,
5499            &raw mut host_bytes,
5500        ))?;
5501    }
5502    Ok(WorkspaceSizes::new(
5503        device_bytes as usize,
5504        host_bytes as usize,
5505    ))
5506}
5507
5508/// Use the matching buffer-size helper to calculate the sizes needed for pre-allocated workspace.
5509///
5510/// Solves a system of linear equations through the generic cuSOLVER routine.
5511///
5512/// `A` contains the factorization produced by the typed `*sytrf` operations in this module.
5513/// Only the lower or upper part is meaningful; the other part is left untouched.
5514///
5515/// Provide the pivot indices returned by the matching `*sytrf` operation, along
5516/// with device and host workspace through `workspace`.
5517/// Use [`xsytrs_buffer_size`] to determine the required sizes for
5518/// `workspace.device` and `workspace.host`.
5519/// To factorize and solve the symmetric system without pivoting, pass `None`
5520/// for the pivot buffer to both the matching `*sytrf` operation and [`xsytrs`].
5521///
5522/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
5523///
5524/// List of input arguments for [`xsytrs_buffer_size`] and [`xsytrs`]:
5525///
5526/// The generic cuSOLVER routine has two data types: `data_type_a` is the data type of the
5527/// matrix `A`, and `data_type_b` is the data type of the matrix `B`.
5528/// [`xsytrs`] only supports the following four combinations:
5529///
5530/// **Valid combination of data type and compute type**
5531///
5532/// | **data_type_a** | **data_type_b** | **Meaning** |
5533/// | --- | --- | --- |
5534/// | [`DataType::F32`] | [`DataType::F32`] | `SSYTRS` |
5535/// | [`DataType::F64`] | [`DataType::F64`] | `DSYTRS` |
5536/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CSYTRS` |
5537/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZSYTRS` |
5538///
5539/// # Errors
5540///
5541/// Returns an error if cuSOLVER has not been initialized, if the
5542/// matrix dimensions or leading dimension are invalid, if the matrix data type
5543/// is not supported, or if cuSOLVER reports an internal failure.
5544pub fn xsytrs<TA: DataTypeLike, TB: DataTypeLike>(
5545    ctx: &Context,
5546    fill_mode: FillMode,
5547    n: usize,
5548    nrhs: usize,
5549    a: MatrixRef<'_, TA>,
5550    pivots: Option<&DeviceMemory<i64>>,
5551    b: MatrixMut<'_, TB>,
5552    workspace: ByteWorkspaceMut<'_>,
5553    dev_info: &mut DeviceMemory<i32>,
5554) -> Result<()> {
5555    ctx.bind()?;
5556    validate_x_matrix(
5557        n,
5558        n,
5559        a.data.byte_len(),
5560        a.leading_dimension,
5561        TA::data_type(),
5562    )?;
5563    validate_x_matrix(
5564        n,
5565        nrhs,
5566        b.data.byte_len(),
5567        b.leading_dimension,
5568        TB::data_type(),
5569    )?;
5570    if let Some(pivots) = pivots {
5571        require_pivot64_buffer(pivots, n)?;
5572    }
5573    require_info_buffer(dev_info)?;
5574    let workspace_sizes = xsytrs_buffer_size(ctx, fill_mode, n, nrhs, a, pivots, b.as_ref())?;
5575    require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5576    require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5577    unsafe {
5578        try_ffi!(sys::cusolverDnXsytrs(
5579            ctx.as_raw(),
5580            fill_mode.into(),
5581            to_i64(n, "n")?,
5582            to_i64(nrhs, "nrhs")?,
5583            TA::data_type().into(),
5584            a.data.as_ptr().cast(),
5585            to_i64(a.leading_dimension, "lda")?,
5586            pivots.map_or(std::ptr::null(), DeviceMemory::as_ptr),
5587            TB::data_type().into(),
5588            b.data.as_mut_ptr().cast(),
5589            to_i64(b.leading_dimension, "ldb")?,
5590            workspace.device.as_mut_ptr().cast(),
5591            workspace_sizes.device_bytes as _,
5592            workspace.host.as_mut_ptr().cast(),
5593            workspace_sizes.host_bytes as _,
5594            dev_info.as_mut_ptr().cast(),
5595        ))?;
5596    }
5597    Ok(())
5598}
5599
5600pub fn xlarft_buffer_size<TV: DataTypeLike, TTau: DataTypeLike, TT: DataTypeLike>(
5601    ctx: &Context,
5602    params: &Params,
5603    direct: DirectMode,
5604    storev: StorevMode,
5605    n: usize,
5606    k: usize,
5607    v: MatrixRef<'_, TV>,
5608    tau: VectorRef<'_, TTau>,
5609    t: MatrixRef<'_, TT>,
5610    compute_type: DataType,
5611) -> Result<WorkspaceSizes> {
5612    ctx.bind()?;
5613    let v_type = TV::data_type();
5614    let tau_type = TTau::data_type();
5615    let t_type = TT::data_type();
5616    validate_xlarft_inputs(
5617        n,
5618        k,
5619        storev,
5620        v.data.byte_len(),
5621        v.leading_dimension,
5622        v_type,
5623        tau.data.byte_len(),
5624        tau_type,
5625        t.data.byte_len(),
5626        t.leading_dimension,
5627        t_type,
5628    )?;
5629    let mut device_bytes = 0;
5630    let mut host_bytes = 0;
5631    unsafe {
5632        try_ffi!(sys::cusolverDnXlarft_bufferSize(
5633            ctx.as_raw(),
5634            params.as_raw(),
5635            direct.into(),
5636            storev.into(),
5637            to_i64(n, "n")?,
5638            to_i64(k, "k")?,
5639            v_type.into(),
5640            v.data.as_ptr().cast(),
5641            to_i64(v.leading_dimension, "ldv")?,
5642            tau_type.into(),
5643            tau.data.as_ptr().cast(),
5644            t_type.into(),
5645            t.data.as_ptr().cast_mut().cast(),
5646            to_i64(t.leading_dimension, "ldt")?,
5647            compute_type.into(),
5648            &raw mut device_bytes,
5649            &raw mut host_bytes,
5650        ))?;
5651    }
5652    Ok(WorkspaceSizes::new(
5653        device_bytes as usize,
5654        host_bytes as usize,
5655    ))
5656}
5657
5658/// Use the matching buffer-size helper to calculate the sizes needed for pre-allocated workspace.
5659///
5660/// Forms the triangular factor `T` of a real block reflector `H` of order `n`,
5661/// which is defined as a product of `k` elementary reflectors.
5662///
5663/// Only [`StorevMode::Columnwise`] storage is supported. This means the vector
5664/// defining the elementary reflector `H(i)` is stored in the `i`th column of
5665/// `V`, and $H = I - V \cdot T \cdot V^{T}$ ($H = I - V \cdot T \cdot V^{H}$
5666/// for complex types).
5667///
5668/// Provide device and host workspace through `workspace`.
5669/// Use [`xlarft_buffer_size`] to determine the required sizes for
5670/// `workspace.device` and `workspace.host`.
5671///
5672/// Currently, only the `n >= k` scenario is supported.
5673///
5674/// The generic cuSOLVER routine has four data types:
5675///
5676/// [`xlarft`] only supports the following four combinations.
5677///
5678/// **Valid combinations of data types and compute types**
5679///
5680/// | **data_type_v** | **data_type_tau** | **data_type_t** | **compute_type** | **Meaning** |
5681/// | --- | --- | --- | --- | --- |
5682/// | [`DataType::F32`] | [`DataType::F32`] | [`DataType::F32`] | [`DataType::F32`] | `SLARFT` |
5683/// | [`DataType::F64`] | [`DataType::F64`] | [`DataType::F64`] | [`DataType::F64`] | `DLARFT` |
5684/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CLARFT` |
5685/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZLARFT` |
5686///
5687/// # Errors
5688///
5689/// Returns an error if cuSOLVER has not been initialized, if the
5690/// reflector dimensions or storage mode are invalid, or if cuSOLVER reports an
5691/// internal failure.
5692pub fn xlarft<TV: DataTypeLike, TTau: DataTypeLike, TT: DataTypeLike>(
5693    ctx: &Context,
5694    params: &Params,
5695    direct: DirectMode,
5696    storev: StorevMode,
5697    n: usize,
5698    k: usize,
5699    v: MatrixRef<'_, TV>,
5700    tau: VectorRef<'_, TTau>,
5701    t: MatrixMut<'_, TT>,
5702    compute_type: DataType,
5703    workspace: ByteWorkspaceMut<'_>,
5704) -> Result<()> {
5705    ctx.bind()?;
5706    let v_type = TV::data_type();
5707    let tau_type = TTau::data_type();
5708    let t_type = TT::data_type();
5709    validate_xlarft_inputs(
5710        n,
5711        k,
5712        storev,
5713        v.data.byte_len(),
5714        v.leading_dimension,
5715        v_type,
5716        tau.data.byte_len(),
5717        tau_type,
5718        t.data.byte_len(),
5719        t.leading_dimension,
5720        t_type,
5721    )?;
5722    let workspace_sizes = xlarft_buffer_size(
5723        ctx,
5724        params,
5725        direct,
5726        storev,
5727        n,
5728        k,
5729        v,
5730        tau,
5731        t.as_ref(),
5732        compute_type,
5733    )?;
5734    require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5735    require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5736    unsafe {
5737        try_ffi!(sys::cusolverDnXlarft(
5738            ctx.as_raw(),
5739            params.as_raw(),
5740            direct.into(),
5741            storev.into(),
5742            to_i64(n, "n")?,
5743            to_i64(k, "k")?,
5744            v_type.into(),
5745            v.data.as_ptr().cast(),
5746            to_i64(v.leading_dimension, "ldv")?,
5747            tau_type.into(),
5748            tau.data.as_ptr().cast(),
5749            t_type.into(),
5750            t.data.as_mut_ptr().cast(),
5751            to_i64(t.leading_dimension, "ldt")?,
5752            compute_type.into(),
5753            workspace.device.as_mut_ptr().cast(),
5754            workspace_sizes.device_bytes as _,
5755            workspace.host.as_mut_ptr().cast(),
5756            workspace_sizes.host_bytes as _,
5757        ))?;
5758    }
5759    Ok(())
5760}
5761
5762fn validate_square_matrix(n: usize, len: usize, lda: usize) -> Result<()> {
5763    validate_matrix(n, n, len, lda)
5764}
5765
5766fn validate_matrix(rows: usize, cols: usize, len: usize, lda: usize) -> Result<()> {
5767    if rows == 0 || cols == 0 {
5768        return Err(Error::InvalidMatrixShape);
5769    }
5770    if lda < rows {
5771        return Err(Error::InvalidLeadingDimension);
5772    }
5773    let required = lda.checked_mul(cols).ok_or(Error::InvalidMatrixShape)?;
5774    if len < required {
5775        return Err(Error::InvalidMatrixShape);
5776    }
5777    Ok(())
5778}
5779
5780fn require_workspace(actual: usize, required: usize) -> Result<()> {
5781    if actual < required {
5782        return Err(Error::InsufficientWorkspaceSize { required, actual });
5783    }
5784    Ok(())
5785}
5786
5787fn require_workspace_bytes(actual: usize, required: usize) -> Result<()> {
5788    if actual < required {
5789        return Err(Error::InsufficientWorkspaceSize { required, actual });
5790    }
5791    Ok(())
5792}
5793
5794fn require_host_workspace(actual: usize, required: usize) -> Result<()> {
5795    if actual < required {
5796        return Err(Error::InsufficientWorkspaceSize { required, actual });
5797    }
5798    Ok(())
5799}
5800
5801fn require_info_buffer(dev_info: &DeviceMemory<i32>) -> Result<()> {
5802    if dev_info.is_empty() {
5803        return Err(Error::InvalidVectorShape);
5804    }
5805    Ok(())
5806}
5807
5808fn require_info_entries(dev_info: &DeviceMemory<i32>, required: usize) -> Result<()> {
5809    if dev_info.len() < required {
5810        return Err(Error::InvalidVectorShape);
5811    }
5812    Ok(())
5813}
5814
5815fn require_pivot_buffer(pivots: &DeviceMemory<i32>, required: usize) -> Result<()> {
5816    if pivots.len() < required {
5817        return Err(Error::InvalidVectorShape);
5818    }
5819    Ok(())
5820}
5821
5822fn require_pivot64_buffer(pivots: &DeviceMemory<i64>, required: usize) -> Result<()> {
5823    if pivots.len() < required {
5824        return Err(Error::InvalidVectorShape);
5825    }
5826    Ok(())
5827}
5828
5829fn require_tau_buffer<T>(tau: &DeviceMemory<T>, required: usize) -> Result<()> {
5830    if tau.len() < required {
5831        return Err(Error::InvalidVectorShape);
5832    }
5833    Ok(())
5834}
5835
5836fn qr_rows(side: SideMode, m: usize, n: usize) -> usize {
5837    match side {
5838        SideMode::Left => m,
5839        SideMode::Right => n,
5840    }
5841}
5842
5843fn tridiagonal_order(side: SideMode, m: usize, n: usize) -> usize {
5844    match side {
5845        SideMode::Left => m,
5846        SideMode::Right => n,
5847    }
5848}
5849
5850fn validate_bidiagonal_dims(m: usize, n: usize) -> Result<()> {
5851    if m == 0 || n == 0 || m < n {
5852        return Err(Error::InvalidMatrixShape);
5853    }
5854    Ok(())
5855}
5856
5857fn validate_bidiagonal_buffers(
5858    m: usize,
5859    n: usize,
5860    a_len: usize,
5861    lda: usize,
5862    d_len: usize,
5863    e_len: usize,
5864    tauq_len: usize,
5865    taup_len: usize,
5866) -> Result<()> {
5867    validate_bidiagonal_dims(m, n)?;
5868    validate_matrix(m, n, a_len, lda)?;
5869    if d_len < n || e_len < n || tauq_len < n || taup_len < n {
5870        return Err(Error::InvalidVectorShape);
5871    }
5872    Ok(())
5873}
5874
5875fn validate_orgbr_inputs(
5876    side: SideMode,
5877    m: usize,
5878    n: usize,
5879    k: usize,
5880    a_len: usize,
5881    lda: usize,
5882    tau_len: usize,
5883) -> Result<()> {
5884    if m == 0 || n == 0 || k == 0 {
5885        return Err(Error::InvalidMatrixShape);
5886    }
5887    validate_matrix(m, n, a_len, lda)?;
5888    if tau_len < k {
5889        return Err(Error::InvalidVectorShape);
5890    }
5891    match side {
5892        SideMode::Left if m < n || k > m => Err(Error::InvalidMatrixShape),
5893        SideMode::Right if n < m || k > n => Err(Error::InvalidMatrixShape),
5894        _ => Ok(()),
5895    }
5896}
5897
5898fn validate_sytrd_inputs(
5899    n: usize,
5900    a_len: usize,
5901    lda: usize,
5902    d_len: usize,
5903    e_len: usize,
5904    tau_len: usize,
5905) -> Result<()> {
5906    validate_square_matrix(n, a_len, lda)?;
5907    let reflectors = n.saturating_sub(1);
5908    if d_len < n || e_len < reflectors || tau_len < reflectors {
5909        return Err(Error::InvalidVectorShape);
5910    }
5911    Ok(())
5912}
5913
5914fn validate_orgtr_inputs(n: usize, a_len: usize, lda: usize, tau_len: usize) -> Result<()> {
5915    validate_square_matrix(n, a_len, lda)?;
5916    if tau_len < n.saturating_sub(1) {
5917        return Err(Error::InvalidVectorShape);
5918    }
5919    Ok(())
5920}
5921
5922fn validate_ormtr_inputs(
5923    side: SideMode,
5924    m: usize,
5925    n: usize,
5926    a_len: usize,
5927    lda: usize,
5928    tau_len: usize,
5929    c_len: usize,
5930    ldc: usize,
5931) -> Result<()> {
5932    let nq = tridiagonal_order(side, m, n);
5933    validate_square_matrix(nq, a_len, lda)?;
5934    validate_matrix(m, n, c_len, ldc)?;
5935    if tau_len < nq.saturating_sub(1) {
5936        return Err(Error::InvalidVectorShape);
5937    }
5938    Ok(())
5939}
5940
5941fn validate_batched_square_matrix_pointers<T>(
5942    n: usize,
5943    matrices: BatchedMatrixRef<'_, T>,
5944) -> Result<()> {
5945    if n == 0 || matrices.is_empty() {
5946        return Err(Error::InvalidMatrixShape);
5947    }
5948    if matrices.leading_dimension < n {
5949        return Err(Error::InvalidLeadingDimension);
5950    }
5951    Ok(())
5952}
5953
5954fn validate_batched_vector_pointers<T>(n: usize, vectors: BatchedVectorRef<'_, T>) -> Result<()> {
5955    if n == 0 || vectors.is_empty() {
5956        return Err(Error::InvalidVectorShape);
5957    }
5958    if vectors.leading_dimension < n {
5959        return Err(Error::InvalidLeadingDimension);
5960    }
5961    Ok(())
5962}
5963
5964fn validate_x_matrix(
5965    rows: usize,
5966    cols: usize,
5967    bytes: usize,
5968    lda: usize,
5969    data_type: DataType,
5970) -> Result<()> {
5971    if rows == 0 || cols == 0 {
5972        return Err(Error::InvalidMatrixShape);
5973    }
5974    if lda < rows {
5975        return Err(Error::InvalidLeadingDimension);
5976    }
5977    let required = lda
5978        .checked_mul(cols)
5979        .and_then(|count| count.checked_mul(data_type.size_in_bytes()))
5980        .ok_or(Error::InvalidMatrixShape)?;
5981    if bytes < required {
5982        return Err(Error::InvalidMatrixShape);
5983    }
5984    Ok(())
5985}
5986
5987fn validate_x_vector(len: usize, bytes: usize, data_type: DataType) -> Result<()> {
5988    let required = len
5989        .checked_mul(data_type.size_in_bytes())
5990        .ok_or(Error::InvalidVectorShape)?;
5991    if bytes < required {
5992        return Err(Error::InvalidVectorShape);
5993    }
5994    Ok(())
5995}
5996
5997fn validate_xlarft_inputs(
5998    n: usize,
5999    k: usize,
6000    storev: StorevMode,
6001    v_bytes: usize,
6002    ldv: usize,
6003    v_type: DataType,
6004    tau_bytes: usize,
6005    tau_type: DataType,
6006    t_bytes: usize,
6007    ldt: usize,
6008    t_type: DataType,
6009) -> Result<()> {
6010    if n == 0 || k == 0 || k > n {
6011        return Err(Error::InvalidMatrixShape);
6012    }
6013    if storev != StorevMode::Columnwise {
6014        return Err(Error::InvalidMatrixShape);
6015    }
6016    validate_x_matrix(n, k, v_bytes, ldv, v_type)?;
6017    validate_x_vector(k, tau_bytes, tau_type)?;
6018    validate_x_matrix(k, k, t_bytes, ldt, t_type)?;
6019    Ok(())
6020}
singe_cusolver/dense.rs

singe_cusolver/
dense.rs