singe_cusolver/dense.rs
1#[allow(unused_imports)]
2use crate::error::Status;
3
4use singe_cuda::{
5 data_type::{DataType, DataTypeLike},
6 memory::DeviceMemory,
7 types::{Complex32, Complex64},
8};
9
10use crate::{
11 context::Context,
12 error::{Error, Result},
13 layout::{
14 BatchedMatrixRef, BatchedVectorRef, ByteWorkspaceMut, MatrixMut, MatrixRef, VectorMut,
15 VectorRef, WorkspaceSizes,
16 },
17 params::Params,
18 sys, try_ffi,
19 types::{DiagonalType, DirectMode, FillMode, Operation, SideMode, StorevMode},
20 utility::{to_i32, to_i64, to_usize},
21};
22
23pub fn spotrf_buffer_size(
24 ctx: &Context,
25 fill_mode: FillMode,
26 n: usize,
27 a: &mut DeviceMemory<f32>,
28 lda: usize,
29) -> Result<usize> {
30 ctx.bind()?;
31 validate_square_matrix(n, a.len(), lda)?;
32 let mut lwork = 0;
33 unsafe {
34 try_ffi!(sys::cusolverDnSpotrf_bufferSize(
35 ctx.as_raw(),
36 fill_mode.into(),
37 to_i32(n, "n")?,
38 a.as_mut_ptr().cast(),
39 to_i32(lda, "lda")?,
40 &raw mut lwork,
41 ))?;
42 }
43 to_usize(lwork, "lwork")
44}
45
46pub fn dpotrf_buffer_size(
47 ctx: &Context,
48 fill_mode: FillMode,
49 n: usize,
50 a: &mut DeviceMemory<f64>,
51 lda: usize,
52) -> Result<usize> {
53 ctx.bind()?;
54 validate_square_matrix(n, a.len(), lda)?;
55 let mut lwork = 0;
56 unsafe {
57 try_ffi!(sys::cusolverDnDpotrf_bufferSize(
58 ctx.as_raw(),
59 fill_mode.into(),
60 to_i32(n, "n")?,
61 a.as_mut_ptr().cast(),
62 to_i32(lda, "lda")?,
63 &raw mut lwork,
64 ))?;
65 }
66 to_usize(lwork, "lwork")
67}
68
69pub fn cpotrf_buffer_size(
70 ctx: &Context,
71 fill_mode: FillMode,
72 n: usize,
73 a: &mut DeviceMemory<Complex32>,
74 lda: usize,
75) -> Result<usize> {
76 ctx.bind()?;
77 validate_square_matrix(n, a.len(), lda)?;
78 let mut lwork = 0;
79 unsafe {
80 try_ffi!(sys::cusolverDnCpotrf_bufferSize(
81 ctx.as_raw(),
82 fill_mode.into(),
83 to_i32(n, "n")?,
84 a.as_mut_ptr().cast(),
85 to_i32(lda, "lda")?,
86 &raw mut lwork,
87 ))?;
88 }
89 to_usize(lwork, "lwork")
90}
91
92pub fn zpotrf_buffer_size(
93 ctx: &Context,
94 fill_mode: FillMode,
95 n: usize,
96 a: &mut DeviceMemory<Complex64>,
97 lda: usize,
98) -> Result<usize> {
99 ctx.bind()?;
100 validate_square_matrix(n, a.len(), lda)?;
101 let mut lwork = 0;
102 unsafe {
103 try_ffi!(sys::cusolverDnZpotrf_bufferSize(
104 ctx.as_raw(),
105 fill_mode.into(),
106 to_i32(n, "n")?,
107 a.as_mut_ptr().cast(),
108 to_i32(lda, "lda")?,
109 &raw mut lwork,
110 ))?;
111 }
112 to_usize(lwork, "lwork")
113}
114
115/// Use the matching buffer-size helper to calculate the required workspace size.
116///
117/// The S and D data types are real valued single and double precision, respectively.
118///
119/// The C and Z data types are complex valued single and double precision, respectively.
120///
121/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
122///
123/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
124/// `fill_mode` indicates which part of the matrix is used.
125/// The other triangular part is left unchanged.
126///
127/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
128///
129/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
130///
131/// Provide workspace through `workspace`.
132/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
133///
134/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
135/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
136///
137/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
138///
139/// # Errors
140///
141/// Returns an error if cuSOLVER has not been initialized, if the
142/// matrix dimensions or leading dimension are invalid, if the current GPU
143/// architecture is unsupported, or if cuSOLVER reports an internal failure.
144pub fn spotrf(
145 ctx: &Context,
146 fill_mode: FillMode,
147 n: usize,
148 a: &mut DeviceMemory<f32>,
149 lda: usize,
150 workspace: &mut DeviceMemory<f32>,
151 dev_info: &mut DeviceMemory<i32>,
152) -> Result<()> {
153 ctx.bind()?;
154 validate_square_matrix(n, a.len(), lda)?;
155 require_info_buffer(dev_info)?;
156 let lwork = spotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
157 require_workspace(workspace.len(), lwork)?;
158 unsafe {
159 try_ffi!(sys::cusolverDnSpotrf(
160 ctx.as_raw(),
161 fill_mode.into(),
162 to_i32(n, "n")?,
163 a.as_mut_ptr().cast(),
164 to_i32(lda, "lda")?,
165 workspace.as_mut_ptr().cast(),
166 to_i32(lwork, "lwork")?,
167 dev_info.as_mut_ptr().cast(),
168 ))?;
169 }
170 Ok(())
171}
172
173/// Use the matching buffer-size helper to calculate the required workspace size.
174///
175/// The S and D data types are real valued single and double precision, respectively.
176///
177/// The C and Z data types are complex valued single and double precision, respectively.
178///
179/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
180///
181/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
182/// `fill_mode` indicates which part of the matrix is used.
183/// The other triangular part is left unchanged.
184///
185/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
186///
187/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
188///
189/// Provide workspace through `workspace`.
190/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
191///
192/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
193/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
194///
195/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
196///
197/// # Errors
198///
199/// Returns an error if cuSOLVER has not been initialized, if the
200/// matrix dimensions or leading dimension are invalid, if the current GPU
201/// architecture is unsupported, or if cuSOLVER reports an internal failure.
202pub fn dpotrf(
203 ctx: &Context,
204 fill_mode: FillMode,
205 n: usize,
206 a: &mut DeviceMemory<f64>,
207 lda: usize,
208 workspace: &mut DeviceMemory<f64>,
209 dev_info: &mut DeviceMemory<i32>,
210) -> Result<()> {
211 ctx.bind()?;
212 validate_square_matrix(n, a.len(), lda)?;
213 require_info_buffer(dev_info)?;
214 let lwork = dpotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
215 require_workspace(workspace.len(), lwork)?;
216 unsafe {
217 try_ffi!(sys::cusolverDnDpotrf(
218 ctx.as_raw(),
219 fill_mode.into(),
220 to_i32(n, "n")?,
221 a.as_mut_ptr().cast(),
222 to_i32(lda, "lda")?,
223 workspace.as_mut_ptr().cast(),
224 to_i32(lwork, "lwork")?,
225 dev_info.as_mut_ptr().cast(),
226 ))?;
227 }
228 Ok(())
229}
230
231/// Use the matching buffer-size helper to calculate the required workspace size.
232///
233/// The S and D data types are real valued single and double precision, respectively.
234///
235/// The C and Z data types are complex valued single and double precision, respectively.
236///
237/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
238///
239/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
240/// `fill_mode` indicates which part of the matrix is used.
241/// The other triangular part is left unchanged.
242///
243/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
244///
245/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
246///
247/// Provide workspace through `workspace`.
248/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
249///
250/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
251/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
252///
253/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
254///
255/// # Errors
256///
257/// Returns an error if cuSOLVER has not been initialized, if the
258/// matrix dimensions or leading dimension are invalid, if the current GPU
259/// architecture is unsupported, or if cuSOLVER reports an internal failure.
260pub fn cpotrf(
261 ctx: &Context,
262 fill_mode: FillMode,
263 n: usize,
264 a: &mut DeviceMemory<Complex32>,
265 lda: usize,
266 workspace: &mut DeviceMemory<Complex32>,
267 dev_info: &mut DeviceMemory<i32>,
268) -> Result<()> {
269 ctx.bind()?;
270 validate_square_matrix(n, a.len(), lda)?;
271 require_info_buffer(dev_info)?;
272 let lwork = cpotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
273 require_workspace(workspace.len(), lwork)?;
274 unsafe {
275 try_ffi!(sys::cusolverDnCpotrf(
276 ctx.as_raw(),
277 fill_mode.into(),
278 to_i32(n, "n")?,
279 a.as_mut_ptr().cast(),
280 to_i32(lda, "lda")?,
281 workspace.as_mut_ptr().cast(),
282 to_i32(lwork, "lwork")?,
283 dev_info.as_mut_ptr().cast(),
284 ))?;
285 }
286 Ok(())
287}
288
289/// Use the matching buffer-size helper to calculate the required workspace size.
290///
291/// The S and D data types are real valued single and double precision, respectively.
292///
293/// The C and Z data types are complex valued single and double precision, respectively.
294///
295/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
296///
297/// `A` is an $n \times n$ Hermitian matrix, only the lower or upper part is meaningful.
298/// `fill_mode` indicates which part of the matrix is used.
299/// The other triangular part is left unchanged.
300///
301/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed, and replaced by the lower triangular Cholesky factor `L`.
302///
303/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
304///
305/// Provide workspace through `workspace`.
306/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
307///
308/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
309/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
310///
311/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
312///
313/// # Errors
314///
315/// Returns an error if cuSOLVER has not been initialized, if the
316/// matrix dimensions or leading dimension are invalid, if the current GPU
317/// architecture is unsupported, or if cuSOLVER reports an internal failure.
318pub fn zpotrf(
319 ctx: &Context,
320 fill_mode: FillMode,
321 n: usize,
322 a: &mut DeviceMemory<Complex64>,
323 lda: usize,
324 workspace: &mut DeviceMemory<Complex64>,
325 dev_info: &mut DeviceMemory<i32>,
326) -> Result<()> {
327 ctx.bind()?;
328 validate_square_matrix(n, a.len(), lda)?;
329 require_info_buffer(dev_info)?;
330 let lwork = zpotrf_buffer_size(ctx, fill_mode, n, a, lda)?;
331 require_workspace(workspace.len(), lwork)?;
332 unsafe {
333 try_ffi!(sys::cusolverDnZpotrf(
334 ctx.as_raw(),
335 fill_mode.into(),
336 to_i32(n, "n")?,
337 a.as_mut_ptr().cast(),
338 to_i32(lda, "lda")?,
339 workspace.as_mut_ptr().cast(),
340 to_i32(lwork, "lwork")?,
341 dev_info.as_mut_ptr().cast(),
342 ))?;
343 }
344 Ok(())
345}
346
347/// Solves a system of linear equations
348///
349/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
350/// `fill_mode` indicates which part of the matrix is used.
351/// The other triangular part is left unchanged.
352///
353/// Call `potrf` first to factorize matrix `A`.
354/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
355/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
356///
357/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
358///
359/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
360///
361/// # Errors
362///
363/// Returns an error if cuSOLVER has not been initialized, if the
364/// matrix dimensions, right-hand-side count, or leading dimensions are
365/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
366/// reports an internal failure.
367pub fn spotrs(
368 ctx: &Context,
369 fill_mode: FillMode,
370 n: usize,
371 nrhs: usize,
372 a: &DeviceMemory<f32>,
373 lda: usize,
374 b: &mut DeviceMemory<f32>,
375 ldb: usize,
376 dev_info: &mut DeviceMemory<i32>,
377) -> Result<()> {
378 ctx.bind()?;
379 validate_square_matrix(n, a.len(), lda)?;
380 validate_matrix(n, nrhs, b.len(), ldb)?;
381 require_info_buffer(dev_info)?;
382 unsafe {
383 try_ffi!(sys::cusolverDnSpotrs(
384 ctx.as_raw(),
385 fill_mode.into(),
386 to_i32(n, "n")?,
387 to_i32(nrhs, "nrhs")?,
388 a.as_ptr().cast(),
389 to_i32(lda, "lda")?,
390 b.as_mut_ptr().cast(),
391 to_i32(ldb, "ldb")?,
392 dev_info.as_mut_ptr().cast(),
393 ))?;
394 }
395 Ok(())
396}
397
398/// Solves a system of linear equations
399///
400/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
401/// `fill_mode` indicates which part of the matrix is used.
402/// The other triangular part is left unchanged.
403///
404/// Call `potrf` first to factorize matrix `A`.
405/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
406/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
407///
408/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
409///
410/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
411///
412/// # Errors
413///
414/// Returns an error if cuSOLVER has not been initialized, if the
415/// matrix dimensions, right-hand-side count, or leading dimensions are
416/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
417/// reports an internal failure.
418pub fn dpotrs(
419 ctx: &Context,
420 fill_mode: FillMode,
421 n: usize,
422 nrhs: usize,
423 a: &DeviceMemory<f64>,
424 lda: usize,
425 b: &mut DeviceMemory<f64>,
426 ldb: usize,
427 dev_info: &mut DeviceMemory<i32>,
428) -> Result<()> {
429 ctx.bind()?;
430 validate_square_matrix(n, a.len(), lda)?;
431 validate_matrix(n, nrhs, b.len(), ldb)?;
432 require_info_buffer(dev_info)?;
433 unsafe {
434 try_ffi!(sys::cusolverDnDpotrs(
435 ctx.as_raw(),
436 fill_mode.into(),
437 to_i32(n, "n")?,
438 to_i32(nrhs, "nrhs")?,
439 a.as_ptr().cast(),
440 to_i32(lda, "lda")?,
441 b.as_mut_ptr().cast(),
442 to_i32(ldb, "ldb")?,
443 dev_info.as_mut_ptr().cast(),
444 ))?;
445 }
446 Ok(())
447}
448
449/// Solves a system of linear equations
450///
451/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
452/// `fill_mode` indicates which part of the matrix is used.
453/// The other triangular part is left unchanged.
454///
455/// Call `potrf` first to factorize matrix `A`.
456/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
457/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
458///
459/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
460///
461/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
462///
463/// # Errors
464///
465/// Returns an error if cuSOLVER has not been initialized, if the
466/// matrix dimensions, right-hand-side count, or leading dimensions are
467/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
468/// reports an internal failure.
469pub fn cpotrs(
470 ctx: &Context,
471 fill_mode: FillMode,
472 n: usize,
473 nrhs: usize,
474 a: &DeviceMemory<Complex32>,
475 lda: usize,
476 b: &mut DeviceMemory<Complex32>,
477 ldb: usize,
478 dev_info: &mut DeviceMemory<i32>,
479) -> Result<()> {
480 ctx.bind()?;
481 validate_square_matrix(n, a.len(), lda)?;
482 validate_matrix(n, nrhs, b.len(), ldb)?;
483 require_info_buffer(dev_info)?;
484 unsafe {
485 try_ffi!(sys::cusolverDnCpotrs(
486 ctx.as_raw(),
487 fill_mode.into(),
488 to_i32(n, "n")?,
489 to_i32(nrhs, "nrhs")?,
490 a.as_ptr().cast(),
491 to_i32(lda, "lda")?,
492 b.as_mut_ptr().cast(),
493 to_i32(ldb, "ldb")?,
494 dev_info.as_mut_ptr().cast(),
495 ))?;
496 }
497 Ok(())
498}
499
500/// Solves a system of linear equations
501///
502/// where `A` is an $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
503/// `fill_mode` indicates which part of the matrix is used.
504/// The other triangular part is left unchanged.
505///
506/// Call `potrf` first to factorize matrix `A`.
507/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^H$.
508/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
509///
510/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
511///
512/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
513///
514/// # Errors
515///
516/// Returns an error if cuSOLVER has not been initialized, if the
517/// matrix dimensions, right-hand-side count, or leading dimensions are
518/// invalid, if the current GPU architecture is unsupported, or if cuSOLVER
519/// reports an internal failure.
520pub fn zpotrs(
521 ctx: &Context,
522 fill_mode: FillMode,
523 n: usize,
524 nrhs: usize,
525 a: &DeviceMemory<Complex64>,
526 lda: usize,
527 b: &mut DeviceMemory<Complex64>,
528 ldb: usize,
529 dev_info: &mut DeviceMemory<i32>,
530) -> Result<()> {
531 ctx.bind()?;
532 validate_square_matrix(n, a.len(), lda)?;
533 validate_matrix(n, nrhs, b.len(), ldb)?;
534 require_info_buffer(dev_info)?;
535 unsafe {
536 try_ffi!(sys::cusolverDnZpotrs(
537 ctx.as_raw(),
538 fill_mode.into(),
539 to_i32(n, "n")?,
540 to_i32(nrhs, "nrhs")?,
541 a.as_ptr().cast(),
542 to_i32(lda, "lda")?,
543 b.as_mut_ptr().cast(),
544 to_i32(ldb, "ldb")?,
545 dev_info.as_mut_ptr().cast(),
546 ))?;
547 }
548 Ok(())
549}
550
551/// The S and D data types are real valued single and double precision, respectively.
552///
553/// The C and Z data types are complex valued single and double precision, respectively.
554///
555/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
556///
557/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
558/// `fill_mode` indicates which part of the matrix is used.
559///
560/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
561///
562/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
563///
564/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
565/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
566///
567/// `info` must have one integer entry for each matrix in the batch.
568/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
569/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
570///
571/// The other part of `A` is used as workspace.
572/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
573///
574/// # Errors
575///
576/// Returns an error if cuSOLVER has not been initialized, if the
577/// matrix dimensions, leading dimension, or batch size are invalid, or if
578/// cuSOLVER reports an internal failure.
579pub fn spotrf_batched(
580 ctx: &Context,
581 fill_mode: FillMode,
582 n: usize,
583 a: BatchedMatrixRef<'_, f32>,
584 info: &mut DeviceMemory<i32>,
585) -> Result<()> {
586 ctx.bind()?;
587 validate_batched_square_matrix_pointers(n, a)?;
588 require_info_entries(info, a.len())?;
589 unsafe {
590 try_ffi!(sys::cusolverDnSpotrfBatched(
591 ctx.as_raw(),
592 fill_mode.into(),
593 to_i32(n, "n")?,
594 a.as_mut_ptr(),
595 to_i32(a.leading_dimension, "lda")?,
596 info.as_mut_ptr().cast(),
597 to_i32(a.len(), "batch_size")?,
598 ))?;
599 }
600 Ok(())
601}
602
603/// The S and D data types are real valued single and double precision, respectively.
604///
605/// The C and Z data types are complex valued single and double precision, respectively.
606///
607/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
608///
609/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
610/// `fill_mode` indicates which part of the matrix is used.
611///
612/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
613///
614/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
615///
616/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
617/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
618///
619/// `info` must have one integer entry for each matrix in the batch.
620/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
621/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
622///
623/// The other part of `A` is used as workspace.
624/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
625///
626/// # Errors
627///
628/// Returns an error if cuSOLVER has not been initialized, if the
629/// matrix dimensions, leading dimension, or batch size are invalid, or if
630/// cuSOLVER reports an internal failure.
631pub fn dpotrf_batched(
632 ctx: &Context,
633 fill_mode: FillMode,
634 n: usize,
635 a: BatchedMatrixRef<'_, f64>,
636 info: &mut DeviceMemory<i32>,
637) -> Result<()> {
638 ctx.bind()?;
639 validate_batched_square_matrix_pointers(n, a)?;
640 require_info_entries(info, a.len())?;
641 unsafe {
642 try_ffi!(sys::cusolverDnDpotrfBatched(
643 ctx.as_raw(),
644 fill_mode.into(),
645 to_i32(n, "n")?,
646 a.as_mut_ptr(),
647 to_i32(a.leading_dimension, "lda")?,
648 info.as_mut_ptr().cast(),
649 to_i32(a.len(), "batch_size")?,
650 ))?;
651 }
652 Ok(())
653}
654
655/// The S and D data types are real valued single and double precision, respectively.
656///
657/// The C and Z data types are complex valued single and double precision, respectively.
658///
659/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
660///
661/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
662/// `fill_mode` indicates which part of the matrix is used.
663///
664/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
665///
666/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
667///
668/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
669/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
670///
671/// `info` must have one integer entry for each matrix in the batch.
672/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
673/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
674///
675/// The other part of `A` is used as workspace.
676/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
677///
678/// # Errors
679///
680/// Returns an error if cuSOLVER has not been initialized, if the
681/// matrix dimensions, leading dimension, or batch size are invalid, or if
682/// cuSOLVER reports an internal failure.
683pub fn cpotrf_batched(
684 ctx: &Context,
685 fill_mode: FillMode,
686 n: usize,
687 a: BatchedMatrixRef<'_, Complex32>,
688 info: &mut DeviceMemory<i32>,
689) -> Result<()> {
690 ctx.bind()?;
691 validate_batched_square_matrix_pointers(n, a)?;
692 require_info_entries(info, a.len())?;
693 unsafe {
694 try_ffi!(sys::cusolverDnCpotrfBatched(
695 ctx.as_raw(),
696 fill_mode.into(),
697 to_i32(n, "n")?,
698 a.as_mut_ptr().cast(),
699 to_i32(a.leading_dimension, "lda")?,
700 info.as_mut_ptr().cast(),
701 to_i32(a.len(), "batch_size")?,
702 ))?;
703 }
704 Ok(())
705}
706
707/// The S and D data types are real valued single and double precision, respectively.
708///
709/// The C and Z data types are complex valued single and double precision, respectively.
710///
711/// Computes the Cholesky factorization of a sequence of Hermitian positive-definite matrices.
712///
713/// Each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
714/// `fill_mode` indicates which part of the matrix is used.
715///
716/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
717///
718/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
719///
720/// If Cholesky factorization failed, that is, some leading minor of `A` is not positive definite, or equivalently some diagonal elements of `L` or `U` are not real.
721/// `info` contains one entry per matrix and reports the smallest leading minor of `A` that is not positive definite.
722///
723/// `info` must have one integer entry for each matrix in the batch.
724/// If cuSOLVER reports [`Status::InvalidValue`], `info[0] == -i` indicates that the `i`th parameter is invalid.
725/// If `potrf_batched` returns [`Ok`] and `info[i] == k` is positive, the `i`th matrix is not positive definite and the Cholesky factorization failed at row `k`.
726///
727/// The other part of `A` is used as workspace.
728/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrf_batched`.
729///
730/// # Errors
731///
732/// Returns an error if cuSOLVER has not been initialized, if the
733/// matrix dimensions, leading dimension, or batch size are invalid, or if
734/// cuSOLVER reports an internal failure.
735pub fn zpotrf_batched(
736 ctx: &Context,
737 fill_mode: FillMode,
738 n: usize,
739 a: BatchedMatrixRef<'_, Complex64>,
740 info: &mut DeviceMemory<i32>,
741) -> Result<()> {
742 ctx.bind()?;
743 validate_batched_square_matrix_pointers(n, a)?;
744 require_info_entries(info, a.len())?;
745 unsafe {
746 try_ffi!(sys::cusolverDnZpotrfBatched(
747 ctx.as_raw(),
748 fill_mode.into(),
749 to_i32(n, "n")?,
750 a.as_mut_ptr().cast(),
751 to_i32(a.leading_dimension, "lda")?,
752 info.as_mut_ptr().cast(),
753 to_i32(a.len(), "batch_size")?,
754 ))?;
755 }
756 Ok(())
757}
758
759/// Solves a sequence of linear systems
760///
761/// where each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
762/// `fill_mode` indicates which part of the matrix is used.
763///
764/// Call `potrf_batched` first to factorize matrix `a[i]`.
765/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
766/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
767///
768/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
769///
770/// `info` is a single status value for the whole batched call.
771/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
772///
773/// - only `nrhs=1` is supported.
774///
775/// - `info` from `potrf_batched` indicates whether each matrix is positive definite.
776/// `info` from `potrsBatched` only reports invalid arguments for the batched call.
777///
778/// - the other part of `A` is used as a workspace.
779/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrsBatched`.
780///
781/// # Errors
782///
783/// Returns an error if cuSOLVER has not been initialized, if the
784/// matrix dimensions, right-hand-side count, leading dimensions, or batch
785/// size are invalid, or if cuSOLVER reports an internal failure.
786pub fn spotrs_batched(
787 ctx: &Context,
788 fill_mode: FillMode,
789 n: usize,
790 a: BatchedMatrixRef<'_, f32>,
791 b: BatchedVectorRef<'_, f32>,
792 info: &mut DeviceMemory<i32>,
793) -> Result<()> {
794 ctx.bind()?;
795 validate_batched_square_matrix_pointers(n, a)?;
796 validate_batched_vector_pointers(n, b)?;
797 require_info_buffer(info)?;
798 if a.len() != b.len() {
799 return Err(Error::InvalidMatrixShape);
800 }
801 unsafe {
802 try_ffi!(sys::cusolverDnSpotrsBatched(
803 ctx.as_raw(),
804 fill_mode.into(),
805 to_i32(n, "n")?,
806 1,
807 a.as_mut_ptr(),
808 to_i32(a.leading_dimension, "lda")?,
809 b.as_mut_ptr(),
810 to_i32(b.leading_dimension, "ldb")?,
811 info.as_mut_ptr().cast(),
812 to_i32(a.len(), "batch_size")?,
813 ))?;
814 }
815 Ok(())
816}
817
818/// Solves a sequence of linear systems
819///
820/// where each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
821/// `fill_mode` indicates which part of the matrix is used.
822///
823/// Call `potrf_batched` first to factorize matrix `a[i]`.
824/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
825/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
826///
827/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
828///
829/// `info` is a single status value for the whole batched call.
830/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
831///
832/// - only `nrhs=1` is supported.
833///
834/// - `info` from `potrf_batched` indicates whether each matrix is positive definite.
835/// `info` from `potrsBatched` only reports invalid arguments for the batched call.
836///
837/// - the other part of `A` is used as a workspace.
838/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrsBatched`.
839///
840/// # Errors
841///
842/// Returns an error if cuSOLVER has not been initialized, if the
843/// matrix dimensions, right-hand-side count, leading dimensions, or batch
844/// size are invalid, or if cuSOLVER reports an internal failure.
845pub fn dpotrs_batched(
846 ctx: &Context,
847 fill_mode: FillMode,
848 n: usize,
849 a: BatchedMatrixRef<'_, f64>,
850 b: BatchedVectorRef<'_, f64>,
851 info: &mut DeviceMemory<i32>,
852) -> Result<()> {
853 ctx.bind()?;
854 validate_batched_square_matrix_pointers(n, a)?;
855 validate_batched_vector_pointers(n, b)?;
856 require_info_buffer(info)?;
857 if a.len() != b.len() {
858 return Err(Error::InvalidMatrixShape);
859 }
860 unsafe {
861 try_ffi!(sys::cusolverDnDpotrsBatched(
862 ctx.as_raw(),
863 fill_mode.into(),
864 to_i32(n, "n")?,
865 1,
866 a.as_mut_ptr(),
867 to_i32(a.leading_dimension, "lda")?,
868 b.as_mut_ptr(),
869 to_i32(b.leading_dimension, "ldb")?,
870 info.as_mut_ptr().cast(),
871 to_i32(a.len(), "batch_size")?,
872 ))?;
873 }
874 Ok(())
875}
876
877/// Solves a sequence of linear systems
878///
879/// where each `a[i]` for `i = 0, 1, ..., batch_size - 1` is a $n \times n$ Hermitian matrix, only lower or upper part is meaningful.
880/// `fill_mode` indicates which part of the matrix is used.
881///
882/// Call `potrf_batched` first to factorize matrix `a[i]`.
883/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
884/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
885///
886/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
887///
888/// `info` is a single status value for the whole batched call.
889/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
890///
891/// - only `nrhs=1` is supported.
892///
893/// - `info` from `potrf_batched` indicates whether each matrix is positive definite.
894/// `info` from `potrsBatched` only reports invalid arguments for the batched call.
895///
896/// - the other part of `A` is used as a workspace.
897/// For example, if `fill_mode` is [`FillMode::Upper`], upper triangle of `A` contains Cholesky factor `U` and lower triangle of `A` is destroyed after `potrsBatched`.
898///
899/// # Errors
900///
901/// Returns an error if cuSOLVER has not been initialized, if the
902/// matrix dimensions, right-hand-side count, leading dimensions, or batch
903/// size are invalid, or if cuSOLVER reports an internal failure.
904pub fn zpotrs_batched(
905 ctx: &Context,
906 fill_mode: FillMode,
907 n: usize,
908 a: BatchedMatrixRef<'_, Complex64>,
909 b: BatchedVectorRef<'_, Complex64>,
910 info: &mut DeviceMemory<i32>,
911) -> Result<()> {
912 ctx.bind()?;
913 validate_batched_square_matrix_pointers(n, a)?;
914 validate_batched_vector_pointers(n, b)?;
915 require_info_buffer(info)?;
916 if a.len() != b.len() {
917 return Err(Error::InvalidMatrixShape);
918 }
919 unsafe {
920 try_ffi!(sys::cusolverDnZpotrsBatched(
921 ctx.as_raw(),
922 fill_mode.into(),
923 to_i32(n, "n")?,
924 1,
925 a.as_mut_ptr().cast(),
926 to_i32(a.leading_dimension, "lda")?,
927 b.as_mut_ptr().cast(),
928 to_i32(b.leading_dimension, "ldb")?,
929 info.as_mut_ptr().cast(),
930 to_i32(a.len(), "batch_size")?,
931 ))?;
932 }
933 Ok(())
934}
935
936pub fn spotri_buffer_size(
937 ctx: &Context,
938 fill_mode: FillMode,
939 n: usize,
940 a: &mut DeviceMemory<f32>,
941 lda: usize,
942) -> Result<usize> {
943 ctx.bind()?;
944 validate_square_matrix(n, a.len(), lda)?;
945 let mut lwork = 0;
946 unsafe {
947 try_ffi!(sys::cusolverDnSpotri_bufferSize(
948 ctx.as_raw(),
949 fill_mode.into(),
950 to_i32(n, "n")?,
951 a.as_mut_ptr().cast(),
952 to_i32(lda, "lda")?,
953 &raw mut lwork,
954 ))?;
955 }
956 to_usize(lwork, "lwork")
957}
958
959pub fn dpotri_buffer_size(
960 ctx: &Context,
961 fill_mode: FillMode,
962 n: usize,
963 a: &mut DeviceMemory<f64>,
964 lda: usize,
965) -> Result<usize> {
966 ctx.bind()?;
967 validate_square_matrix(n, a.len(), lda)?;
968 let mut lwork = 0;
969 unsafe {
970 try_ffi!(sys::cusolverDnDpotri_bufferSize(
971 ctx.as_raw(),
972 fill_mode.into(),
973 to_i32(n, "n")?,
974 a.as_mut_ptr().cast(),
975 to_i32(lda, "lda")?,
976 &raw mut lwork,
977 ))?;
978 }
979 to_usize(lwork, "lwork")
980}
981
982pub fn cpotri_buffer_size(
983 ctx: &Context,
984 fill_mode: FillMode,
985 n: usize,
986 a: &mut DeviceMemory<Complex32>,
987 lda: usize,
988) -> Result<usize> {
989 ctx.bind()?;
990 validate_square_matrix(n, a.len(), lda)?;
991 let mut lwork = 0;
992 unsafe {
993 try_ffi!(sys::cusolverDnCpotri_bufferSize(
994 ctx.as_raw(),
995 fill_mode.into(),
996 to_i32(n, "n")?,
997 a.as_mut_ptr().cast(),
998 to_i32(lda, "lda")?,
999 &raw mut lwork,
1000 ))?;
1001 }
1002 to_usize(lwork, "lwork")
1003}
1004
1005pub fn zpotri_buffer_size(
1006 ctx: &Context,
1007 fill_mode: FillMode,
1008 n: usize,
1009 a: &mut DeviceMemory<Complex64>,
1010 lda: usize,
1011) -> Result<usize> {
1012 ctx.bind()?;
1013 validate_square_matrix(n, a.len(), lda)?;
1014 let mut lwork = 0;
1015 unsafe {
1016 try_ffi!(sys::cusolverDnZpotri_bufferSize(
1017 ctx.as_raw(),
1018 fill_mode.into(),
1019 to_i32(n, "n")?,
1020 a.as_mut_ptr().cast(),
1021 to_i32(lda, "lda")?,
1022 &raw mut lwork,
1023 ))?;
1024 }
1025 to_usize(lwork, "lwork")
1026}
1027
1028/// Use the matching buffer-size helper to calculate the required workspace size.
1029///
1030/// The S and D data types are real valued single and double precision, respectively.
1031///
1032/// The C and Z data types are complex valued single and double precision, respectively.
1033///
1034/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1035///
1036/// computed by `potrf()`.
1037///
1038/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1039/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1040/// The other triangular part is left unchanged.
1041///
1042/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1043///
1044/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1045///
1046/// Provide workspace through `workspace`.
1047/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1048///
1049/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1050///
1051/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1052///
1053/// # Errors
1054///
1055/// Returns an error if cuSOLVER has not been initialized, if the
1056/// matrix dimensions or leading dimension are invalid, if the current GPU
1057/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1058pub fn spotri(
1059 ctx: &Context,
1060 fill_mode: FillMode,
1061 n: usize,
1062 a: &mut DeviceMemory<f32>,
1063 lda: usize,
1064 workspace: &mut DeviceMemory<f32>,
1065 dev_info: &mut DeviceMemory<i32>,
1066) -> Result<()> {
1067 ctx.bind()?;
1068 validate_square_matrix(n, a.len(), lda)?;
1069 require_info_buffer(dev_info)?;
1070 let lwork = spotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1071 require_workspace(workspace.len(), lwork)?;
1072 unsafe {
1073 try_ffi!(sys::cusolverDnSpotri(
1074 ctx.as_raw(),
1075 fill_mode.into(),
1076 to_i32(n, "n")?,
1077 a.as_mut_ptr().cast(),
1078 to_i32(lda, "lda")?,
1079 workspace.as_mut_ptr().cast(),
1080 to_i32(lwork, "lwork")?,
1081 dev_info.as_mut_ptr().cast(),
1082 ))?;
1083 }
1084 Ok(())
1085}
1086
1087/// Use the matching buffer-size helper to calculate the required workspace size.
1088///
1089/// The S and D data types are real valued single and double precision, respectively.
1090///
1091/// The C and Z data types are complex valued single and double precision, respectively.
1092///
1093/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1094///
1095/// computed by `potrf()`.
1096///
1097/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1098/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1099/// The other triangular part is left unchanged.
1100///
1101/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1102///
1103/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1104///
1105/// Provide workspace through `workspace`.
1106/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1107///
1108/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1109///
1110/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1111///
1112/// # Errors
1113///
1114/// Returns an error if cuSOLVER has not been initialized, if the
1115/// matrix dimensions or leading dimension are invalid, if the current GPU
1116/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1117pub fn dpotri(
1118 ctx: &Context,
1119 fill_mode: FillMode,
1120 n: usize,
1121 a: &mut DeviceMemory<f64>,
1122 lda: usize,
1123 workspace: &mut DeviceMemory<f64>,
1124 dev_info: &mut DeviceMemory<i32>,
1125) -> Result<()> {
1126 ctx.bind()?;
1127 validate_square_matrix(n, a.len(), lda)?;
1128 require_info_buffer(dev_info)?;
1129 let lwork = dpotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1130 require_workspace(workspace.len(), lwork)?;
1131 unsafe {
1132 try_ffi!(sys::cusolverDnDpotri(
1133 ctx.as_raw(),
1134 fill_mode.into(),
1135 to_i32(n, "n")?,
1136 a.as_mut_ptr().cast(),
1137 to_i32(lda, "lda")?,
1138 workspace.as_mut_ptr().cast(),
1139 to_i32(lwork, "lwork")?,
1140 dev_info.as_mut_ptr().cast(),
1141 ))?;
1142 }
1143 Ok(())
1144}
1145
1146/// Use the matching buffer-size helper to calculate the required workspace size.
1147///
1148/// The S and D data types are real valued single and double precision, respectively.
1149///
1150/// The C and Z data types are complex valued single and double precision, respectively.
1151///
1152/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1153///
1154/// computed by `potrf()`.
1155///
1156/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1157/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1158/// The other triangular part is left unchanged.
1159///
1160/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1161///
1162/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1163///
1164/// Provide workspace through `workspace`.
1165/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1166///
1167/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1168///
1169/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1170///
1171/// # Errors
1172///
1173/// Returns an error if cuSOLVER has not been initialized, if the
1174/// matrix dimensions or leading dimension are invalid, if the current GPU
1175/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1176pub fn cpotri(
1177 ctx: &Context,
1178 fill_mode: FillMode,
1179 n: usize,
1180 a: &mut DeviceMemory<Complex32>,
1181 lda: usize,
1182 workspace: &mut DeviceMemory<Complex32>,
1183 dev_info: &mut DeviceMemory<i32>,
1184) -> Result<()> {
1185 ctx.bind()?;
1186 validate_square_matrix(n, a.len(), lda)?;
1187 require_info_buffer(dev_info)?;
1188 let lwork = cpotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1189 require_workspace(workspace.len(), lwork)?;
1190 unsafe {
1191 try_ffi!(sys::cusolverDnCpotri(
1192 ctx.as_raw(),
1193 fill_mode.into(),
1194 to_i32(n, "n")?,
1195 a.as_mut_ptr().cast(),
1196 to_i32(lda, "lda")?,
1197 workspace.as_mut_ptr().cast(),
1198 to_i32(lwork, "lwork")?,
1199 dev_info.as_mut_ptr().cast(),
1200 ))?;
1201 }
1202 Ok(())
1203}
1204
1205/// Use the matching buffer-size helper to calculate the required workspace size.
1206///
1207/// The S and D data types are real valued single and double precision, respectively.
1208///
1209/// The C and Z data types are complex valued single and double precision, respectively.
1210///
1211/// Computes the inverse of a positive-definite matrix `A` using the Cholesky factorization
1212///
1213/// computed by `potrf()`.
1214///
1215/// `A` is an $n \times n$ matrix containing the triangular factor `L` or `U` computed by the Cholesky factorization.
1216/// Only the lower or upper part is meaningful, as selected by `fill_mode`.
1217/// The other triangular part is left unchanged.
1218///
1219/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular part of the inverse.
1220///
1221/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular part of the inverse.
1222///
1223/// Provide workspace through `workspace`.
1224/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1225///
1226/// If the inverse computation fails because a leading minor of `L` or `U` is singular, `dev_info` indicates the smallest leading minor that is not positive definite.
1227///
1228/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1229///
1230/// # Errors
1231///
1232/// Returns an error if cuSOLVER has not been initialized, if the
1233/// matrix dimensions or leading dimension are invalid, if the current GPU
1234/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1235pub fn zpotri(
1236 ctx: &Context,
1237 fill_mode: FillMode,
1238 n: usize,
1239 a: &mut DeviceMemory<Complex64>,
1240 lda: usize,
1241 workspace: &mut DeviceMemory<Complex64>,
1242 dev_info: &mut DeviceMemory<i32>,
1243) -> Result<()> {
1244 ctx.bind()?;
1245 validate_square_matrix(n, a.len(), lda)?;
1246 require_info_buffer(dev_info)?;
1247 let lwork = zpotri_buffer_size(ctx, fill_mode, n, a, lda)?;
1248 require_workspace(workspace.len(), lwork)?;
1249 unsafe {
1250 try_ffi!(sys::cusolverDnZpotri(
1251 ctx.as_raw(),
1252 fill_mode.into(),
1253 to_i32(n, "n")?,
1254 a.as_mut_ptr().cast(),
1255 to_i32(lda, "lda")?,
1256 workspace.as_mut_ptr().cast(),
1257 to_i32(lwork, "lwork")?,
1258 dev_info.as_mut_ptr().cast(),
1259 ))?;
1260 }
1261 Ok(())
1262}
1263
1264pub fn sgetrf_buffer_size(
1265 ctx: &Context,
1266 m: usize,
1267 n: usize,
1268 a: &mut DeviceMemory<f32>,
1269 lda: usize,
1270) -> Result<usize> {
1271 ctx.bind()?;
1272 validate_matrix(m, n, a.len(), lda)?;
1273 let mut lwork = 0;
1274 unsafe {
1275 try_ffi!(sys::cusolverDnSgetrf_bufferSize(
1276 ctx.as_raw(),
1277 to_i32(m, "m")?,
1278 to_i32(n, "n")?,
1279 a.as_mut_ptr().cast(),
1280 to_i32(lda, "lda")?,
1281 &raw mut lwork,
1282 ))?;
1283 }
1284 to_usize(lwork, "lwork")
1285}
1286
1287pub fn dgetrf_buffer_size(
1288 ctx: &Context,
1289 m: usize,
1290 n: usize,
1291 a: &mut DeviceMemory<f64>,
1292 lda: usize,
1293) -> Result<usize> {
1294 ctx.bind()?;
1295 validate_matrix(m, n, a.len(), lda)?;
1296 let mut lwork = 0;
1297 unsafe {
1298 try_ffi!(sys::cusolverDnDgetrf_bufferSize(
1299 ctx.as_raw(),
1300 to_i32(m, "m")?,
1301 to_i32(n, "n")?,
1302 a.as_mut_ptr().cast(),
1303 to_i32(lda, "lda")?,
1304 &raw mut lwork,
1305 ))?;
1306 }
1307 to_usize(lwork, "lwork")
1308}
1309
1310pub fn cgetrf_buffer_size(
1311 ctx: &Context,
1312 m: usize,
1313 n: usize,
1314 a: &mut DeviceMemory<Complex32>,
1315 lda: usize,
1316) -> Result<usize> {
1317 ctx.bind()?;
1318 validate_matrix(m, n, a.len(), lda)?;
1319 let mut lwork = 0;
1320 unsafe {
1321 try_ffi!(sys::cusolverDnCgetrf_bufferSize(
1322 ctx.as_raw(),
1323 to_i32(m, "m")?,
1324 to_i32(n, "n")?,
1325 a.as_mut_ptr().cast(),
1326 to_i32(lda, "lda")?,
1327 &raw mut lwork,
1328 ))?;
1329 }
1330 to_usize(lwork, "lwork")
1331}
1332
1333pub fn zgetrf_buffer_size(
1334 ctx: &Context,
1335 m: usize,
1336 n: usize,
1337 a: &mut DeviceMemory<Complex64>,
1338 lda: usize,
1339) -> Result<usize> {
1340 ctx.bind()?;
1341 validate_matrix(m, n, a.len(), lda)?;
1342 let mut lwork = 0;
1343 unsafe {
1344 try_ffi!(sys::cusolverDnZgetrf_bufferSize(
1345 ctx.as_raw(),
1346 to_i32(m, "m")?,
1347 to_i32(n, "n")?,
1348 a.as_mut_ptr().cast(),
1349 to_i32(lda, "lda")?,
1350 &raw mut lwork,
1351 ))?;
1352 }
1353 to_usize(lwork, "lwork")
1354}
1355
1356/// Use the matching buffer-size helper to calculate the required workspace size.
1357///
1358/// The S and D data types are real single and double precision, respectively.
1359///
1360/// The C and Z data types are complex valued single and double precision, respectively.
1361///
1362/// Computes the LU factorization of an $m \times n$ matrix
1363///
1364/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1365///
1366/// Provide workspace through `workspace`.
1367/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1368///
1369/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1370///
1371/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1372///
1373/// If `pivots` is `None`, no pivoting is performed.
1374/// The factorization is `A=L*U`, which is not numerically stable.
1375///
1376/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1377/// sequence. Row `i` is interchanged with row `pivots[i]`.
1378///
1379/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1380///
1381/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1382/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1383///
1384/// # Errors
1385///
1386/// Returns an error if cuSOLVER has not been initialized, if the
1387/// matrix dimensions or leading dimension are invalid, if the current GPU
1388/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1389pub fn sgetrf(
1390 ctx: &Context,
1391 m: usize,
1392 n: usize,
1393 a: &mut DeviceMemory<f32>,
1394 lda: usize,
1395 workspace: &mut DeviceMemory<f32>,
1396 pivots: Option<&mut DeviceMemory<i32>>,
1397 dev_info: &mut DeviceMemory<i32>,
1398) -> Result<()> {
1399 ctx.bind()?;
1400 validate_matrix(m, n, a.len(), lda)?;
1401 require_info_buffer(dev_info)?;
1402 if let Some(pivots) = pivots.as_ref() {
1403 require_pivot_buffer(pivots, m.min(n))?;
1404 }
1405 let lwork = sgetrf_buffer_size(ctx, m, n, a, lda)?;
1406 require_workspace(workspace.len(), lwork)?;
1407 unsafe {
1408 try_ffi!(sys::cusolverDnSgetrf(
1409 ctx.as_raw(),
1410 to_i32(m, "m")?,
1411 to_i32(n, "n")?,
1412 a.as_mut_ptr().cast(),
1413 to_i32(lda, "lda")?,
1414 workspace.as_mut_ptr().cast(),
1415 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1416 dev_info.as_mut_ptr().cast(),
1417 ))?;
1418 }
1419 Ok(())
1420}
1421
1422/// Use the matching buffer-size helper to calculate the required workspace size.
1423///
1424/// The S and D data types are real single and double precision, respectively.
1425///
1426/// The C and Z data types are complex valued single and double precision, respectively.
1427///
1428/// Computes the LU factorization of an $m \times n$ matrix
1429///
1430/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1431///
1432/// Provide workspace through `workspace`.
1433/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1434///
1435/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1436///
1437/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1438///
1439/// If `pivots` is `None`, no pivoting is performed.
1440/// The factorization is `A=L*U`, which is not numerically stable.
1441///
1442/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1443/// sequence. Row `i` is interchanged with row `pivots[i]`.
1444///
1445/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1446///
1447/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1448/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1449///
1450/// # Errors
1451///
1452/// Returns an error if cuSOLVER has not been initialized, if the
1453/// matrix dimensions or leading dimension are invalid, if the current GPU
1454/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1455pub fn dgetrf(
1456 ctx: &Context,
1457 m: usize,
1458 n: usize,
1459 a: &mut DeviceMemory<f64>,
1460 lda: usize,
1461 workspace: &mut DeviceMemory<f64>,
1462 pivots: Option<&mut DeviceMemory<i32>>,
1463 dev_info: &mut DeviceMemory<i32>,
1464) -> Result<()> {
1465 ctx.bind()?;
1466 validate_matrix(m, n, a.len(), lda)?;
1467 require_info_buffer(dev_info)?;
1468 if let Some(pivots) = pivots.as_ref() {
1469 require_pivot_buffer(pivots, m.min(n))?;
1470 }
1471 let lwork = dgetrf_buffer_size(ctx, m, n, a, lda)?;
1472 require_workspace(workspace.len(), lwork)?;
1473 unsafe {
1474 try_ffi!(sys::cusolverDnDgetrf(
1475 ctx.as_raw(),
1476 to_i32(m, "m")?,
1477 to_i32(n, "n")?,
1478 a.as_mut_ptr().cast(),
1479 to_i32(lda, "lda")?,
1480 workspace.as_mut_ptr().cast(),
1481 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1482 dev_info.as_mut_ptr().cast(),
1483 ))?;
1484 }
1485 Ok(())
1486}
1487
1488/// Use the matching buffer-size helper to calculate the required workspace size.
1489///
1490/// The S and D data types are real single and double precision, respectively.
1491///
1492/// The C and Z data types are complex valued single and double precision, respectively.
1493///
1494/// Computes the LU factorization of an $m \times n$ matrix
1495///
1496/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1497///
1498/// Provide workspace through `workspace`.
1499/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1500///
1501/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1502///
1503/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1504///
1505/// If `pivots` is `None`, no pivoting is performed.
1506/// The factorization is `A=L*U`, which is not numerically stable.
1507///
1508/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1509/// sequence. Row `i` is interchanged with row `pivots[i]`.
1510///
1511/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1512///
1513/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1514/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1515///
1516/// # Errors
1517///
1518/// Returns an error if cuSOLVER has not been initialized, if the
1519/// matrix dimensions or leading dimension are invalid, if the current GPU
1520/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1521pub fn cgetrf(
1522 ctx: &Context,
1523 m: usize,
1524 n: usize,
1525 a: &mut DeviceMemory<Complex32>,
1526 lda: usize,
1527 workspace: &mut DeviceMemory<Complex32>,
1528 pivots: Option<&mut DeviceMemory<i32>>,
1529 dev_info: &mut DeviceMemory<i32>,
1530) -> Result<()> {
1531 ctx.bind()?;
1532 validate_matrix(m, n, a.len(), lda)?;
1533 require_info_buffer(dev_info)?;
1534 if let Some(pivots) = pivots.as_ref() {
1535 require_pivot_buffer(pivots, m.min(n))?;
1536 }
1537 let lwork = cgetrf_buffer_size(ctx, m, n, a, lda)?;
1538 require_workspace(workspace.len(), lwork)?;
1539 unsafe {
1540 try_ffi!(sys::cusolverDnCgetrf(
1541 ctx.as_raw(),
1542 to_i32(m, "m")?,
1543 to_i32(n, "n")?,
1544 a.as_mut_ptr().cast(),
1545 to_i32(lda, "lda")?,
1546 workspace.as_mut_ptr().cast(),
1547 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1548 dev_info.as_mut_ptr().cast(),
1549 ))?;
1550 }
1551 Ok(())
1552}
1553
1554/// Use the matching buffer-size helper to calculate the required workspace size.
1555///
1556/// The S and D data types are real single and double precision, respectively.
1557///
1558/// The C and Z data types are complex valued single and double precision, respectively.
1559///
1560/// Computes the LU factorization of an $m \times n$ matrix
1561///
1562/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
1563///
1564/// Provide workspace through `workspace`.
1565/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1566///
1567/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
1568///
1569/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1570///
1571/// If `pivots` is `None`, no pivoting is performed.
1572/// The factorization is `A=L*U`, which is not numerically stable.
1573///
1574/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
1575/// sequence. Row `i` is interchanged with row `pivots[i]`.
1576///
1577/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1578///
1579/// `getrf` uses the fastest implementation with a large workspace of size `m * n`.
1580/// Callers can choose the legacy implementation with minimal workspace by calling [`Params::set_adv_options`] with [`Function::Getrf`](crate::types::Function::Getrf) and [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1).
1581///
1582/// # Errors
1583///
1584/// Returns an error if cuSOLVER has not been initialized, if the
1585/// matrix dimensions or leading dimension are invalid, if the current GPU
1586/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1587pub fn zgetrf(
1588 ctx: &Context,
1589 m: usize,
1590 n: usize,
1591 a: &mut DeviceMemory<Complex64>,
1592 lda: usize,
1593 workspace: &mut DeviceMemory<Complex64>,
1594 pivots: Option<&mut DeviceMemory<i32>>,
1595 dev_info: &mut DeviceMemory<i32>,
1596) -> Result<()> {
1597 ctx.bind()?;
1598 validate_matrix(m, n, a.len(), lda)?;
1599 require_info_buffer(dev_info)?;
1600 if let Some(pivots) = pivots.as_ref() {
1601 require_pivot_buffer(pivots, m.min(n))?;
1602 }
1603 let lwork = zgetrf_buffer_size(ctx, m, n, a, lda)?;
1604 require_workspace(workspace.len(), lwork)?;
1605 unsafe {
1606 try_ffi!(sys::cusolverDnZgetrf(
1607 ctx.as_raw(),
1608 to_i32(m, "m")?,
1609 to_i32(n, "n")?,
1610 a.as_mut_ptr().cast(),
1611 to_i32(lda, "lda")?,
1612 workspace.as_mut_ptr().cast(),
1613 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1614 dev_info.as_mut_ptr().cast(),
1615 ))?;
1616 }
1617 Ok(())
1618}
1619
1620///
1621/// Solves a linear system of multiple right-hand sides
1622///
1623/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1624/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1625///
1626/// The `operation` argument is described by [`Operation`].
1627///
1628/// `pivots` is returned by the matching `getrf` operation.
1629/// It contains pivot indices, which are used to permute right-hand sides.
1630///
1631/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1632///
1633/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1634///
1635/// # Errors
1636///
1637/// Returns an error if cuSOLVER has not been initialized, if the
1638/// matrix dimensions or leading dimensions are invalid, if the current GPU
1639/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1640pub fn sgetrs(
1641 ctx: &Context,
1642 operation: Operation,
1643 n: usize,
1644 nrhs: usize,
1645 a: &DeviceMemory<f32>,
1646 lda: usize,
1647 pivots: &DeviceMemory<i32>,
1648 b: &mut DeviceMemory<f32>,
1649 ldb: usize,
1650 dev_info: &mut DeviceMemory<i32>,
1651) -> Result<()> {
1652 ctx.bind()?;
1653 validate_square_matrix(n, a.len(), lda)?;
1654 validate_matrix(n, nrhs, b.len(), ldb)?;
1655 require_pivot_buffer(pivots, n)?;
1656 require_info_buffer(dev_info)?;
1657 unsafe {
1658 try_ffi!(sys::cusolverDnSgetrs(
1659 ctx.as_raw(),
1660 operation.into(),
1661 to_i32(n, "n")?,
1662 to_i32(nrhs, "nrhs")?,
1663 a.as_ptr().cast(),
1664 to_i32(lda, "lda")?,
1665 pivots.as_ptr().cast(),
1666 b.as_mut_ptr().cast(),
1667 to_i32(ldb, "ldb")?,
1668 dev_info.as_mut_ptr().cast(),
1669 ))?;
1670 }
1671 Ok(())
1672}
1673
1674///
1675/// Solves a linear system of multiple right-hand sides
1676///
1677/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1678/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1679///
1680/// The `operation` argument is described by [`Operation`].
1681///
1682/// `pivots` is returned by the matching `getrf` operation.
1683/// It contains pivot indices, which are used to permute right-hand sides.
1684///
1685/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1686///
1687/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1688///
1689/// # Errors
1690///
1691/// Returns an error if cuSOLVER has not been initialized, if the
1692/// matrix dimensions or leading dimensions are invalid, if the current GPU
1693/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1694pub fn dgetrs(
1695 ctx: &Context,
1696 operation: Operation,
1697 n: usize,
1698 nrhs: usize,
1699 a: &DeviceMemory<f64>,
1700 lda: usize,
1701 pivots: &DeviceMemory<i32>,
1702 b: &mut DeviceMemory<f64>,
1703 ldb: usize,
1704 dev_info: &mut DeviceMemory<i32>,
1705) -> Result<()> {
1706 ctx.bind()?;
1707 validate_square_matrix(n, a.len(), lda)?;
1708 validate_matrix(n, nrhs, b.len(), ldb)?;
1709 require_pivot_buffer(pivots, n)?;
1710 require_info_buffer(dev_info)?;
1711 unsafe {
1712 try_ffi!(sys::cusolverDnDgetrs(
1713 ctx.as_raw(),
1714 operation.into(),
1715 to_i32(n, "n")?,
1716 to_i32(nrhs, "nrhs")?,
1717 a.as_ptr().cast(),
1718 to_i32(lda, "lda")?,
1719 pivots.as_ptr().cast(),
1720 b.as_mut_ptr().cast(),
1721 to_i32(ldb, "ldb")?,
1722 dev_info.as_mut_ptr().cast(),
1723 ))?;
1724 }
1725 Ok(())
1726}
1727
1728///
1729/// Solves a linear system of multiple right-hand sides
1730///
1731/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1732/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1733///
1734/// The `operation` argument is described by [`Operation`].
1735///
1736/// `pivots` is returned by the matching `getrf` operation.
1737/// It contains pivot indices, which are used to permute right-hand sides.
1738///
1739/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1740///
1741/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1742///
1743/// # Errors
1744///
1745/// Returns an error if cuSOLVER has not been initialized, if the
1746/// matrix dimensions or leading dimensions are invalid, if the current GPU
1747/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1748pub fn cgetrs(
1749 ctx: &Context,
1750 operation: Operation,
1751 n: usize,
1752 nrhs: usize,
1753 a: &DeviceMemory<Complex32>,
1754 lda: usize,
1755 pivots: &DeviceMemory<i32>,
1756 b: &mut DeviceMemory<Complex32>,
1757 ldb: usize,
1758 dev_info: &mut DeviceMemory<i32>,
1759) -> Result<()> {
1760 ctx.bind()?;
1761 validate_square_matrix(n, a.len(), lda)?;
1762 validate_matrix(n, nrhs, b.len(), ldb)?;
1763 require_pivot_buffer(pivots, n)?;
1764 require_info_buffer(dev_info)?;
1765 unsafe {
1766 try_ffi!(sys::cusolverDnCgetrs(
1767 ctx.as_raw(),
1768 operation.into(),
1769 to_i32(n, "n")?,
1770 to_i32(nrhs, "nrhs")?,
1771 a.as_ptr().cast(),
1772 to_i32(lda, "lda")?,
1773 pivots.as_ptr().cast(),
1774 b.as_mut_ptr().cast(),
1775 to_i32(ldb, "ldb")?,
1776 dev_info.as_mut_ptr().cast(),
1777 ))?;
1778 }
1779 Ok(())
1780}
1781
1782///
1783/// Solves a linear system of multiple right-hand sides
1784///
1785/// where `A` is an $n \times n$ matrix, and was LU-factored by `getrf`, that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
1786/// `B` is an $n\times {nrhs}$ right-hand side matrix.
1787///
1788/// The `operation` argument is described by [`Operation`].
1789///
1790/// `pivots` is returned by the matching `getrf` operation.
1791/// It contains pivot indices, which are used to permute right-hand sides.
1792///
1793/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1794///
1795/// Callers can combine `getrf` and `getrs` to complete a linear solver.
1796///
1797/// # Errors
1798///
1799/// Returns an error if cuSOLVER has not been initialized, if the
1800/// matrix dimensions or leading dimensions are invalid, if the current GPU
1801/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1802pub fn zgetrs(
1803 ctx: &Context,
1804 operation: Operation,
1805 n: usize,
1806 nrhs: usize,
1807 a: &DeviceMemory<Complex64>,
1808 lda: usize,
1809 pivots: &DeviceMemory<i32>,
1810 b: &mut DeviceMemory<Complex64>,
1811 ldb: usize,
1812 dev_info: &mut DeviceMemory<i32>,
1813) -> Result<()> {
1814 ctx.bind()?;
1815 validate_square_matrix(n, a.len(), lda)?;
1816 validate_matrix(n, nrhs, b.len(), ldb)?;
1817 require_pivot_buffer(pivots, n)?;
1818 require_info_buffer(dev_info)?;
1819 unsafe {
1820 try_ffi!(sys::cusolverDnZgetrs(
1821 ctx.as_raw(),
1822 operation.into(),
1823 to_i32(n, "n")?,
1824 to_i32(nrhs, "nrhs")?,
1825 a.as_ptr().cast(),
1826 to_i32(lda, "lda")?,
1827 pivots.as_ptr().cast(),
1828 b.as_mut_ptr().cast(),
1829 to_i32(ldb, "ldb")?,
1830 dev_info.as_mut_ptr().cast(),
1831 ))?;
1832 }
1833 Ok(())
1834}
1835
1836pub fn ssytrf_buffer_size(
1837 ctx: &Context,
1838 n: usize,
1839 a: &mut DeviceMemory<f32>,
1840 lda: usize,
1841) -> Result<usize> {
1842 ctx.bind()?;
1843 validate_square_matrix(n, a.len(), lda)?;
1844 let mut lwork = 0;
1845 unsafe {
1846 try_ffi!(sys::cusolverDnSsytrf_bufferSize(
1847 ctx.as_raw(),
1848 to_i32(n, "n")?,
1849 a.as_mut_ptr().cast(),
1850 to_i32(lda, "lda")?,
1851 &raw mut lwork,
1852 ))?;
1853 }
1854 to_usize(lwork, "lwork")
1855}
1856
1857pub fn dsytrf_buffer_size(
1858 ctx: &Context,
1859 n: usize,
1860 a: &mut DeviceMemory<f64>,
1861 lda: usize,
1862) -> Result<usize> {
1863 ctx.bind()?;
1864 validate_square_matrix(n, a.len(), lda)?;
1865 let mut lwork = 0;
1866 unsafe {
1867 try_ffi!(sys::cusolverDnDsytrf_bufferSize(
1868 ctx.as_raw(),
1869 to_i32(n, "n")?,
1870 a.as_mut_ptr().cast(),
1871 to_i32(lda, "lda")?,
1872 &raw mut lwork,
1873 ))?;
1874 }
1875 to_usize(lwork, "lwork")
1876}
1877
1878pub fn csytrf_buffer_size(
1879 ctx: &Context,
1880 n: usize,
1881 a: &mut DeviceMemory<Complex32>,
1882 lda: usize,
1883) -> Result<usize> {
1884 ctx.bind()?;
1885 validate_square_matrix(n, a.len(), lda)?;
1886 let mut lwork = 0;
1887 unsafe {
1888 try_ffi!(sys::cusolverDnCsytrf_bufferSize(
1889 ctx.as_raw(),
1890 to_i32(n, "n")?,
1891 a.as_mut_ptr().cast(),
1892 to_i32(lda, "lda")?,
1893 &raw mut lwork,
1894 ))?;
1895 }
1896 to_usize(lwork, "lwork")
1897}
1898
1899pub fn zsytrf_buffer_size(
1900 ctx: &Context,
1901 n: usize,
1902 a: &mut DeviceMemory<Complex64>,
1903 lda: usize,
1904) -> Result<usize> {
1905 ctx.bind()?;
1906 validate_square_matrix(n, a.len(), lda)?;
1907 let mut lwork = 0;
1908 unsafe {
1909 try_ffi!(sys::cusolverDnZsytrf_bufferSize(
1910 ctx.as_raw(),
1911 to_i32(n, "n")?,
1912 a.as_mut_ptr().cast(),
1913 to_i32(lda, "lda")?,
1914 &raw mut lwork,
1915 ))?;
1916 }
1917 to_usize(lwork, "lwork")
1918}
1919
1920/// Use the matching buffer-size helper to calculate the required workspace size.
1921///
1922/// The S and D data types are real valued single and double precision, respectively.
1923///
1924/// The C and Z data types are complex valued single and double precision, respectively.
1925///
1926/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
1927///
1928/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
1929/// `fill_mode` indicates which part of the matrix is used.
1930/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
1931///
1932/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
1933/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
1934///
1935/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
1936///
1937/// Provide workspace through `workspace`.
1938/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
1939/// The workspace size in bytes is `size_of::<T>() * lwork`.
1940/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
1941///
1942/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
1943/// `dev_info = i` indicates `D(i, i) = 0`.
1944///
1945/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
1946///
1947/// `pivots` contains the pivoting sequence.
1948/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
1949/// is interchanged with row/column `k`.
1950/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
1951/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
1952/// with row/column `m`.
1953/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
1954/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
1955/// with row/column `m`.
1956///
1957/// # Errors
1958///
1959/// Returns an error if cuSOLVER has not been initialized, if the
1960/// matrix dimensions or leading dimension are invalid, if the current GPU
1961/// architecture is unsupported, or if cuSOLVER reports an internal failure.
1962pub fn ssytrf(
1963 ctx: &Context,
1964 fill_mode: FillMode,
1965 n: usize,
1966 a: &mut DeviceMemory<f32>,
1967 lda: usize,
1968 pivots: Option<&mut DeviceMemory<i32>>,
1969 workspace: &mut DeviceMemory<f32>,
1970 dev_info: &mut DeviceMemory<i32>,
1971) -> Result<()> {
1972 ctx.bind()?;
1973 validate_square_matrix(n, a.len(), lda)?;
1974 if let Some(pivots) = pivots.as_ref() {
1975 require_pivot_buffer(pivots, n)?;
1976 }
1977 require_info_buffer(dev_info)?;
1978 let lwork = ssytrf_buffer_size(ctx, n, a, lda)?;
1979 require_workspace(workspace.len(), lwork)?;
1980 unsafe {
1981 try_ffi!(sys::cusolverDnSsytrf(
1982 ctx.as_raw(),
1983 fill_mode.into(),
1984 to_i32(n, "n")?,
1985 a.as_mut_ptr().cast(),
1986 to_i32(lda, "lda")?,
1987 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
1988 workspace.as_mut_ptr().cast(),
1989 to_i32(lwork, "lwork")?,
1990 dev_info.as_mut_ptr().cast(),
1991 ))?;
1992 }
1993 Ok(())
1994}
1995
1996/// Use the matching buffer-size helper to calculate the required workspace size.
1997///
1998/// The S and D data types are real valued single and double precision, respectively.
1999///
2000/// The C and Z data types are complex valued single and double precision, respectively.
2001///
2002/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
2003///
2004/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
2005/// `fill_mode` indicates which part of the matrix is used.
2006/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
2007///
2008/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
2009/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
2010///
2011/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
2012///
2013/// Provide workspace through `workspace`.
2014/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2015/// The workspace size in bytes is `size_of::<T>() * lwork`.
2016/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
2017///
2018/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
2019/// `dev_info = i` indicates `D(i, i) = 0`.
2020///
2021/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2022///
2023/// `pivots` contains the pivoting sequence.
2024/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
2025/// is interchanged with row/column `k`.
2026/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
2027/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
2028/// with row/column `m`.
2029/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
2030/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
2031/// with row/column `m`.
2032///
2033/// # Errors
2034///
2035/// Returns an error if cuSOLVER has not been initialized, if the
2036/// matrix dimensions or leading dimension are invalid, if the current GPU
2037/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2038pub fn dsytrf(
2039 ctx: &Context,
2040 fill_mode: FillMode,
2041 n: usize,
2042 a: &mut DeviceMemory<f64>,
2043 lda: usize,
2044 pivots: Option<&mut DeviceMemory<i32>>,
2045 workspace: &mut DeviceMemory<f64>,
2046 dev_info: &mut DeviceMemory<i32>,
2047) -> Result<()> {
2048 ctx.bind()?;
2049 validate_square_matrix(n, a.len(), lda)?;
2050 if let Some(pivots) = pivots.as_ref() {
2051 require_pivot_buffer(pivots, n)?;
2052 }
2053 require_info_buffer(dev_info)?;
2054 let lwork = dsytrf_buffer_size(ctx, n, a, lda)?;
2055 require_workspace(workspace.len(), lwork)?;
2056 unsafe {
2057 try_ffi!(sys::cusolverDnDsytrf(
2058 ctx.as_raw(),
2059 fill_mode.into(),
2060 to_i32(n, "n")?,
2061 a.as_mut_ptr().cast(),
2062 to_i32(lda, "lda")?,
2063 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
2064 workspace.as_mut_ptr().cast(),
2065 to_i32(lwork, "lwork")?,
2066 dev_info.as_mut_ptr().cast(),
2067 ))?;
2068 }
2069 Ok(())
2070}
2071
2072/// Use the matching buffer-size helper to calculate the required workspace size.
2073///
2074/// The S and D data types are real valued single and double precision, respectively.
2075///
2076/// The C and Z data types are complex valued single and double precision, respectively.
2077///
2078/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
2079///
2080/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
2081/// `fill_mode` indicates which part of the matrix is used.
2082/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
2083///
2084/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
2085/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
2086///
2087/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
2088///
2089/// Provide workspace through `workspace`.
2090/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2091/// The workspace size in bytes is `size_of::<T>() * lwork`.
2092/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
2093///
2094/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
2095/// `dev_info = i` indicates `D(i, i) = 0`.
2096///
2097/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2098///
2099/// `pivots` contains the pivoting sequence.
2100/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
2101/// is interchanged with row/column `k`.
2102/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
2103/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
2104/// with row/column `m`.
2105/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
2106/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
2107/// with row/column `m`.
2108///
2109/// # Errors
2110///
2111/// Returns an error if cuSOLVER has not been initialized, if the
2112/// matrix dimensions or leading dimension are invalid, if the current GPU
2113/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2114pub fn csytrf(
2115 ctx: &Context,
2116 fill_mode: FillMode,
2117 n: usize,
2118 a: &mut DeviceMemory<Complex32>,
2119 lda: usize,
2120 pivots: Option<&mut DeviceMemory<i32>>,
2121 workspace: &mut DeviceMemory<Complex32>,
2122 dev_info: &mut DeviceMemory<i32>,
2123) -> Result<()> {
2124 ctx.bind()?;
2125 validate_square_matrix(n, a.len(), lda)?;
2126 if let Some(pivots) = pivots.as_ref() {
2127 require_pivot_buffer(pivots, n)?;
2128 }
2129 require_info_buffer(dev_info)?;
2130 let lwork = csytrf_buffer_size(ctx, n, a, lda)?;
2131 require_workspace(workspace.len(), lwork)?;
2132 unsafe {
2133 try_ffi!(sys::cusolverDnCsytrf(
2134 ctx.as_raw(),
2135 fill_mode.into(),
2136 to_i32(n, "n")?,
2137 a.as_mut_ptr().cast(),
2138 to_i32(lda, "lda")?,
2139 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
2140 workspace.as_mut_ptr().cast(),
2141 to_i32(lwork, "lwork")?,
2142 dev_info.as_mut_ptr().cast(),
2143 ))?;
2144 }
2145 Ok(())
2146}
2147
2148/// Use the matching buffer-size helper to calculate the required workspace size.
2149///
2150/// The S and D data types are real valued single and double precision, respectively.
2151///
2152/// The C and Z data types are complex valued single and double precision, respectively.
2153///
2154/// Computes the factorization of a symmetric indefinite matrix using the Bunch-Kaufman diagonal pivoting.
2155///
2156/// `A` is a $n \times n$ symmetric matrix, only lower or upper part is meaningful.
2157/// `fill_mode` indicates which part of the matrix is used.
2158/// If `pivots` is `None`, no pivoting is performed, which is not numerically stable.
2159///
2160/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular factor `L` and block diagonal matrix `D`.
2161/// Each block of `D` is either 1x1 or 2x2 block, depending on pivoting.
2162///
2163/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular factor `U` and block diagonal matrix `D`.
2164///
2165/// Provide workspace through `workspace`.
2166/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2167/// The workspace size in bytes is `size_of::<T>() * lwork`.
2168/// When no pivoting is performed, the other triangular part of the input matrix `A` is used as workspace.
2169///
2170/// If Bunch-Kaufman factorization failed, that is, `A` is singular,
2171/// `dev_info = i` indicates `D(i, i) = 0`.
2172///
2173/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2174///
2175/// `pivots` contains the pivoting sequence.
2176/// If `pivots[i] = k` with `k > 0`, `D(i, i)` is a 1x1 block, and row/column `i` of `A`
2177/// is interchanged with row/column `k`.
2178/// If `fill_mode` is [`FillMode::Upper`] and `pivots[i - 1] = pivots[i] = -m` with `m > 0`,
2179/// `D(i-1:i,i-1:i)` is a 2x2 block, and row/column `i - 1` is interchanged
2180/// with row/column `m`.
2181/// If `fill_mode` is [`FillMode::Lower`] and `pivots[i + 1] = pivots[i] = -m` with `m > 0`,
2182/// `D(i:i+1,i:i+1)` is a 2x2 block, and row/column `i + 1` is interchanged
2183/// with row/column `m`.
2184///
2185/// # Errors
2186///
2187/// Returns an error if cuSOLVER has not been initialized, if the
2188/// matrix dimensions or leading dimension are invalid, if the current GPU
2189/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2190pub fn zsytrf(
2191 ctx: &Context,
2192 fill_mode: FillMode,
2193 n: usize,
2194 a: &mut DeviceMemory<Complex64>,
2195 lda: usize,
2196 pivots: Option<&mut DeviceMemory<i32>>,
2197 workspace: &mut DeviceMemory<Complex64>,
2198 dev_info: &mut DeviceMemory<i32>,
2199) -> Result<()> {
2200 ctx.bind()?;
2201 validate_square_matrix(n, a.len(), lda)?;
2202 if let Some(pivots) = pivots.as_ref() {
2203 require_pivot_buffer(pivots, n)?;
2204 }
2205 require_info_buffer(dev_info)?;
2206 let lwork = zsytrf_buffer_size(ctx, n, a, lda)?;
2207 require_workspace(workspace.len(), lwork)?;
2208 unsafe {
2209 try_ffi!(sys::cusolverDnZsytrf(
2210 ctx.as_raw(),
2211 fill_mode.into(),
2212 to_i32(n, "n")?,
2213 a.as_mut_ptr().cast(),
2214 to_i32(lda, "lda")?,
2215 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
2216 workspace.as_mut_ptr().cast(),
2217 to_i32(lwork, "lwork")?,
2218 dev_info.as_mut_ptr().cast(),
2219 ))?;
2220 }
2221 Ok(())
2222}
2223
2224pub fn sgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2225 ctx.bind()?;
2226 validate_bidiagonal_dims(m, n)?;
2227 let mut lwork = 0;
2228 unsafe {
2229 try_ffi!(sys::cusolverDnSgebrd_bufferSize(
2230 ctx.as_raw(),
2231 to_i32(m, "m")?,
2232 to_i32(n, "n")?,
2233 &raw mut lwork,
2234 ))?;
2235 }
2236 to_usize(lwork, "lwork")
2237}
2238
2239pub fn dgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2240 ctx.bind()?;
2241 validate_bidiagonal_dims(m, n)?;
2242 let mut lwork = 0;
2243 unsafe {
2244 try_ffi!(sys::cusolverDnDgebrd_bufferSize(
2245 ctx.as_raw(),
2246 to_i32(m, "m")?,
2247 to_i32(n, "n")?,
2248 &raw mut lwork,
2249 ))?;
2250 }
2251 to_usize(lwork, "lwork")
2252}
2253
2254pub fn cgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2255 ctx.bind()?;
2256 validate_bidiagonal_dims(m, n)?;
2257 let mut lwork = 0;
2258 unsafe {
2259 try_ffi!(sys::cusolverDnCgebrd_bufferSize(
2260 ctx.as_raw(),
2261 to_i32(m, "m")?,
2262 to_i32(n, "n")?,
2263 &raw mut lwork,
2264 ))?;
2265 }
2266 to_usize(lwork, "lwork")
2267}
2268
2269pub fn zgebrd_buffer_size(ctx: &Context, m: usize, n: usize) -> Result<usize> {
2270 ctx.bind()?;
2271 validate_bidiagonal_dims(m, n)?;
2272 let mut lwork = 0;
2273 unsafe {
2274 try_ffi!(sys::cusolverDnZgebrd_bufferSize(
2275 ctx.as_raw(),
2276 to_i32(m, "m")?,
2277 to_i32(n, "n")?,
2278 &raw mut lwork,
2279 ))?;
2280 }
2281 to_usize(lwork, "lwork")
2282}
2283
2284/// Use the matching buffer-size helper to calculate the required workspace size.
2285///
2286/// The S and D data types are real valued single and double precision, respectively.
2287///
2288/// The C and Z data types are complex valued single and double precision, respectively.
2289///
2290/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2291/// bidiagonal form `B` by an orthogonal transformation:
2292/// $Q^{H}\cdot A\cdot P = B$.
2293///
2294/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2295/// bidiagonal.
2296///
2297/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2298///
2299/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2300/// the upper bidiagonal matrix `B`. Elements below the diagonal, together
2301/// with `tauq`, represent `Q`; elements above the first superdiagonal,
2302/// together with `taup`, represent `P`.
2303/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2304/// lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2305/// together with `tauq`, represent `Q`; elements above the diagonal,
2306/// together with `taup`, represent `P`.
2307///
2308/// Provide workspace through `workspace`.
2309/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2310///
2311/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2312///
2313/// `gebrd` only supports `m >= n`.
2314///
2315/// # Errors
2316///
2317/// Returns an error if cuSOLVER has not been initialized, if the
2318/// matrix dimensions or leading dimension are invalid, if the current GPU
2319/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2320pub fn sgebrd(
2321 ctx: &Context,
2322 m: usize,
2323 n: usize,
2324 a: &mut DeviceMemory<f32>,
2325 lda: usize,
2326 d: &mut DeviceMemory<f32>,
2327 e: &mut DeviceMemory<f32>,
2328 tauq: &mut DeviceMemory<f32>,
2329 taup: &mut DeviceMemory<f32>,
2330 workspace: &mut DeviceMemory<f32>,
2331 dev_info: &mut DeviceMemory<i32>,
2332) -> Result<()> {
2333 ctx.bind()?;
2334 validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2335 require_info_buffer(dev_info)?;
2336 let lwork = sgebrd_buffer_size(ctx, m, n)?;
2337 require_workspace(workspace.len(), lwork)?;
2338 unsafe {
2339 try_ffi!(sys::cusolverDnSgebrd(
2340 ctx.as_raw(),
2341 to_i32(m, "m")?,
2342 to_i32(n, "n")?,
2343 a.as_mut_ptr().cast(),
2344 to_i32(lda, "lda")?,
2345 d.as_mut_ptr().cast(),
2346 e.as_mut_ptr().cast(),
2347 tauq.as_mut_ptr().cast(),
2348 taup.as_mut_ptr().cast(),
2349 workspace.as_mut_ptr().cast(),
2350 to_i32(lwork, "lwork")?,
2351 dev_info.as_mut_ptr().cast(),
2352 ))?;
2353 }
2354 Ok(())
2355}
2356
2357/// Use the matching buffer-size helper to calculate the required workspace size.
2358///
2359/// The S and D data types are real valued single and double precision, respectively.
2360///
2361/// The C and Z data types are complex valued single and double precision, respectively.
2362///
2363/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2364/// bidiagonal form `B` by an orthogonal transformation:
2365/// $Q^{H}\cdot A\cdot P = B$.
2366///
2367/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2368/// bidiagonal.
2369///
2370/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2371///
2372/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2373/// the upper bidiagonal matrix `B`. Elements below the diagonal, together
2374/// with `tauq`, represent `Q`; elements above the first superdiagonal,
2375/// together with `taup`, represent `P`.
2376/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2377/// lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2378/// together with `tauq`, represent `Q`; elements above the diagonal,
2379/// together with `taup`, represent `P`.
2380///
2381/// Provide workspace through `workspace`.
2382/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2383///
2384/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2385///
2386/// `gebrd` only supports `m >= n`.
2387///
2388/// # Errors
2389///
2390/// Returns an error if cuSOLVER has not been initialized, if the
2391/// matrix dimensions or leading dimension are invalid, if the current GPU
2392/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2393pub fn dgebrd(
2394 ctx: &Context,
2395 m: usize,
2396 n: usize,
2397 a: &mut DeviceMemory<f64>,
2398 lda: usize,
2399 d: &mut DeviceMemory<f64>,
2400 e: &mut DeviceMemory<f64>,
2401 tauq: &mut DeviceMemory<f64>,
2402 taup: &mut DeviceMemory<f64>,
2403 workspace: &mut DeviceMemory<f64>,
2404 dev_info: &mut DeviceMemory<i32>,
2405) -> Result<()> {
2406 ctx.bind()?;
2407 validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2408 require_info_buffer(dev_info)?;
2409 let lwork = dgebrd_buffer_size(ctx, m, n)?;
2410 require_workspace(workspace.len(), lwork)?;
2411 unsafe {
2412 try_ffi!(sys::cusolverDnDgebrd(
2413 ctx.as_raw(),
2414 to_i32(m, "m")?,
2415 to_i32(n, "n")?,
2416 a.as_mut_ptr().cast(),
2417 to_i32(lda, "lda")?,
2418 d.as_mut_ptr().cast(),
2419 e.as_mut_ptr().cast(),
2420 tauq.as_mut_ptr().cast(),
2421 taup.as_mut_ptr().cast(),
2422 workspace.as_mut_ptr().cast(),
2423 to_i32(lwork, "lwork")?,
2424 dev_info.as_mut_ptr().cast(),
2425 ))?;
2426 }
2427 Ok(())
2428}
2429
2430/// Use the matching buffer-size helper to calculate the required workspace size.
2431///
2432/// The S and D data types are real valued single and double precision, respectively.
2433///
2434/// The C and Z data types are complex valued single and double precision, respectively.
2435///
2436/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2437/// bidiagonal form `B` by an orthogonal transformation:
2438/// $Q^{H}\cdot A\cdot P = B$.
2439///
2440/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2441/// bidiagonal.
2442///
2443/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2444///
2445/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2446/// the upper bidiagonal matrix `B`. Elements below the diagonal, together
2447/// with `tauq`, represent `Q`; elements above the first superdiagonal,
2448/// together with `taup`, represent `P`.
2449/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2450/// lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2451/// together with `tauq`, represent `Q`; elements above the diagonal,
2452/// together with `taup`, represent `P`.
2453///
2454/// Provide workspace through `workspace`.
2455/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2456///
2457/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2458///
2459/// `gebrd` only supports `m >= n`.
2460///
2461/// # Errors
2462///
2463/// Returns an error if cuSOLVER has not been initialized, if the
2464/// matrix dimensions or leading dimension are invalid, if the current GPU
2465/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2466pub fn cgebrd(
2467 ctx: &Context,
2468 m: usize,
2469 n: usize,
2470 a: &mut DeviceMemory<Complex32>,
2471 lda: usize,
2472 d: &mut DeviceMemory<f32>,
2473 e: &mut DeviceMemory<f32>,
2474 tauq: &mut DeviceMemory<Complex32>,
2475 taup: &mut DeviceMemory<Complex32>,
2476 workspace: &mut DeviceMemory<Complex32>,
2477 dev_info: &mut DeviceMemory<i32>,
2478) -> Result<()> {
2479 ctx.bind()?;
2480 validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2481 require_info_buffer(dev_info)?;
2482 let lwork = cgebrd_buffer_size(ctx, m, n)?;
2483 require_workspace(workspace.len(), lwork)?;
2484 unsafe {
2485 try_ffi!(sys::cusolverDnCgebrd(
2486 ctx.as_raw(),
2487 to_i32(m, "m")?,
2488 to_i32(n, "n")?,
2489 a.as_mut_ptr().cast(),
2490 to_i32(lda, "lda")?,
2491 d.as_mut_ptr().cast(),
2492 e.as_mut_ptr().cast(),
2493 tauq.as_mut_ptr().cast(),
2494 taup.as_mut_ptr().cast(),
2495 workspace.as_mut_ptr().cast(),
2496 to_i32(lwork, "lwork")?,
2497 dev_info.as_mut_ptr().cast(),
2498 ))?;
2499 }
2500 Ok(())
2501}
2502
2503/// Use the matching buffer-size helper to calculate the required workspace size.
2504///
2505/// The S and D data types are real valued single and double precision, respectively.
2506///
2507/// The C and Z data types are complex valued single and double precision, respectively.
2508///
2509/// Reduces a general $m \times n$ matrix `A` to a real upper or lower
2510/// bidiagonal form `B` by an orthogonal transformation:
2511/// $Q^{H}\cdot A\cdot P = B$.
2512///
2513/// If `m >= n`, `B` is upper bidiagonal; if `m < n`, `B` is lower
2514/// bidiagonal.
2515///
2516/// The matrix `Q` and `P` are overwritten into matrix `A` in the following sense:
2517///
2518/// - If `m >= n`, the diagonal and first superdiagonal are overwritten with
2519/// the upper bidiagonal matrix `B`. Elements below the diagonal, together
2520/// with `tauq`, represent `Q`; elements above the first superdiagonal,
2521/// together with `taup`, represent `P`.
2522/// - If `m < n`, the diagonal and first subdiagonal are overwritten with the
2523/// lower bidiagonal matrix `B`. Elements below the first subdiagonal,
2524/// together with `tauq`, represent `Q`; elements above the diagonal,
2525/// together with `taup`, represent `P`.
2526///
2527/// Provide workspace through `workspace`.
2528/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2529///
2530/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2531///
2532/// `gebrd` only supports `m >= n`.
2533///
2534/// # Errors
2535///
2536/// Returns an error if cuSOLVER has not been initialized, if the
2537/// matrix dimensions or leading dimension are invalid, if the current GPU
2538/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2539pub fn zgebrd(
2540 ctx: &Context,
2541 m: usize,
2542 n: usize,
2543 a: &mut DeviceMemory<Complex64>,
2544 lda: usize,
2545 d: &mut DeviceMemory<f64>,
2546 e: &mut DeviceMemory<f64>,
2547 tauq: &mut DeviceMemory<Complex64>,
2548 taup: &mut DeviceMemory<Complex64>,
2549 workspace: &mut DeviceMemory<Complex64>,
2550 dev_info: &mut DeviceMemory<i32>,
2551) -> Result<()> {
2552 ctx.bind()?;
2553 validate_bidiagonal_buffers(m, n, a.len(), lda, d.len(), e.len(), tauq.len(), taup.len())?;
2554 require_info_buffer(dev_info)?;
2555 let lwork = zgebrd_buffer_size(ctx, m, n)?;
2556 require_workspace(workspace.len(), lwork)?;
2557 unsafe {
2558 try_ffi!(sys::cusolverDnZgebrd(
2559 ctx.as_raw(),
2560 to_i32(m, "m")?,
2561 to_i32(n, "n")?,
2562 a.as_mut_ptr().cast(),
2563 to_i32(lda, "lda")?,
2564 d.as_mut_ptr().cast(),
2565 e.as_mut_ptr().cast(),
2566 tauq.as_mut_ptr().cast(),
2567 taup.as_mut_ptr().cast(),
2568 workspace.as_mut_ptr().cast(),
2569 to_i32(lwork, "lwork")?,
2570 dev_info.as_mut_ptr().cast(),
2571 ))?;
2572 }
2573 Ok(())
2574}
2575
2576pub fn sorgbr_buffer_size(
2577 ctx: &Context,
2578 side: SideMode,
2579 m: usize,
2580 n: usize,
2581 k: usize,
2582 a: &DeviceMemory<f32>,
2583 lda: usize,
2584 tau: &DeviceMemory<f32>,
2585) -> Result<usize> {
2586 ctx.bind()?;
2587 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2588 let mut lwork = 0;
2589 unsafe {
2590 try_ffi!(sys::cusolverDnSorgbr_bufferSize(
2591 ctx.as_raw(),
2592 side.into(),
2593 to_i32(m, "m")?,
2594 to_i32(n, "n")?,
2595 to_i32(k, "k")?,
2596 a.as_ptr().cast(),
2597 to_i32(lda, "lda")?,
2598 tau.as_ptr().cast(),
2599 &raw mut lwork,
2600 ))?;
2601 }
2602 to_usize(lwork, "lwork")
2603}
2604
2605pub fn dorgbr_buffer_size(
2606 ctx: &Context,
2607 side: SideMode,
2608 m: usize,
2609 n: usize,
2610 k: usize,
2611 a: &DeviceMemory<f64>,
2612 lda: usize,
2613 tau: &DeviceMemory<f64>,
2614) -> Result<usize> {
2615 ctx.bind()?;
2616 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2617 let mut lwork = 0;
2618 unsafe {
2619 try_ffi!(sys::cusolverDnDorgbr_bufferSize(
2620 ctx.as_raw(),
2621 side.into(),
2622 to_i32(m, "m")?,
2623 to_i32(n, "n")?,
2624 to_i32(k, "k")?,
2625 a.as_ptr().cast(),
2626 to_i32(lda, "lda")?,
2627 tau.as_ptr().cast(),
2628 &raw mut lwork,
2629 ))?;
2630 }
2631 to_usize(lwork, "lwork")
2632}
2633
2634pub fn cungbr_buffer_size(
2635 ctx: &Context,
2636 side: SideMode,
2637 m: usize,
2638 n: usize,
2639 k: usize,
2640 a: &DeviceMemory<Complex32>,
2641 lda: usize,
2642 tau: &DeviceMemory<Complex32>,
2643) -> Result<usize> {
2644 ctx.bind()?;
2645 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2646 let mut lwork = 0;
2647 unsafe {
2648 try_ffi!(sys::cusolverDnCungbr_bufferSize(
2649 ctx.as_raw(),
2650 side.into(),
2651 to_i32(m, "m")?,
2652 to_i32(n, "n")?,
2653 to_i32(k, "k")?,
2654 a.as_ptr().cast(),
2655 to_i32(lda, "lda")?,
2656 tau.as_ptr().cast(),
2657 &raw mut lwork,
2658 ))?;
2659 }
2660 to_usize(lwork, "lwork")
2661}
2662
2663pub fn zungbr_buffer_size(
2664 ctx: &Context,
2665 side: SideMode,
2666 m: usize,
2667 n: usize,
2668 k: usize,
2669 a: &DeviceMemory<Complex64>,
2670 lda: usize,
2671 tau: &DeviceMemory<Complex64>,
2672) -> Result<usize> {
2673 ctx.bind()?;
2674 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2675 let mut lwork = 0;
2676 unsafe {
2677 try_ffi!(sys::cusolverDnZungbr_bufferSize(
2678 ctx.as_raw(),
2679 side.into(),
2680 to_i32(m, "m")?,
2681 to_i32(n, "n")?,
2682 to_i32(k, "k")?,
2683 a.as_ptr().cast(),
2684 to_i32(lda, "lda")?,
2685 tau.as_ptr().cast(),
2686 &raw mut lwork,
2687 ))?;
2688 }
2689 to_usize(lwork, "lwork")
2690}
2691
2692/// Use the matching buffer-size helper to calculate the required workspace size.
2693///
2694/// The S and D data types are real valued single and double precision, respectively.
2695///
2696/// The C and Z data types are complex valued single and double precision, respectively.
2697///
2698/// Generates one of the unitary matrices `Q` or $P^{H}$ determined by `gebrd`
2699/// when reducing matrix `A` to bidiagonal form:
2700/// $Q^{H}\cdot A\cdot P = B$.
2701///
2702/// `Q` and $P^{H}$ are defined as products of elementary reflectors `H(i)`
2703/// or `G(i)`, respectively.
2704///
2705/// Provide workspace through `workspace`.
2706/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2707/// The workspace size in bytes is `size_of::<T>() * lwork`.
2708///
2709/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2710///
2711/// # Errors
2712///
2713/// Returns an error if cuSOLVER has not been initialized, if the
2714/// matrix dimensions or leading dimension are invalid, if the current GPU
2715/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2716pub fn sorgbr(
2717 ctx: &Context,
2718 side: SideMode,
2719 m: usize,
2720 n: usize,
2721 k: usize,
2722 a: &mut DeviceMemory<f32>,
2723 lda: usize,
2724 tau: &DeviceMemory<f32>,
2725 workspace: &mut DeviceMemory<f32>,
2726 dev_info: &mut DeviceMemory<i32>,
2727) -> Result<()> {
2728 ctx.bind()?;
2729 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2730 require_info_buffer(dev_info)?;
2731 let lwork = sorgbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2732 require_workspace(workspace.len(), lwork)?;
2733 unsafe {
2734 try_ffi!(sys::cusolverDnSorgbr(
2735 ctx.as_raw(),
2736 side.into(),
2737 to_i32(m, "m")?,
2738 to_i32(n, "n")?,
2739 to_i32(k, "k")?,
2740 a.as_mut_ptr().cast(),
2741 to_i32(lda, "lda")?,
2742 tau.as_ptr().cast(),
2743 workspace.as_mut_ptr().cast(),
2744 to_i32(lwork, "lwork")?,
2745 dev_info.as_mut_ptr().cast(),
2746 ))?;
2747 }
2748 Ok(())
2749}
2750
2751/// Use the matching buffer-size helper to calculate the required workspace size.
2752///
2753/// The S and D data types are real valued single and double precision, respectively.
2754///
2755/// The C and Z data types are complex valued single and double precision, respectively.
2756///
2757/// Generates one of the unitary matrices `Q` or $P^{H}$ determined by `gebrd`
2758/// when reducing matrix `A` to bidiagonal form:
2759/// $Q^{H}\cdot A\cdot P = B$.
2760///
2761/// `Q` and $P^{H}$ are defined as products of elementary reflectors `H(i)`
2762/// or `G(i)`, respectively.
2763///
2764/// Provide workspace through `workspace`.
2765/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
2766/// The workspace size in bytes is `size_of::<T>() * lwork`.
2767///
2768/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
2769///
2770/// # Errors
2771///
2772/// Returns an error if cuSOLVER has not been initialized, if the
2773/// matrix dimensions or leading dimension are invalid, if the current GPU
2774/// architecture is unsupported, or if cuSOLVER reports an internal failure.
2775pub fn dorgbr(
2776 ctx: &Context,
2777 side: SideMode,
2778 m: usize,
2779 n: usize,
2780 k: usize,
2781 a: &mut DeviceMemory<f64>,
2782 lda: usize,
2783 tau: &DeviceMemory<f64>,
2784 workspace: &mut DeviceMemory<f64>,
2785 dev_info: &mut DeviceMemory<i32>,
2786) -> Result<()> {
2787 ctx.bind()?;
2788 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2789 require_info_buffer(dev_info)?;
2790 let lwork = dorgbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2791 require_workspace(workspace.len(), lwork)?;
2792 unsafe {
2793 try_ffi!(sys::cusolverDnDorgbr(
2794 ctx.as_raw(),
2795 side.into(),
2796 to_i32(m, "m")?,
2797 to_i32(n, "n")?,
2798 to_i32(k, "k")?,
2799 a.as_mut_ptr().cast(),
2800 to_i32(lda, "lda")?,
2801 tau.as_ptr().cast(),
2802 workspace.as_mut_ptr().cast(),
2803 to_i32(lwork, "lwork")?,
2804 dev_info.as_mut_ptr().cast(),
2805 ))?;
2806 }
2807 Ok(())
2808}
2809
2810pub fn cungbr(
2811 ctx: &Context,
2812 side: SideMode,
2813 m: usize,
2814 n: usize,
2815 k: usize,
2816 a: &mut DeviceMemory<Complex32>,
2817 lda: usize,
2818 tau: &DeviceMemory<Complex32>,
2819 workspace: &mut DeviceMemory<Complex32>,
2820 dev_info: &mut DeviceMemory<i32>,
2821) -> Result<()> {
2822 ctx.bind()?;
2823 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2824 require_info_buffer(dev_info)?;
2825 let lwork = cungbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2826 require_workspace(workspace.len(), lwork)?;
2827 unsafe {
2828 try_ffi!(sys::cusolverDnCungbr(
2829 ctx.as_raw(),
2830 side.into(),
2831 to_i32(m, "m")?,
2832 to_i32(n, "n")?,
2833 to_i32(k, "k")?,
2834 a.as_mut_ptr().cast(),
2835 to_i32(lda, "lda")?,
2836 tau.as_ptr().cast(),
2837 workspace.as_mut_ptr().cast(),
2838 to_i32(lwork, "lwork")?,
2839 dev_info.as_mut_ptr().cast(),
2840 ))?;
2841 }
2842 Ok(())
2843}
2844
2845pub fn zungbr(
2846 ctx: &Context,
2847 side: SideMode,
2848 m: usize,
2849 n: usize,
2850 k: usize,
2851 a: &mut DeviceMemory<Complex64>,
2852 lda: usize,
2853 tau: &DeviceMemory<Complex64>,
2854 workspace: &mut DeviceMemory<Complex64>,
2855 dev_info: &mut DeviceMemory<i32>,
2856) -> Result<()> {
2857 ctx.bind()?;
2858 validate_orgbr_inputs(side, m, n, k, a.len(), lda, tau.len())?;
2859 require_info_buffer(dev_info)?;
2860 let lwork = zungbr_buffer_size(ctx, side, m, n, k, a, lda, tau)?;
2861 require_workspace(workspace.len(), lwork)?;
2862 unsafe {
2863 try_ffi!(sys::cusolverDnZungbr(
2864 ctx.as_raw(),
2865 side.into(),
2866 to_i32(m, "m")?,
2867 to_i32(n, "n")?,
2868 to_i32(k, "k")?,
2869 a.as_mut_ptr().cast(),
2870 to_i32(lda, "lda")?,
2871 tau.as_ptr().cast(),
2872 workspace.as_mut_ptr().cast(),
2873 to_i32(lwork, "lwork")?,
2874 dev_info.as_mut_ptr().cast(),
2875 ))?;
2876 }
2877 Ok(())
2878}
2879
2880pub fn ssytrd_buffer_size(
2881 ctx: &Context,
2882 fill_mode: FillMode,
2883 n: usize,
2884 a: &DeviceMemory<f32>,
2885 lda: usize,
2886 d: &DeviceMemory<f32>,
2887 e: &DeviceMemory<f32>,
2888 tau: &DeviceMemory<f32>,
2889) -> Result<usize> {
2890 ctx.bind()?;
2891 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2892 let mut lwork = 0;
2893 unsafe {
2894 try_ffi!(sys::cusolverDnSsytrd_bufferSize(
2895 ctx.as_raw(),
2896 fill_mode.into(),
2897 to_i32(n, "n")?,
2898 a.as_ptr().cast(),
2899 to_i32(lda, "lda")?,
2900 d.as_ptr().cast(),
2901 e.as_ptr().cast(),
2902 tau.as_ptr().cast(),
2903 &raw mut lwork,
2904 ))?;
2905 }
2906 to_usize(lwork, "lwork")
2907}
2908
2909pub fn dsytrd_buffer_size(
2910 ctx: &Context,
2911 fill_mode: FillMode,
2912 n: usize,
2913 a: &DeviceMemory<f64>,
2914 lda: usize,
2915 d: &DeviceMemory<f64>,
2916 e: &DeviceMemory<f64>,
2917 tau: &DeviceMemory<f64>,
2918) -> Result<usize> {
2919 ctx.bind()?;
2920 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2921 let mut lwork = 0;
2922 unsafe {
2923 try_ffi!(sys::cusolverDnDsytrd_bufferSize(
2924 ctx.as_raw(),
2925 fill_mode.into(),
2926 to_i32(n, "n")?,
2927 a.as_ptr().cast(),
2928 to_i32(lda, "lda")?,
2929 d.as_ptr().cast(),
2930 e.as_ptr().cast(),
2931 tau.as_ptr().cast(),
2932 &raw mut lwork,
2933 ))?;
2934 }
2935 to_usize(lwork, "lwork")
2936}
2937
2938pub fn chetrd_buffer_size(
2939 ctx: &Context,
2940 fill_mode: FillMode,
2941 n: usize,
2942 a: &DeviceMemory<Complex32>,
2943 lda: usize,
2944 d: &DeviceMemory<f32>,
2945 e: &DeviceMemory<f32>,
2946 tau: &DeviceMemory<Complex32>,
2947) -> Result<usize> {
2948 ctx.bind()?;
2949 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2950 let mut lwork = 0;
2951 unsafe {
2952 try_ffi!(sys::cusolverDnChetrd_bufferSize(
2953 ctx.as_raw(),
2954 fill_mode.into(),
2955 to_i32(n, "n")?,
2956 a.as_ptr().cast(),
2957 to_i32(lda, "lda")?,
2958 d.as_ptr().cast(),
2959 e.as_ptr().cast(),
2960 tau.as_ptr().cast(),
2961 &raw mut lwork,
2962 ))?;
2963 }
2964 to_usize(lwork, "lwork")
2965}
2966
2967pub fn zhetrd_buffer_size(
2968 ctx: &Context,
2969 fill_mode: FillMode,
2970 n: usize,
2971 a: &DeviceMemory<Complex64>,
2972 lda: usize,
2973 d: &DeviceMemory<f64>,
2974 e: &DeviceMemory<f64>,
2975 tau: &DeviceMemory<Complex64>,
2976) -> Result<usize> {
2977 ctx.bind()?;
2978 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
2979 let mut lwork = 0;
2980 unsafe {
2981 try_ffi!(sys::cusolverDnZhetrd_bufferSize(
2982 ctx.as_raw(),
2983 fill_mode.into(),
2984 to_i32(n, "n")?,
2985 a.as_ptr().cast(),
2986 to_i32(lda, "lda")?,
2987 d.as_ptr().cast(),
2988 e.as_ptr().cast(),
2989 tau.as_ptr().cast(),
2990 &raw mut lwork,
2991 ))?;
2992 }
2993 to_usize(lwork, "lwork")
2994}
2995
2996/// Use the matching buffer-size helper to calculate the required workspace size.
2997///
2998/// The S and D data types are real valued single and double precision, respectively.
2999///
3000/// The C and Z data types are complex valued single and double precision, respectively.
3001///
3002/// Reduces a general symmetric (Hermitian) $n \times n$ matrix `A` to the
3003/// real symmetric tridiagonal form `T` by an orthogonal transformation:
3004/// $Q^{H}\cdot A\cdot Q = T$.
3005///
3006/// On output, `A` contains `T` and Householder reflection vectors.
3007/// If `fill_mode` is [`FillMode::Upper`], the diagonal and first
3008/// superdiagonal of `A` are overwritten by `T`; elements above the first
3009/// superdiagonal, together with `tau`, represent `Q`.
3010/// If `fill_mode` is [`FillMode::Lower`], the diagonal and first subdiagonal
3011/// of `A` are overwritten by `T`; elements below the first subdiagonal,
3012/// together with `tau`, represent `Q`.
3013///
3014/// Provide workspace through `workspace`.
3015/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3016/// The workspace size in bytes is `size_of::<T>() * lwork`.
3017///
3018/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3019/// The problem size `n` is limited by `n * lda <= INT32_MAX` primarily due to the current implementation constraints.
3020///
3021/// # Errors
3022///
3023/// Returns an error if cuSOLVER has not been initialized, if the
3024/// matrix dimensions, leading dimension, or fill mode are invalid, if the
3025/// current GPU architecture is unsupported, or if cuSOLVER reports an
3026/// internal failure.
3027pub fn ssytrd(
3028 ctx: &Context,
3029 fill_mode: FillMode,
3030 n: usize,
3031 a: &mut DeviceMemory<f32>,
3032 lda: usize,
3033 d: &mut DeviceMemory<f32>,
3034 e: &mut DeviceMemory<f32>,
3035 tau: &mut DeviceMemory<f32>,
3036 workspace: &mut DeviceMemory<f32>,
3037 dev_info: &mut DeviceMemory<i32>,
3038) -> Result<()> {
3039 ctx.bind()?;
3040 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3041 require_info_buffer(dev_info)?;
3042 let lwork = ssytrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3043 require_workspace(workspace.len(), lwork)?;
3044 unsafe {
3045 try_ffi!(sys::cusolverDnSsytrd(
3046 ctx.as_raw(),
3047 fill_mode.into(),
3048 to_i32(n, "n")?,
3049 a.as_mut_ptr().cast(),
3050 to_i32(lda, "lda")?,
3051 d.as_mut_ptr().cast(),
3052 e.as_mut_ptr().cast(),
3053 tau.as_mut_ptr().cast(),
3054 workspace.as_mut_ptr().cast(),
3055 to_i32(lwork, "lwork")?,
3056 dev_info.as_mut_ptr().cast(),
3057 ))?;
3058 }
3059 Ok(())
3060}
3061
3062/// Use the matching buffer-size helper to calculate the required workspace size.
3063///
3064/// The S and D data types are real valued single and double precision, respectively.
3065///
3066/// The C and Z data types are complex valued single and double precision, respectively.
3067///
3068/// Reduces a general symmetric (Hermitian) $n \times n$ matrix `A` to the
3069/// real symmetric tridiagonal form `T` by an orthogonal transformation:
3070/// $Q^{H}\cdot A\cdot Q = T$.
3071///
3072/// On output, `A` contains `T` and Householder reflection vectors.
3073/// If `fill_mode` is [`FillMode::Upper`], the diagonal and first
3074/// superdiagonal of `A` are overwritten by `T`; elements above the first
3075/// superdiagonal, together with `tau`, represent `Q`.
3076/// If `fill_mode` is [`FillMode::Lower`], the diagonal and first subdiagonal
3077/// of `A` are overwritten by `T`; elements below the first subdiagonal,
3078/// together with `tau`, represent `Q`.
3079///
3080/// Provide workspace through `workspace`.
3081/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3082/// The workspace size in bytes is `size_of::<T>() * lwork`.
3083///
3084/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3085/// The problem size `n` is limited by `n * lda <= INT32_MAX` primarily due to the current implementation constraints.
3086///
3087/// # Errors
3088///
3089/// Returns an error if cuSOLVER has not been initialized, if the
3090/// matrix dimensions, leading dimension, or fill mode are invalid, if the
3091/// current GPU architecture is unsupported, or if cuSOLVER reports an
3092/// internal failure.
3093pub fn dsytrd(
3094 ctx: &Context,
3095 fill_mode: FillMode,
3096 n: usize,
3097 a: &mut DeviceMemory<f64>,
3098 lda: usize,
3099 d: &mut DeviceMemory<f64>,
3100 e: &mut DeviceMemory<f64>,
3101 tau: &mut DeviceMemory<f64>,
3102 workspace: &mut DeviceMemory<f64>,
3103 dev_info: &mut DeviceMemory<i32>,
3104) -> Result<()> {
3105 ctx.bind()?;
3106 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3107 require_info_buffer(dev_info)?;
3108 let lwork = dsytrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3109 require_workspace(workspace.len(), lwork)?;
3110 unsafe {
3111 try_ffi!(sys::cusolverDnDsytrd(
3112 ctx.as_raw(),
3113 fill_mode.into(),
3114 to_i32(n, "n")?,
3115 a.as_mut_ptr().cast(),
3116 to_i32(lda, "lda")?,
3117 d.as_mut_ptr().cast(),
3118 e.as_mut_ptr().cast(),
3119 tau.as_mut_ptr().cast(),
3120 workspace.as_mut_ptr().cast(),
3121 to_i32(lwork, "lwork")?,
3122 dev_info.as_mut_ptr().cast(),
3123 ))?;
3124 }
3125 Ok(())
3126}
3127
3128pub fn chetrd(
3129 ctx: &Context,
3130 fill_mode: FillMode,
3131 n: usize,
3132 a: &mut DeviceMemory<Complex32>,
3133 lda: usize,
3134 d: &mut DeviceMemory<f32>,
3135 e: &mut DeviceMemory<f32>,
3136 tau: &mut DeviceMemory<Complex32>,
3137 workspace: &mut DeviceMemory<Complex32>,
3138 dev_info: &mut DeviceMemory<i32>,
3139) -> Result<()> {
3140 ctx.bind()?;
3141 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3142 require_info_buffer(dev_info)?;
3143 let lwork = chetrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3144 require_workspace(workspace.len(), lwork)?;
3145 unsafe {
3146 try_ffi!(sys::cusolverDnChetrd(
3147 ctx.as_raw(),
3148 fill_mode.into(),
3149 to_i32(n, "n")?,
3150 a.as_mut_ptr().cast(),
3151 to_i32(lda, "lda")?,
3152 d.as_mut_ptr().cast(),
3153 e.as_mut_ptr().cast(),
3154 tau.as_mut_ptr().cast(),
3155 workspace.as_mut_ptr().cast(),
3156 to_i32(lwork, "lwork")?,
3157 dev_info.as_mut_ptr().cast(),
3158 ))?;
3159 }
3160 Ok(())
3161}
3162
3163pub fn zhetrd(
3164 ctx: &Context,
3165 fill_mode: FillMode,
3166 n: usize,
3167 a: &mut DeviceMemory<Complex64>,
3168 lda: usize,
3169 d: &mut DeviceMemory<f64>,
3170 e: &mut DeviceMemory<f64>,
3171 tau: &mut DeviceMemory<Complex64>,
3172 workspace: &mut DeviceMemory<Complex64>,
3173 dev_info: &mut DeviceMemory<i32>,
3174) -> Result<()> {
3175 ctx.bind()?;
3176 validate_sytrd_inputs(n, a.len(), lda, d.len(), e.len(), tau.len())?;
3177 require_info_buffer(dev_info)?;
3178 let lwork = zhetrd_buffer_size(ctx, fill_mode, n, a, lda, d, e, tau)?;
3179 require_workspace(workspace.len(), lwork)?;
3180 unsafe {
3181 try_ffi!(sys::cusolverDnZhetrd(
3182 ctx.as_raw(),
3183 fill_mode.into(),
3184 to_i32(n, "n")?,
3185 a.as_mut_ptr().cast(),
3186 to_i32(lda, "lda")?,
3187 d.as_mut_ptr().cast(),
3188 e.as_mut_ptr().cast(),
3189 tau.as_mut_ptr().cast(),
3190 workspace.as_mut_ptr().cast(),
3191 to_i32(lwork, "lwork")?,
3192 dev_info.as_mut_ptr().cast(),
3193 ))?;
3194 }
3195 Ok(())
3196}
3197
3198pub fn sorgtr_buffer_size(
3199 ctx: &Context,
3200 fill_mode: FillMode,
3201 n: usize,
3202 a: &DeviceMemory<f32>,
3203 lda: usize,
3204 tau: &DeviceMemory<f32>,
3205) -> Result<usize> {
3206 ctx.bind()?;
3207 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3208 let mut lwork = 0;
3209 unsafe {
3210 try_ffi!(sys::cusolverDnSorgtr_bufferSize(
3211 ctx.as_raw(),
3212 fill_mode.into(),
3213 to_i32(n, "n")?,
3214 a.as_ptr().cast(),
3215 to_i32(lda, "lda")?,
3216 tau.as_ptr().cast(),
3217 &raw mut lwork,
3218 ))?;
3219 }
3220 to_usize(lwork, "lwork")
3221}
3222
3223pub fn dorgtr_buffer_size(
3224 ctx: &Context,
3225 fill_mode: FillMode,
3226 n: usize,
3227 a: &DeviceMemory<f64>,
3228 lda: usize,
3229 tau: &DeviceMemory<f64>,
3230) -> Result<usize> {
3231 ctx.bind()?;
3232 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3233 let mut lwork = 0;
3234 unsafe {
3235 try_ffi!(sys::cusolverDnDorgtr_bufferSize(
3236 ctx.as_raw(),
3237 fill_mode.into(),
3238 to_i32(n, "n")?,
3239 a.as_ptr().cast(),
3240 to_i32(lda, "lda")?,
3241 tau.as_ptr().cast(),
3242 &raw mut lwork,
3243 ))?;
3244 }
3245 to_usize(lwork, "lwork")
3246}
3247
3248pub fn cungtr_buffer_size(
3249 ctx: &Context,
3250 fill_mode: FillMode,
3251 n: usize,
3252 a: &DeviceMemory<Complex32>,
3253 lda: usize,
3254 tau: &DeviceMemory<Complex32>,
3255) -> Result<usize> {
3256 ctx.bind()?;
3257 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3258 let mut lwork = 0;
3259 unsafe {
3260 try_ffi!(sys::cusolverDnCungtr_bufferSize(
3261 ctx.as_raw(),
3262 fill_mode.into(),
3263 to_i32(n, "n")?,
3264 a.as_ptr().cast(),
3265 to_i32(lda, "lda")?,
3266 tau.as_ptr().cast(),
3267 &raw mut lwork,
3268 ))?;
3269 }
3270 to_usize(lwork, "lwork")
3271}
3272
3273pub fn zungtr_buffer_size(
3274 ctx: &Context,
3275 fill_mode: FillMode,
3276 n: usize,
3277 a: &DeviceMemory<Complex64>,
3278 lda: usize,
3279 tau: &DeviceMemory<Complex64>,
3280) -> Result<usize> {
3281 ctx.bind()?;
3282 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3283 let mut lwork = 0;
3284 unsafe {
3285 try_ffi!(sys::cusolverDnZungtr_bufferSize(
3286 ctx.as_raw(),
3287 fill_mode.into(),
3288 to_i32(n, "n")?,
3289 a.as_ptr().cast(),
3290 to_i32(lda, "lda")?,
3291 tau.as_ptr().cast(),
3292 &raw mut lwork,
3293 ))?;
3294 }
3295 to_usize(lwork, "lwork")
3296}
3297
3298/// Use the matching buffer-size helper to calculate the required workspace size.
3299///
3300/// The S and D data types are real valued single and double precision, respectively.
3301///
3302/// The C and Z data types are complex valued single and double precision, respectively.
3303///
3304/// Generates the orthogonal matrix `Q` from the elementary reflectors returned
3305/// by `sytrd`.
3306///
3307/// Provide workspace through `workspace`.
3308/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3309/// The workspace size in bytes is `size_of::<T>() * lwork`.
3310///
3311/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3312///
3313/// # Errors
3314///
3315/// Returns an error if cuSOLVER has not been initialized, if the
3316/// matrix dimensions or leading dimension are invalid, if the current GPU
3317/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3318pub fn sorgtr(
3319 ctx: &Context,
3320 fill_mode: FillMode,
3321 n: usize,
3322 a: &mut DeviceMemory<f32>,
3323 lda: usize,
3324 tau: &DeviceMemory<f32>,
3325 workspace: &mut DeviceMemory<f32>,
3326 dev_info: &mut DeviceMemory<i32>,
3327) -> Result<()> {
3328 ctx.bind()?;
3329 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3330 require_info_buffer(dev_info)?;
3331 let lwork = sorgtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3332 require_workspace(workspace.len(), lwork)?;
3333 unsafe {
3334 try_ffi!(sys::cusolverDnSorgtr(
3335 ctx.as_raw(),
3336 fill_mode.into(),
3337 to_i32(n, "n")?,
3338 a.as_mut_ptr().cast(),
3339 to_i32(lda, "lda")?,
3340 tau.as_ptr().cast(),
3341 workspace.as_mut_ptr().cast(),
3342 to_i32(lwork, "lwork")?,
3343 dev_info.as_mut_ptr().cast(),
3344 ))?;
3345 }
3346 Ok(())
3347}
3348
3349/// Use the matching buffer-size helper to calculate the required workspace size.
3350///
3351/// The S and D data types are real valued single and double precision, respectively.
3352///
3353/// The C and Z data types are complex valued single and double precision, respectively.
3354///
3355/// Generates the orthogonal matrix `Q` from the elementary reflectors returned
3356/// by `sytrd`.
3357///
3358/// Provide workspace through `workspace`.
3359/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3360/// The workspace size in bytes is `size_of::<T>() * lwork`.
3361///
3362/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3363///
3364/// # Errors
3365///
3366/// Returns an error if cuSOLVER has not been initialized, if the
3367/// matrix dimensions or leading dimension are invalid, if the current GPU
3368/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3369pub fn dorgtr(
3370 ctx: &Context,
3371 fill_mode: FillMode,
3372 n: usize,
3373 a: &mut DeviceMemory<f64>,
3374 lda: usize,
3375 tau: &DeviceMemory<f64>,
3376 workspace: &mut DeviceMemory<f64>,
3377 dev_info: &mut DeviceMemory<i32>,
3378) -> Result<()> {
3379 ctx.bind()?;
3380 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3381 require_info_buffer(dev_info)?;
3382 let lwork = dorgtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3383 require_workspace(workspace.len(), lwork)?;
3384 unsafe {
3385 try_ffi!(sys::cusolverDnDorgtr(
3386 ctx.as_raw(),
3387 fill_mode.into(),
3388 to_i32(n, "n")?,
3389 a.as_mut_ptr().cast(),
3390 to_i32(lda, "lda")?,
3391 tau.as_ptr().cast(),
3392 workspace.as_mut_ptr().cast(),
3393 to_i32(lwork, "lwork")?,
3394 dev_info.as_mut_ptr().cast(),
3395 ))?;
3396 }
3397 Ok(())
3398}
3399
3400pub fn cungtr(
3401 ctx: &Context,
3402 fill_mode: FillMode,
3403 n: usize,
3404 a: &mut DeviceMemory<Complex32>,
3405 lda: usize,
3406 tau: &DeviceMemory<Complex32>,
3407 workspace: &mut DeviceMemory<Complex32>,
3408 dev_info: &mut DeviceMemory<i32>,
3409) -> Result<()> {
3410 ctx.bind()?;
3411 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3412 require_info_buffer(dev_info)?;
3413 let lwork = cungtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3414 require_workspace(workspace.len(), lwork)?;
3415 unsafe {
3416 try_ffi!(sys::cusolverDnCungtr(
3417 ctx.as_raw(),
3418 fill_mode.into(),
3419 to_i32(n, "n")?,
3420 a.as_mut_ptr().cast(),
3421 to_i32(lda, "lda")?,
3422 tau.as_ptr().cast(),
3423 workspace.as_mut_ptr().cast(),
3424 to_i32(lwork, "lwork")?,
3425 dev_info.as_mut_ptr().cast(),
3426 ))?;
3427 }
3428 Ok(())
3429}
3430
3431pub fn zungtr(
3432 ctx: &Context,
3433 fill_mode: FillMode,
3434 n: usize,
3435 a: &mut DeviceMemory<Complex64>,
3436 lda: usize,
3437 tau: &DeviceMemory<Complex64>,
3438 workspace: &mut DeviceMemory<Complex64>,
3439 dev_info: &mut DeviceMemory<i32>,
3440) -> Result<()> {
3441 ctx.bind()?;
3442 validate_orgtr_inputs(n, a.len(), lda, tau.len())?;
3443 require_info_buffer(dev_info)?;
3444 let lwork = zungtr_buffer_size(ctx, fill_mode, n, a, lda, tau)?;
3445 require_workspace(workspace.len(), lwork)?;
3446 unsafe {
3447 try_ffi!(sys::cusolverDnZungtr(
3448 ctx.as_raw(),
3449 fill_mode.into(),
3450 to_i32(n, "n")?,
3451 a.as_mut_ptr().cast(),
3452 to_i32(lda, "lda")?,
3453 tau.as_ptr().cast(),
3454 workspace.as_mut_ptr().cast(),
3455 to_i32(lwork, "lwork")?,
3456 dev_info.as_mut_ptr().cast(),
3457 ))?;
3458 }
3459 Ok(())
3460}
3461
3462pub fn sormtr_buffer_size(
3463 ctx: &Context,
3464 side: SideMode,
3465 fill_mode: FillMode,
3466 operation: Operation,
3467 m: usize,
3468 n: usize,
3469 a: &DeviceMemory<f32>,
3470 lda: usize,
3471 tau: &DeviceMemory<f32>,
3472 c: &DeviceMemory<f32>,
3473 ldc: usize,
3474) -> Result<usize> {
3475 ctx.bind()?;
3476 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3477 let mut lwork = 0;
3478 unsafe {
3479 try_ffi!(sys::cusolverDnSormtr_bufferSize(
3480 ctx.as_raw(),
3481 side.into(),
3482 fill_mode.into(),
3483 operation.into(),
3484 to_i32(m, "m")?,
3485 to_i32(n, "n")?,
3486 a.as_ptr().cast(),
3487 to_i32(lda, "lda")?,
3488 tau.as_ptr().cast(),
3489 c.as_ptr().cast(),
3490 to_i32(ldc, "ldc")?,
3491 &raw mut lwork,
3492 ))?;
3493 }
3494 to_usize(lwork, "lwork")
3495}
3496
3497pub fn dormtr_buffer_size(
3498 ctx: &Context,
3499 side: SideMode,
3500 fill_mode: FillMode,
3501 operation: Operation,
3502 m: usize,
3503 n: usize,
3504 a: &DeviceMemory<f64>,
3505 lda: usize,
3506 tau: &DeviceMemory<f64>,
3507 c: &DeviceMemory<f64>,
3508 ldc: usize,
3509) -> Result<usize> {
3510 ctx.bind()?;
3511 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3512 let mut lwork = 0;
3513 unsafe {
3514 try_ffi!(sys::cusolverDnDormtr_bufferSize(
3515 ctx.as_raw(),
3516 side.into(),
3517 fill_mode.into(),
3518 operation.into(),
3519 to_i32(m, "m")?,
3520 to_i32(n, "n")?,
3521 a.as_ptr().cast(),
3522 to_i32(lda, "lda")?,
3523 tau.as_ptr().cast(),
3524 c.as_ptr().cast(),
3525 to_i32(ldc, "ldc")?,
3526 &raw mut lwork,
3527 ))?;
3528 }
3529 to_usize(lwork, "lwork")
3530}
3531
3532pub fn cunmtr_buffer_size(
3533 ctx: &Context,
3534 side: SideMode,
3535 fill_mode: FillMode,
3536 operation: Operation,
3537 m: usize,
3538 n: usize,
3539 a: &DeviceMemory<Complex32>,
3540 lda: usize,
3541 tau: &DeviceMemory<Complex32>,
3542 c: &DeviceMemory<Complex32>,
3543 ldc: usize,
3544) -> Result<usize> {
3545 ctx.bind()?;
3546 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3547 let mut lwork = 0;
3548 unsafe {
3549 try_ffi!(sys::cusolverDnCunmtr_bufferSize(
3550 ctx.as_raw(),
3551 side.into(),
3552 fill_mode.into(),
3553 operation.into(),
3554 to_i32(m, "m")?,
3555 to_i32(n, "n")?,
3556 a.as_ptr().cast(),
3557 to_i32(lda, "lda")?,
3558 tau.as_ptr().cast(),
3559 c.as_ptr().cast(),
3560 to_i32(ldc, "ldc")?,
3561 &raw mut lwork,
3562 ))?;
3563 }
3564 to_usize(lwork, "lwork")
3565}
3566
3567pub fn zunmtr_buffer_size(
3568 ctx: &Context,
3569 side: SideMode,
3570 fill_mode: FillMode,
3571 operation: Operation,
3572 m: usize,
3573 n: usize,
3574 a: &DeviceMemory<Complex64>,
3575 lda: usize,
3576 tau: &DeviceMemory<Complex64>,
3577 c: &DeviceMemory<Complex64>,
3578 ldc: usize,
3579) -> Result<usize> {
3580 ctx.bind()?;
3581 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3582 let mut lwork = 0;
3583 unsafe {
3584 try_ffi!(sys::cusolverDnZunmtr_bufferSize(
3585 ctx.as_raw(),
3586 side.into(),
3587 fill_mode.into(),
3588 operation.into(),
3589 to_i32(m, "m")?,
3590 to_i32(n, "n")?,
3591 a.as_ptr().cast(),
3592 to_i32(lda, "lda")?,
3593 tau.as_ptr().cast(),
3594 c.as_ptr().cast(),
3595 to_i32(ldc, "ldc")?,
3596 &raw mut lwork,
3597 ))?;
3598 }
3599 to_usize(lwork, "lwork")
3600}
3601
3602/// Use the matching buffer-size helper to calculate the required workspace size.
3603///
3604/// The S and D data types are real valued single and double precision, respectively.
3605///
3606/// The C and Z data types are complex valued single and double precision, respectively.
3607///
3608/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
3609/// returned by `sytrd`, to `C` and stores the result in `C`.
3610///
3611/// `side` selects whether `Q` is applied from the left or right, and
3612/// `operation` selects whether `Q` is transposed.
3613///
3614/// Provide workspace through `workspace`.
3615/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3616/// The workspace size in bytes is `size_of::<T>() * lwork`.
3617///
3618/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3619///
3620/// # Errors
3621///
3622/// Returns an error if cuSOLVER has not been initialized, if the
3623/// matrix dimensions or leading dimensions are invalid, if the current GPU
3624/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3625pub fn sormtr(
3626 ctx: &Context,
3627 side: SideMode,
3628 fill_mode: FillMode,
3629 operation: Operation,
3630 m: usize,
3631 n: usize,
3632 a: &mut DeviceMemory<f32>,
3633 lda: usize,
3634 tau: &mut DeviceMemory<f32>,
3635 c: &mut DeviceMemory<f32>,
3636 ldc: usize,
3637 workspace: &mut DeviceMemory<f32>,
3638 dev_info: &mut DeviceMemory<i32>,
3639) -> Result<()> {
3640 ctx.bind()?;
3641 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3642 require_info_buffer(dev_info)?;
3643 let lwork = sormtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3644 require_workspace(workspace.len(), lwork)?;
3645 unsafe {
3646 try_ffi!(sys::cusolverDnSormtr(
3647 ctx.as_raw(),
3648 side.into(),
3649 fill_mode.into(),
3650 operation.into(),
3651 to_i32(m, "m")?,
3652 to_i32(n, "n")?,
3653 a.as_mut_ptr().cast(),
3654 to_i32(lda, "lda")?,
3655 tau.as_mut_ptr().cast(),
3656 c.as_mut_ptr().cast(),
3657 to_i32(ldc, "ldc")?,
3658 workspace.as_mut_ptr().cast(),
3659 to_i32(lwork, "lwork")?,
3660 dev_info.as_mut_ptr().cast(),
3661 ))?;
3662 }
3663 Ok(())
3664}
3665
3666/// Use the matching buffer-size helper to calculate the required workspace size.
3667///
3668/// The S and D data types are real valued single and double precision, respectively.
3669///
3670/// The C and Z data types are complex valued single and double precision, respectively.
3671///
3672/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
3673/// returned by `sytrd`, to `C` and stores the result in `C`.
3674///
3675/// `side` selects whether `Q` is applied from the left or right, and
3676/// `operation` selects whether `Q` is transposed.
3677///
3678/// Provide workspace through `workspace`.
3679/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3680/// The workspace size in bytes is `size_of::<T>() * lwork`.
3681///
3682/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3683///
3684/// # Errors
3685///
3686/// Returns an error if cuSOLVER has not been initialized, if the
3687/// matrix dimensions or leading dimensions are invalid, if the current GPU
3688/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3689pub fn dormtr(
3690 ctx: &Context,
3691 side: SideMode,
3692 fill_mode: FillMode,
3693 operation: Operation,
3694 m: usize,
3695 n: usize,
3696 a: &mut DeviceMemory<f64>,
3697 lda: usize,
3698 tau: &mut DeviceMemory<f64>,
3699 c: &mut DeviceMemory<f64>,
3700 ldc: usize,
3701 workspace: &mut DeviceMemory<f64>,
3702 dev_info: &mut DeviceMemory<i32>,
3703) -> Result<()> {
3704 ctx.bind()?;
3705 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3706 require_info_buffer(dev_info)?;
3707 let lwork = dormtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3708 require_workspace(workspace.len(), lwork)?;
3709 unsafe {
3710 try_ffi!(sys::cusolverDnDormtr(
3711 ctx.as_raw(),
3712 side.into(),
3713 fill_mode.into(),
3714 operation.into(),
3715 to_i32(m, "m")?,
3716 to_i32(n, "n")?,
3717 a.as_mut_ptr().cast(),
3718 to_i32(lda, "lda")?,
3719 tau.as_mut_ptr().cast(),
3720 c.as_mut_ptr().cast(),
3721 to_i32(ldc, "ldc")?,
3722 workspace.as_mut_ptr().cast(),
3723 to_i32(lwork, "lwork")?,
3724 dev_info.as_mut_ptr().cast(),
3725 ))?;
3726 }
3727 Ok(())
3728}
3729
3730pub fn cunmtr(
3731 ctx: &Context,
3732 side: SideMode,
3733 fill_mode: FillMode,
3734 operation: Operation,
3735 m: usize,
3736 n: usize,
3737 a: &mut DeviceMemory<Complex32>,
3738 lda: usize,
3739 tau: &mut DeviceMemory<Complex32>,
3740 c: &mut DeviceMemory<Complex32>,
3741 ldc: usize,
3742 workspace: &mut DeviceMemory<Complex32>,
3743 dev_info: &mut DeviceMemory<i32>,
3744) -> Result<()> {
3745 ctx.bind()?;
3746 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3747 require_info_buffer(dev_info)?;
3748 let lwork = cunmtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3749 require_workspace(workspace.len(), lwork)?;
3750 unsafe {
3751 try_ffi!(sys::cusolverDnCunmtr(
3752 ctx.as_raw(),
3753 side.into(),
3754 fill_mode.into(),
3755 operation.into(),
3756 to_i32(m, "m")?,
3757 to_i32(n, "n")?,
3758 a.as_mut_ptr().cast(),
3759 to_i32(lda, "lda")?,
3760 tau.as_mut_ptr().cast(),
3761 c.as_mut_ptr().cast(),
3762 to_i32(ldc, "ldc")?,
3763 workspace.as_mut_ptr().cast(),
3764 to_i32(lwork, "lwork")?,
3765 dev_info.as_mut_ptr().cast(),
3766 ))?;
3767 }
3768 Ok(())
3769}
3770
3771pub fn zunmtr(
3772 ctx: &Context,
3773 side: SideMode,
3774 fill_mode: FillMode,
3775 operation: Operation,
3776 m: usize,
3777 n: usize,
3778 a: &mut DeviceMemory<Complex64>,
3779 lda: usize,
3780 tau: &mut DeviceMemory<Complex64>,
3781 c: &mut DeviceMemory<Complex64>,
3782 ldc: usize,
3783 workspace: &mut DeviceMemory<Complex64>,
3784 dev_info: &mut DeviceMemory<i32>,
3785) -> Result<()> {
3786 ctx.bind()?;
3787 validate_ormtr_inputs(side, m, n, a.len(), lda, tau.len(), c.len(), ldc)?;
3788 require_info_buffer(dev_info)?;
3789 let lwork = zunmtr_buffer_size(ctx, side, fill_mode, operation, m, n, a, lda, tau, c, ldc)?;
3790 require_workspace(workspace.len(), lwork)?;
3791 unsafe {
3792 try_ffi!(sys::cusolverDnZunmtr(
3793 ctx.as_raw(),
3794 side.into(),
3795 fill_mode.into(),
3796 operation.into(),
3797 to_i32(m, "m")?,
3798 to_i32(n, "n")?,
3799 a.as_mut_ptr().cast(),
3800 to_i32(lda, "lda")?,
3801 tau.as_mut_ptr().cast(),
3802 c.as_mut_ptr().cast(),
3803 to_i32(ldc, "ldc")?,
3804 workspace.as_mut_ptr().cast(),
3805 to_i32(lwork, "lwork")?,
3806 dev_info.as_mut_ptr().cast(),
3807 ))?;
3808 }
3809 Ok(())
3810}
3811
3812pub fn sgeqrf_buffer_size(
3813 ctx: &Context,
3814 m: usize,
3815 n: usize,
3816 a: &mut DeviceMemory<f32>,
3817 lda: usize,
3818) -> Result<usize> {
3819 ctx.bind()?;
3820 validate_matrix(m, n, a.len(), lda)?;
3821 let mut lwork = 0;
3822 unsafe {
3823 try_ffi!(sys::cusolverDnSgeqrf_bufferSize(
3824 ctx.as_raw(),
3825 to_i32(m, "m")?,
3826 to_i32(n, "n")?,
3827 a.as_mut_ptr().cast(),
3828 to_i32(lda, "lda")?,
3829 &raw mut lwork,
3830 ))?;
3831 }
3832 to_usize(lwork, "lwork")
3833}
3834
3835pub fn dgeqrf_buffer_size(
3836 ctx: &Context,
3837 m: usize,
3838 n: usize,
3839 a: &mut DeviceMemory<f64>,
3840 lda: usize,
3841) -> Result<usize> {
3842 ctx.bind()?;
3843 validate_matrix(m, n, a.len(), lda)?;
3844 let mut lwork = 0;
3845 unsafe {
3846 try_ffi!(sys::cusolverDnDgeqrf_bufferSize(
3847 ctx.as_raw(),
3848 to_i32(m, "m")?,
3849 to_i32(n, "n")?,
3850 a.as_mut_ptr().cast(),
3851 to_i32(lda, "lda")?,
3852 &raw mut lwork,
3853 ))?;
3854 }
3855 to_usize(lwork, "lwork")
3856}
3857
3858pub fn cgeqrf_buffer_size(
3859 ctx: &Context,
3860 m: usize,
3861 n: usize,
3862 a: &mut DeviceMemory<Complex32>,
3863 lda: usize,
3864) -> Result<usize> {
3865 ctx.bind()?;
3866 validate_matrix(m, n, a.len(), lda)?;
3867 let mut lwork = 0;
3868 unsafe {
3869 try_ffi!(sys::cusolverDnCgeqrf_bufferSize(
3870 ctx.as_raw(),
3871 to_i32(m, "m")?,
3872 to_i32(n, "n")?,
3873 a.as_mut_ptr().cast(),
3874 to_i32(lda, "lda")?,
3875 &raw mut lwork,
3876 ))?;
3877 }
3878 to_usize(lwork, "lwork")
3879}
3880
3881pub fn zgeqrf_buffer_size(
3882 ctx: &Context,
3883 m: usize,
3884 n: usize,
3885 a: &mut DeviceMemory<Complex64>,
3886 lda: usize,
3887) -> Result<usize> {
3888 ctx.bind()?;
3889 validate_matrix(m, n, a.len(), lda)?;
3890 let mut lwork = 0;
3891 unsafe {
3892 try_ffi!(sys::cusolverDnZgeqrf_bufferSize(
3893 ctx.as_raw(),
3894 to_i32(m, "m")?,
3895 to_i32(n, "n")?,
3896 a.as_mut_ptr().cast(),
3897 to_i32(lda, "lda")?,
3898 &raw mut lwork,
3899 ))?;
3900 }
3901 to_usize(lwork, "lwork")
3902}
3903
3904/// Use the matching buffer-size helper to calculate the required workspace size.
3905///
3906/// The S and D data types are real valued single and double precision, respectively.
3907///
3908/// The C and Z data types are complex valued single and double precision, respectively.
3909///
3910/// Computes the QR factorization of an $m \times n$ matrix
3911///
3912/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
3913///
3914/// Provide workspace through `workspace`.
3915/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3916///
3917/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
3918///
3919/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
3920/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `Ï„`.
3921/// If `v` is original householder vector, `q` is the new householder vector corresponding to `Ï„`, satisfying the following relation
3922///
3923/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3924///
3925/// # Errors
3926///
3927/// Returns an error if cuSOLVER has not been initialized, if the
3928/// matrix dimensions or leading dimension are invalid, if the current GPU
3929/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3930pub fn sgeqrf(
3931 ctx: &Context,
3932 m: usize,
3933 n: usize,
3934 a: &mut DeviceMemory<f32>,
3935 lda: usize,
3936 tau: &mut DeviceMemory<f32>,
3937 workspace: &mut DeviceMemory<f32>,
3938 dev_info: &mut DeviceMemory<i32>,
3939) -> Result<()> {
3940 ctx.bind()?;
3941 validate_matrix(m, n, a.len(), lda)?;
3942 require_tau_buffer(tau, m.min(n))?;
3943 require_info_buffer(dev_info)?;
3944 let lwork = sgeqrf_buffer_size(ctx, m, n, a, lda)?;
3945 require_workspace(workspace.len(), lwork)?;
3946 unsafe {
3947 try_ffi!(sys::cusolverDnSgeqrf(
3948 ctx.as_raw(),
3949 to_i32(m, "m")?,
3950 to_i32(n, "n")?,
3951 a.as_mut_ptr().cast(),
3952 to_i32(lda, "lda")?,
3953 tau.as_mut_ptr().cast(),
3954 workspace.as_mut_ptr().cast(),
3955 to_i32(lwork, "lwork")?,
3956 dev_info.as_mut_ptr().cast(),
3957 ))?;
3958 }
3959 Ok(())
3960}
3961
3962/// Use the matching buffer-size helper to calculate the required workspace size.
3963///
3964/// The S and D data types are real valued single and double precision, respectively.
3965///
3966/// The C and Z data types are complex valued single and double precision, respectively.
3967///
3968/// Computes the QR factorization of an $m \times n$ matrix
3969///
3970/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
3971///
3972/// Provide workspace through `workspace`.
3973/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
3974///
3975/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
3976///
3977/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
3978/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `Ï„`.
3979/// If `v` is original householder vector, `q` is the new householder vector corresponding to `Ï„`, satisfying the following relation
3980///
3981/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
3982///
3983/// # Errors
3984///
3985/// Returns an error if cuSOLVER has not been initialized, if the
3986/// matrix dimensions or leading dimension are invalid, if the current GPU
3987/// architecture is unsupported, or if cuSOLVER reports an internal failure.
3988pub fn dgeqrf(
3989 ctx: &Context,
3990 m: usize,
3991 n: usize,
3992 a: &mut DeviceMemory<f64>,
3993 lda: usize,
3994 tau: &mut DeviceMemory<f64>,
3995 workspace: &mut DeviceMemory<f64>,
3996 dev_info: &mut DeviceMemory<i32>,
3997) -> Result<()> {
3998 ctx.bind()?;
3999 validate_matrix(m, n, a.len(), lda)?;
4000 require_tau_buffer(tau, m.min(n))?;
4001 require_info_buffer(dev_info)?;
4002 let lwork = dgeqrf_buffer_size(ctx, m, n, a, lda)?;
4003 require_workspace(workspace.len(), lwork)?;
4004 unsafe {
4005 try_ffi!(sys::cusolverDnDgeqrf(
4006 ctx.as_raw(),
4007 to_i32(m, "m")?,
4008 to_i32(n, "n")?,
4009 a.as_mut_ptr().cast(),
4010 to_i32(lda, "lda")?,
4011 tau.as_mut_ptr().cast(),
4012 workspace.as_mut_ptr().cast(),
4013 to_i32(lwork, "lwork")?,
4014 dev_info.as_mut_ptr().cast(),
4015 ))?;
4016 }
4017 Ok(())
4018}
4019
4020/// Use the matching buffer-size helper to calculate the required workspace size.
4021///
4022/// The S and D data types are real valued single and double precision, respectively.
4023///
4024/// The C and Z data types are complex valued single and double precision, respectively.
4025///
4026/// Computes the QR factorization of an $m \times n$ matrix
4027///
4028/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
4029///
4030/// Provide workspace through `workspace`.
4031/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4032///
4033/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
4034///
4035/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
4036/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `Ï„`.
4037/// If `v` is original householder vector, `q` is the new householder vector corresponding to `Ï„`, satisfying the following relation
4038///
4039/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4040///
4041/// # Errors
4042///
4043/// Returns an error if cuSOLVER has not been initialized, if the
4044/// matrix dimensions or leading dimension are invalid, if the current GPU
4045/// architecture is unsupported, or if cuSOLVER reports an internal failure.
4046pub fn cgeqrf(
4047 ctx: &Context,
4048 m: usize,
4049 n: usize,
4050 a: &mut DeviceMemory<Complex32>,
4051 lda: usize,
4052 tau: &mut DeviceMemory<Complex32>,
4053 workspace: &mut DeviceMemory<Complex32>,
4054 dev_info: &mut DeviceMemory<i32>,
4055) -> Result<()> {
4056 ctx.bind()?;
4057 validate_matrix(m, n, a.len(), lda)?;
4058 require_tau_buffer(tau, m.min(n))?;
4059 require_info_buffer(dev_info)?;
4060 let lwork = cgeqrf_buffer_size(ctx, m, n, a, lda)?;
4061 require_workspace(workspace.len(), lwork)?;
4062 unsafe {
4063 try_ffi!(sys::cusolverDnCgeqrf(
4064 ctx.as_raw(),
4065 to_i32(m, "m")?,
4066 to_i32(n, "n")?,
4067 a.as_mut_ptr().cast(),
4068 to_i32(lda, "lda")?,
4069 tau.as_mut_ptr().cast(),
4070 workspace.as_mut_ptr().cast(),
4071 to_i32(lwork, "lwork")?,
4072 dev_info.as_mut_ptr().cast(),
4073 ))?;
4074 }
4075 Ok(())
4076}
4077
4078/// Use the matching buffer-size helper to calculate the required workspace size.
4079///
4080/// The S and D data types are real valued single and double precision, respectively.
4081///
4082/// The C and Z data types are complex valued single and double precision, respectively.
4083///
4084/// Computes the QR factorization of an $m \times n$ matrix
4085///
4086/// where `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and `R` is a $n \times n$ upper triangular matrix.
4087///
4088/// Provide workspace through `workspace`.
4089/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4090///
4091/// The matrix `R` is overwritten in upper triangular part of `A`, including diagonal elements.
4092///
4093/// The matrix `Q` is not formed explicitly, instead, a sequence of householder vectors are stored in lower triangular part of `A`.
4094/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `Ï„`.
4095/// If `v` is original householder vector, `q` is the new householder vector corresponding to `Ï„`, satisfying the following relation
4096///
4097/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4098///
4099/// # Errors
4100///
4101/// Returns an error if cuSOLVER has not been initialized, if the
4102/// matrix dimensions or leading dimension are invalid, if the current GPU
4103/// architecture is unsupported, or if cuSOLVER reports an internal failure.
4104pub fn zgeqrf(
4105 ctx: &Context,
4106 m: usize,
4107 n: usize,
4108 a: &mut DeviceMemory<Complex64>,
4109 lda: usize,
4110 tau: &mut DeviceMemory<Complex64>,
4111 workspace: &mut DeviceMemory<Complex64>,
4112 dev_info: &mut DeviceMemory<i32>,
4113) -> Result<()> {
4114 ctx.bind()?;
4115 validate_matrix(m, n, a.len(), lda)?;
4116 require_tau_buffer(tau, m.min(n))?;
4117 require_info_buffer(dev_info)?;
4118 let lwork = zgeqrf_buffer_size(ctx, m, n, a, lda)?;
4119 require_workspace(workspace.len(), lwork)?;
4120 unsafe {
4121 try_ffi!(sys::cusolverDnZgeqrf(
4122 ctx.as_raw(),
4123 to_i32(m, "m")?,
4124 to_i32(n, "n")?,
4125 a.as_mut_ptr().cast(),
4126 to_i32(lda, "lda")?,
4127 tau.as_mut_ptr().cast(),
4128 workspace.as_mut_ptr().cast(),
4129 to_i32(lwork, "lwork")?,
4130 dev_info.as_mut_ptr().cast(),
4131 ))?;
4132 }
4133 Ok(())
4134}
4135
4136pub fn sorgqr_buffer_size(
4137 ctx: &Context,
4138 m: usize,
4139 n: usize,
4140 k: usize,
4141 a: &DeviceMemory<f32>,
4142 lda: usize,
4143 tau: &DeviceMemory<f32>,
4144) -> Result<usize> {
4145 ctx.bind()?;
4146 validate_matrix(m, n, a.len(), lda)?;
4147 require_tau_buffer(tau, k)?;
4148 let mut lwork = 0;
4149 unsafe {
4150 try_ffi!(sys::cusolverDnSorgqr_bufferSize(
4151 ctx.as_raw(),
4152 to_i32(m, "m")?,
4153 to_i32(n, "n")?,
4154 to_i32(k, "k")?,
4155 a.as_ptr().cast(),
4156 to_i32(lda, "lda")?,
4157 tau.as_ptr().cast(),
4158 &raw mut lwork,
4159 ))?;
4160 }
4161 to_usize(lwork, "lwork")
4162}
4163
4164pub fn dorgqr_buffer_size(
4165 ctx: &Context,
4166 m: usize,
4167 n: usize,
4168 k: usize,
4169 a: &DeviceMemory<f64>,
4170 lda: usize,
4171 tau: &DeviceMemory<f64>,
4172) -> Result<usize> {
4173 ctx.bind()?;
4174 validate_matrix(m, n, a.len(), lda)?;
4175 require_tau_buffer(tau, k)?;
4176 let mut lwork = 0;
4177 unsafe {
4178 try_ffi!(sys::cusolverDnDorgqr_bufferSize(
4179 ctx.as_raw(),
4180 to_i32(m, "m")?,
4181 to_i32(n, "n")?,
4182 to_i32(k, "k")?,
4183 a.as_ptr().cast(),
4184 to_i32(lda, "lda")?,
4185 tau.as_ptr().cast(),
4186 &raw mut lwork,
4187 ))?;
4188 }
4189 to_usize(lwork, "lwork")
4190}
4191
4192pub fn cungqr_buffer_size(
4193 ctx: &Context,
4194 m: usize,
4195 n: usize,
4196 k: usize,
4197 a: &DeviceMemory<Complex32>,
4198 lda: usize,
4199 tau: &DeviceMemory<Complex32>,
4200) -> Result<usize> {
4201 ctx.bind()?;
4202 validate_matrix(m, n, a.len(), lda)?;
4203 require_tau_buffer(tau, k)?;
4204 let mut lwork = 0;
4205 unsafe {
4206 try_ffi!(sys::cusolverDnCungqr_bufferSize(
4207 ctx.as_raw(),
4208 to_i32(m, "m")?,
4209 to_i32(n, "n")?,
4210 to_i32(k, "k")?,
4211 a.as_ptr().cast(),
4212 to_i32(lda, "lda")?,
4213 tau.as_ptr().cast(),
4214 &raw mut lwork,
4215 ))?;
4216 }
4217 to_usize(lwork, "lwork")
4218}
4219
4220pub fn zungqr_buffer_size(
4221 ctx: &Context,
4222 m: usize,
4223 n: usize,
4224 k: usize,
4225 a: &DeviceMemory<Complex64>,
4226 lda: usize,
4227 tau: &DeviceMemory<Complex64>,
4228) -> Result<usize> {
4229 ctx.bind()?;
4230 validate_matrix(m, n, a.len(), lda)?;
4231 require_tau_buffer(tau, k)?;
4232 let mut lwork = 0;
4233 unsafe {
4234 try_ffi!(sys::cusolverDnZungqr_bufferSize(
4235 ctx.as_raw(),
4236 to_i32(m, "m")?,
4237 to_i32(n, "n")?,
4238 to_i32(k, "k")?,
4239 a.as_ptr().cast(),
4240 to_i32(lda, "lda")?,
4241 tau.as_ptr().cast(),
4242 &raw mut lwork,
4243 ))?;
4244 }
4245 to_usize(lwork, "lwork")
4246}
4247
4248/// Use the matching buffer-size helper to calculate the required workspace size.
4249///
4250/// The S and D data types are real valued single and double precision, respectively.
4251///
4252/// The C and Z data types are complex valued single and double precision, respectively.
4253///
4254/// Generates the first `n` columns of the orthogonal matrix `Q` from the
4255/// elementary reflectors returned by `geqrf` and stores them in `A`.
4256///
4257/// Provide workspace through `workspace`.
4258/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4259/// The workspace size in bytes is `size_of::<T>() * lwork`.
4260///
4261/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4262///
4263/// Callers can combine `geqrf` and `orgqr` to complete orthogonalization.
4264///
4265/// # Errors
4266///
4267/// Returns an error if cuSOLVER has not been initialized, if the
4268/// matrix dimensions, reflector count, or leading dimension are invalid, if
4269/// the current GPU architecture is unsupported, or if cuSOLVER reports an
4270/// internal failure.
4271pub fn sorgqr(
4272 ctx: &Context,
4273 m: usize,
4274 n: usize,
4275 k: usize,
4276 a: &mut DeviceMemory<f32>,
4277 lda: usize,
4278 tau: &DeviceMemory<f32>,
4279 workspace: &mut DeviceMemory<f32>,
4280 dev_info: &mut DeviceMemory<i32>,
4281) -> Result<()> {
4282 ctx.bind()?;
4283 validate_matrix(m, n, a.len(), lda)?;
4284 require_tau_buffer(tau, k)?;
4285 require_info_buffer(dev_info)?;
4286 let lwork = sorgqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4287 require_workspace(workspace.len(), lwork)?;
4288 unsafe {
4289 try_ffi!(sys::cusolverDnSorgqr(
4290 ctx.as_raw(),
4291 to_i32(m, "m")?,
4292 to_i32(n, "n")?,
4293 to_i32(k, "k")?,
4294 a.as_mut_ptr().cast(),
4295 to_i32(lda, "lda")?,
4296 tau.as_ptr().cast(),
4297 workspace.as_mut_ptr().cast(),
4298 to_i32(lwork, "lwork")?,
4299 dev_info.as_mut_ptr().cast(),
4300 ))?;
4301 }
4302 Ok(())
4303}
4304
4305/// Use the matching buffer-size helper to calculate the required workspace size.
4306///
4307/// The S and D data types are real valued single and double precision, respectively.
4308///
4309/// The C and Z data types are complex valued single and double precision, respectively.
4310///
4311/// Generates the first `n` columns of the orthogonal matrix `Q` from the
4312/// elementary reflectors returned by `geqrf` and stores them in `A`.
4313///
4314/// Provide workspace through `workspace`.
4315/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4316/// The workspace size in bytes is `size_of::<T>() * lwork`.
4317///
4318/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4319///
4320/// Callers can combine `geqrf` and `orgqr` to complete orthogonalization.
4321///
4322/// # Errors
4323///
4324/// Returns an error if cuSOLVER has not been initialized, if the
4325/// matrix dimensions, reflector count, or leading dimension are invalid, if
4326/// the current GPU architecture is unsupported, or if cuSOLVER reports an
4327/// internal failure.
4328pub fn dorgqr(
4329 ctx: &Context,
4330 m: usize,
4331 n: usize,
4332 k: usize,
4333 a: &mut DeviceMemory<f64>,
4334 lda: usize,
4335 tau: &DeviceMemory<f64>,
4336 workspace: &mut DeviceMemory<f64>,
4337 dev_info: &mut DeviceMemory<i32>,
4338) -> Result<()> {
4339 ctx.bind()?;
4340 validate_matrix(m, n, a.len(), lda)?;
4341 require_tau_buffer(tau, k)?;
4342 require_info_buffer(dev_info)?;
4343 let lwork = dorgqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4344 require_workspace(workspace.len(), lwork)?;
4345 unsafe {
4346 try_ffi!(sys::cusolverDnDorgqr(
4347 ctx.as_raw(),
4348 to_i32(m, "m")?,
4349 to_i32(n, "n")?,
4350 to_i32(k, "k")?,
4351 a.as_mut_ptr().cast(),
4352 to_i32(lda, "lda")?,
4353 tau.as_ptr().cast(),
4354 workspace.as_mut_ptr().cast(),
4355 to_i32(lwork, "lwork")?,
4356 dev_info.as_mut_ptr().cast(),
4357 ))?;
4358 }
4359 Ok(())
4360}
4361
4362pub fn cungqr(
4363 ctx: &Context,
4364 m: usize,
4365 n: usize,
4366 k: usize,
4367 a: &mut DeviceMemory<Complex32>,
4368 lda: usize,
4369 tau: &DeviceMemory<Complex32>,
4370 workspace: &mut DeviceMemory<Complex32>,
4371 dev_info: &mut DeviceMemory<i32>,
4372) -> Result<()> {
4373 ctx.bind()?;
4374 validate_matrix(m, n, a.len(), lda)?;
4375 require_tau_buffer(tau, k)?;
4376 require_info_buffer(dev_info)?;
4377 let lwork = cungqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4378 require_workspace(workspace.len(), lwork)?;
4379 unsafe {
4380 try_ffi!(sys::cusolverDnCungqr(
4381 ctx.as_raw(),
4382 to_i32(m, "m")?,
4383 to_i32(n, "n")?,
4384 to_i32(k, "k")?,
4385 a.as_mut_ptr().cast(),
4386 to_i32(lda, "lda")?,
4387 tau.as_ptr().cast(),
4388 workspace.as_mut_ptr().cast(),
4389 to_i32(lwork, "lwork")?,
4390 dev_info.as_mut_ptr().cast(),
4391 ))?;
4392 }
4393 Ok(())
4394}
4395
4396pub fn zungqr(
4397 ctx: &Context,
4398 m: usize,
4399 n: usize,
4400 k: usize,
4401 a: &mut DeviceMemory<Complex64>,
4402 lda: usize,
4403 tau: &DeviceMemory<Complex64>,
4404 workspace: &mut DeviceMemory<Complex64>,
4405 dev_info: &mut DeviceMemory<i32>,
4406) -> Result<()> {
4407 ctx.bind()?;
4408 validate_matrix(m, n, a.len(), lda)?;
4409 require_tau_buffer(tau, k)?;
4410 require_info_buffer(dev_info)?;
4411 let lwork = zungqr_buffer_size(ctx, m, n, k, a, lda, tau)?;
4412 require_workspace(workspace.len(), lwork)?;
4413 unsafe {
4414 try_ffi!(sys::cusolverDnZungqr(
4415 ctx.as_raw(),
4416 to_i32(m, "m")?,
4417 to_i32(n, "n")?,
4418 to_i32(k, "k")?,
4419 a.as_mut_ptr().cast(),
4420 to_i32(lda, "lda")?,
4421 tau.as_ptr().cast(),
4422 workspace.as_mut_ptr().cast(),
4423 to_i32(lwork, "lwork")?,
4424 dev_info.as_mut_ptr().cast(),
4425 ))?;
4426 }
4427 Ok(())
4428}
4429
4430pub fn sormqr_buffer_size(
4431 ctx: &Context,
4432 side: SideMode,
4433 operation: Operation,
4434 m: usize,
4435 n: usize,
4436 k: usize,
4437 a: &DeviceMemory<f32>,
4438 lda: usize,
4439 tau: &DeviceMemory<f32>,
4440 c: &DeviceMemory<f32>,
4441 ldc: usize,
4442) -> Result<usize> {
4443 ctx.bind()?;
4444 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4445 require_tau_buffer(tau, k)?;
4446 validate_matrix(m, n, c.len(), ldc)?;
4447 let mut lwork = 0;
4448 unsafe {
4449 try_ffi!(sys::cusolverDnSormqr_bufferSize(
4450 ctx.as_raw(),
4451 side.into(),
4452 operation.into(),
4453 to_i32(m, "m")?,
4454 to_i32(n, "n")?,
4455 to_i32(k, "k")?,
4456 a.as_ptr().cast(),
4457 to_i32(lda, "lda")?,
4458 tau.as_ptr().cast(),
4459 c.as_ptr().cast(),
4460 to_i32(ldc, "ldc")?,
4461 &raw mut lwork,
4462 ))?;
4463 }
4464 to_usize(lwork, "lwork")
4465}
4466
4467pub fn dormqr_buffer_size(
4468 ctx: &Context,
4469 side: SideMode,
4470 operation: Operation,
4471 m: usize,
4472 n: usize,
4473 k: usize,
4474 a: &DeviceMemory<f64>,
4475 lda: usize,
4476 tau: &DeviceMemory<f64>,
4477 c: &DeviceMemory<f64>,
4478 ldc: usize,
4479) -> Result<usize> {
4480 ctx.bind()?;
4481 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4482 require_tau_buffer(tau, k)?;
4483 validate_matrix(m, n, c.len(), ldc)?;
4484 let mut lwork = 0;
4485 unsafe {
4486 try_ffi!(sys::cusolverDnDormqr_bufferSize(
4487 ctx.as_raw(),
4488 side.into(),
4489 operation.into(),
4490 to_i32(m, "m")?,
4491 to_i32(n, "n")?,
4492 to_i32(k, "k")?,
4493 a.as_ptr().cast(),
4494 to_i32(lda, "lda")?,
4495 tau.as_ptr().cast(),
4496 c.as_ptr().cast(),
4497 to_i32(ldc, "ldc")?,
4498 &raw mut lwork,
4499 ))?;
4500 }
4501 to_usize(lwork, "lwork")
4502}
4503
4504pub fn cunmqr_buffer_size(
4505 ctx: &Context,
4506 side: SideMode,
4507 operation: Operation,
4508 m: usize,
4509 n: usize,
4510 k: usize,
4511 a: &DeviceMemory<Complex32>,
4512 lda: usize,
4513 tau: &DeviceMemory<Complex32>,
4514 c: &DeviceMemory<Complex32>,
4515 ldc: usize,
4516) -> Result<usize> {
4517 ctx.bind()?;
4518 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4519 require_tau_buffer(tau, k)?;
4520 validate_matrix(m, n, c.len(), ldc)?;
4521 let mut lwork = 0;
4522 unsafe {
4523 try_ffi!(sys::cusolverDnCunmqr_bufferSize(
4524 ctx.as_raw(),
4525 side.into(),
4526 operation.into(),
4527 to_i32(m, "m")?,
4528 to_i32(n, "n")?,
4529 to_i32(k, "k")?,
4530 a.as_ptr().cast(),
4531 to_i32(lda, "lda")?,
4532 tau.as_ptr().cast(),
4533 c.as_ptr().cast(),
4534 to_i32(ldc, "ldc")?,
4535 &raw mut lwork,
4536 ))?;
4537 }
4538 to_usize(lwork, "lwork")
4539}
4540
4541pub fn zunmqr_buffer_size(
4542 ctx: &Context,
4543 side: SideMode,
4544 operation: Operation,
4545 m: usize,
4546 n: usize,
4547 k: usize,
4548 a: &DeviceMemory<Complex64>,
4549 lda: usize,
4550 tau: &DeviceMemory<Complex64>,
4551 c: &DeviceMemory<Complex64>,
4552 ldc: usize,
4553) -> Result<usize> {
4554 ctx.bind()?;
4555 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4556 require_tau_buffer(tau, k)?;
4557 validate_matrix(m, n, c.len(), ldc)?;
4558 let mut lwork = 0;
4559 unsafe {
4560 try_ffi!(sys::cusolverDnZunmqr_bufferSize(
4561 ctx.as_raw(),
4562 side.into(),
4563 operation.into(),
4564 to_i32(m, "m")?,
4565 to_i32(n, "n")?,
4566 to_i32(k, "k")?,
4567 a.as_ptr().cast(),
4568 to_i32(lda, "lda")?,
4569 tau.as_ptr().cast(),
4570 c.as_ptr().cast(),
4571 to_i32(ldc, "ldc")?,
4572 &raw mut lwork,
4573 ))?;
4574 }
4575 to_usize(lwork, "lwork")
4576}
4577
4578/// Use the matching buffer-size helper to calculate the required workspace size.
4579///
4580/// The S and D data types are real valued single and double precision, respectively.
4581///
4582/// The C and Z data types are complex valued single and double precision, respectively.
4583///
4584/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
4585/// returned by `geqrf`, to `C` and stores the result in `C`.
4586///
4587/// `operation` selects whether `Q` is transposed.
4588///
4589/// `Q` is of order `m` if `side` = [`SideMode::Left`] and of order `n` if `side` = [`SideMode::Right`].
4590///
4591/// Provide workspace through `workspace`.
4592/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4593/// The workspace size in bytes is `size_of::<T>() * lwork`.
4594///
4595/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4596///
4597/// Callers can combine `geqrf`, `ormqr`, and `trsm` to complete a linear solver or a least-square solver.
4598///
4599/// # Errors
4600///
4601/// Returns an error if cuSOLVER has not been initialized, if the
4602/// matrix dimensions, reflector count, side/operation mode, or leading
4603/// dimensions are invalid, if the current GPU architecture is unsupported, or
4604/// if cuSOLVER reports an internal failure.
4605pub fn sormqr(
4606 ctx: &Context,
4607 side: SideMode,
4608 operation: Operation,
4609 m: usize,
4610 n: usize,
4611 k: usize,
4612 a: &DeviceMemory<f32>,
4613 lda: usize,
4614 tau: &DeviceMemory<f32>,
4615 c: &mut DeviceMemory<f32>,
4616 ldc: usize,
4617 workspace: &mut DeviceMemory<f32>,
4618 dev_info: &mut DeviceMemory<i32>,
4619) -> Result<()> {
4620 ctx.bind()?;
4621 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4622 require_tau_buffer(tau, k)?;
4623 validate_matrix(m, n, c.len(), ldc)?;
4624 require_info_buffer(dev_info)?;
4625 let lwork = sormqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4626 require_workspace(workspace.len(), lwork)?;
4627 unsafe {
4628 try_ffi!(sys::cusolverDnSormqr(
4629 ctx.as_raw(),
4630 side.into(),
4631 operation.into(),
4632 to_i32(m, "m")?,
4633 to_i32(n, "n")?,
4634 to_i32(k, "k")?,
4635 a.as_ptr().cast(),
4636 to_i32(lda, "lda")?,
4637 tau.as_ptr().cast(),
4638 c.as_mut_ptr().cast(),
4639 to_i32(ldc, "ldc")?,
4640 workspace.as_mut_ptr().cast(),
4641 to_i32(lwork, "lwork")?,
4642 dev_info.as_mut_ptr().cast(),
4643 ))?;
4644 }
4645 Ok(())
4646}
4647
4648/// Use the matching buffer-size helper to calculate the required workspace size.
4649///
4650/// The S and D data types are real valued single and double precision, respectively.
4651///
4652/// The C and Z data types are complex valued single and double precision, respectively.
4653///
4654/// Applies the orthogonal matrix `Q`, represented by the elementary reflectors
4655/// returned by `geqrf`, to `C` and stores the result in `C`.
4656///
4657/// `operation` selects whether `Q` is transposed.
4658///
4659/// `Q` is of order `m` if `side` = [`SideMode::Left`] and of order `n` if `side` = [`SideMode::Right`].
4660///
4661/// Provide workspace through `workspace`.
4662/// Use the corresponding `*_buffer_size` helper to query the required workspace length.
4663/// The workspace size in bytes is `size_of::<T>() * lwork`.
4664///
4665/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
4666///
4667/// Callers can combine `geqrf`, `ormqr`, and `trsm` to complete a linear solver or a least-square solver.
4668///
4669/// # Errors
4670///
4671/// Returns an error if cuSOLVER has not been initialized, if the
4672/// matrix dimensions, reflector count, side/operation mode, or leading
4673/// dimensions are invalid, if the current GPU architecture is unsupported, or
4674/// if cuSOLVER reports an internal failure.
4675pub fn dormqr(
4676 ctx: &Context,
4677 side: SideMode,
4678 operation: Operation,
4679 m: usize,
4680 n: usize,
4681 k: usize,
4682 a: &DeviceMemory<f64>,
4683 lda: usize,
4684 tau: &DeviceMemory<f64>,
4685 c: &mut DeviceMemory<f64>,
4686 ldc: usize,
4687 workspace: &mut DeviceMemory<f64>,
4688 dev_info: &mut DeviceMemory<i32>,
4689) -> Result<()> {
4690 ctx.bind()?;
4691 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4692 require_tau_buffer(tau, k)?;
4693 validate_matrix(m, n, c.len(), ldc)?;
4694 require_info_buffer(dev_info)?;
4695 let lwork = dormqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4696 require_workspace(workspace.len(), lwork)?;
4697 unsafe {
4698 try_ffi!(sys::cusolverDnDormqr(
4699 ctx.as_raw(),
4700 side.into(),
4701 operation.into(),
4702 to_i32(m, "m")?,
4703 to_i32(n, "n")?,
4704 to_i32(k, "k")?,
4705 a.as_ptr().cast(),
4706 to_i32(lda, "lda")?,
4707 tau.as_ptr().cast(),
4708 c.as_mut_ptr().cast(),
4709 to_i32(ldc, "ldc")?,
4710 workspace.as_mut_ptr().cast(),
4711 to_i32(lwork, "lwork")?,
4712 dev_info.as_mut_ptr().cast(),
4713 ))?;
4714 }
4715 Ok(())
4716}
4717
4718pub fn cunmqr(
4719 ctx: &Context,
4720 side: SideMode,
4721 operation: Operation,
4722 m: usize,
4723 n: usize,
4724 k: usize,
4725 a: &DeviceMemory<Complex32>,
4726 lda: usize,
4727 tau: &DeviceMemory<Complex32>,
4728 c: &mut DeviceMemory<Complex32>,
4729 ldc: usize,
4730 workspace: &mut DeviceMemory<Complex32>,
4731 dev_info: &mut DeviceMemory<i32>,
4732) -> Result<()> {
4733 ctx.bind()?;
4734 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4735 require_tau_buffer(tau, k)?;
4736 validate_matrix(m, n, c.len(), ldc)?;
4737 require_info_buffer(dev_info)?;
4738 let lwork = cunmqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4739 require_workspace(workspace.len(), lwork)?;
4740 unsafe {
4741 try_ffi!(sys::cusolverDnCunmqr(
4742 ctx.as_raw(),
4743 side.into(),
4744 operation.into(),
4745 to_i32(m, "m")?,
4746 to_i32(n, "n")?,
4747 to_i32(k, "k")?,
4748 a.as_ptr().cast(),
4749 to_i32(lda, "lda")?,
4750 tau.as_ptr().cast(),
4751 c.as_mut_ptr().cast(),
4752 to_i32(ldc, "ldc")?,
4753 workspace.as_mut_ptr().cast(),
4754 to_i32(lwork, "lwork")?,
4755 dev_info.as_mut_ptr().cast(),
4756 ))?;
4757 }
4758 Ok(())
4759}
4760
4761pub fn zunmqr(
4762 ctx: &Context,
4763 side: SideMode,
4764 operation: Operation,
4765 m: usize,
4766 n: usize,
4767 k: usize,
4768 a: &DeviceMemory<Complex64>,
4769 lda: usize,
4770 tau: &DeviceMemory<Complex64>,
4771 c: &mut DeviceMemory<Complex64>,
4772 ldc: usize,
4773 workspace: &mut DeviceMemory<Complex64>,
4774 dev_info: &mut DeviceMemory<i32>,
4775) -> Result<()> {
4776 ctx.bind()?;
4777 validate_matrix(qr_rows(side, m, n), k, a.len(), lda)?;
4778 require_tau_buffer(tau, k)?;
4779 validate_matrix(m, n, c.len(), ldc)?;
4780 require_info_buffer(dev_info)?;
4781 let lwork = zunmqr_buffer_size(ctx, side, operation, m, n, k, a, lda, tau, c, ldc)?;
4782 require_workspace(workspace.len(), lwork)?;
4783 unsafe {
4784 try_ffi!(sys::cusolverDnZunmqr(
4785 ctx.as_raw(),
4786 side.into(),
4787 operation.into(),
4788 to_i32(m, "m")?,
4789 to_i32(n, "n")?,
4790 to_i32(k, "k")?,
4791 a.as_ptr().cast(),
4792 to_i32(lda, "lda")?,
4793 tau.as_ptr().cast(),
4794 c.as_mut_ptr().cast(),
4795 to_i32(ldc, "ldc")?,
4796 workspace.as_mut_ptr().cast(),
4797 to_i32(lwork, "lwork")?,
4798 dev_info.as_mut_ptr().cast(),
4799 ))?;
4800 }
4801 Ok(())
4802}
4803
4804pub fn xgeqrf_buffer_size<TA: DataTypeLike, TTau: DataTypeLike>(
4805 ctx: &Context,
4806 params: &Params,
4807 m: usize,
4808 n: usize,
4809 a: MatrixRef<'_, TA>,
4810 tau: VectorRef<'_, TTau>,
4811 compute_type: DataType,
4812) -> Result<WorkspaceSizes> {
4813 ctx.bind()?;
4814 let a_type = TA::data_type();
4815 let tau_type = TTau::data_type();
4816 validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
4817 validate_x_vector(m.min(n), tau.data.byte_len(), tau_type)?;
4818 let mut device_bytes = 0;
4819 let mut host_bytes = 0;
4820 unsafe {
4821 try_ffi!(sys::cusolverDnXgeqrf_bufferSize(
4822 ctx.as_raw(),
4823 params.as_raw(),
4824 to_i64(m, "m")?,
4825 to_i64(n, "n")?,
4826 a_type.into(),
4827 a.data.as_ptr().cast(),
4828 to_i64(a.leading_dimension, "lda")?,
4829 tau_type.into(),
4830 tau.data.as_ptr().cast(),
4831 compute_type.into(),
4832 &raw mut device_bytes,
4833 &raw mut host_bytes,
4834 ))?;
4835 }
4836 Ok(WorkspaceSizes::new(
4837 device_bytes as usize,
4838 host_bytes as usize,
4839 ))
4840}
4841
4842/// Use [`xgeqrf_buffer_size`] to calculate the sizes needed for pre-allocated
4843/// workspace.
4844///
4845/// Computes the QR factorization of an $m \times n$ matrix.
4846///
4847/// Here `A` is an $m \times n$ matrix, `Q` is an $m \times n$ matrix, and
4848/// `R` is an $n \times n$ upper triangular matrix.
4849///
4850/// Provide device and host workspace through `workspace`.
4851/// Use [`xgeqrf_buffer_size`] to determine the required sizes for
4852/// `workspace.device` and `workspace.host`.
4853///
4854/// The matrix `R` overwrites the upper triangular part of `A`, including the
4855/// diagonal elements.
4856///
4857/// The matrix `Q` is not formed explicitly. Instead, a sequence of Householder
4858/// vectors is stored in the lower triangular part of `A`.
4859/// The leading nonzero element of the Householder vector is assumed to be 1, so `tau` contains the scaling factor `Ï„`.
4860/// If `v` is the original Householder vector, `q` is the new Householder vector
4861/// corresponding to `Ï„`.
4862///
4863/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
4864///
4865/// Currently, [`xgeqrf`] supports only the default algorithm.
4866///
4867/// **Algorithms supported by [`xgeqrf`]**
4868///
4869/// | Algorithm | Notes |
4870/// | --- | --- |
4871/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
4872///
4873/// List of input arguments for [`xgeqrf_buffer_size`] and [`xgeqrf`]:
4874///
4875/// The generic cuSOLVER routine separates matrix, tau-vector, and compute data types:
4876/// `data_type_a` is the data type of matrix `A`, `data_type_tau` is the data
4877/// type of `tau`, and `compute_type` is the operation's compute type.
4878/// [`xgeqrf`] only supports the following four combinations.
4879///
4880/// **Valid combination of data type and compute type**
4881///
4882/// | **data_type_a** | **compute_type** | **Meaning** |
4883/// | --- | --- | --- |
4884/// | [`DataType::F32`] | [`DataType::F32`] | `SGEQRF` |
4885/// | [`DataType::F64`] | [`DataType::F64`] | `DGEQRF` |
4886/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CGEQRF` |
4887/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZGEQRF` |
4888///
4889/// # Errors
4890///
4891/// Returns an error if cuSOLVER has not been initialized, if the
4892/// matrix dimensions or leading dimension are invalid, or if cuSOLVER reports
4893/// an internal failure.
4894pub fn xgeqrf<TA: DataTypeLike, TTau: DataTypeLike>(
4895 ctx: &Context,
4896 params: &Params,
4897 m: usize,
4898 n: usize,
4899 a: MatrixMut<'_, TA>,
4900 tau: VectorMut<'_, TTau>,
4901 compute_type: DataType,
4902 workspace: ByteWorkspaceMut<'_>,
4903 dev_info: &mut DeviceMemory<i32>,
4904) -> Result<()> {
4905 ctx.bind()?;
4906 let a_type = TA::data_type();
4907 let tau_type = TTau::data_type();
4908 validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
4909 validate_x_vector(m.min(n), tau.data.byte_len(), tau_type)?;
4910 require_info_buffer(dev_info)?;
4911 let workspace_sizes =
4912 xgeqrf_buffer_size(ctx, params, m, n, a.as_ref(), tau.as_ref(), compute_type)?;
4913 require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
4914 require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
4915 unsafe {
4916 try_ffi!(sys::cusolverDnXgeqrf(
4917 ctx.as_raw(),
4918 params.as_raw(),
4919 to_i64(m, "m")?,
4920 to_i64(n, "n")?,
4921 a_type.into(),
4922 a.data.as_mut_ptr().cast(),
4923 to_i64(a.leading_dimension, "lda")?,
4924 tau_type.into(),
4925 tau.data.as_mut_ptr().cast(),
4926 compute_type.into(),
4927 workspace.device.as_mut_ptr().cast(),
4928 workspace_sizes.device_bytes as _,
4929 workspace.host.as_mut_ptr().cast(),
4930 workspace_sizes.host_bytes as _,
4931 dev_info.as_mut_ptr().cast(),
4932 ))?;
4933 }
4934 Ok(())
4935}
4936
4937pub fn xpotrf_buffer_size<TA: DataTypeLike>(
4938 ctx: &Context,
4939 params: &Params,
4940 fill_mode: FillMode,
4941 n: usize,
4942 a: MatrixRef<'_, TA>,
4943 compute_type: DataType,
4944) -> Result<WorkspaceSizes> {
4945 ctx.bind()?;
4946 let a_type = TA::data_type();
4947 validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
4948 let mut device_bytes = 0;
4949 let mut host_bytes = 0;
4950 unsafe {
4951 try_ffi!(sys::cusolverDnXpotrf_bufferSize(
4952 ctx.as_raw(),
4953 params.as_raw(),
4954 fill_mode.into(),
4955 to_i64(n, "n")?,
4956 a_type.into(),
4957 a.data.as_ptr().cast(),
4958 to_i64(a.leading_dimension, "lda")?,
4959 compute_type.into(),
4960 &raw mut device_bytes,
4961 &raw mut host_bytes,
4962 ))?;
4963 }
4964 Ok(WorkspaceSizes::new(
4965 device_bytes as usize,
4966 host_bytes as usize,
4967 ))
4968}
4969
4970/// Use [`xpotrf_buffer_size`] to calculate the sizes needed for pre-allocated
4971/// workspace.
4972///
4973/// Computes the Cholesky factorization of a Hermitian positive-definite matrix.
4974///
4975/// `A` is an $n \times n$ Hermitian matrix; only its lower or upper triangular
4976/// part is meaningful.
4977/// `fill_mode` indicates which part of the matrix is used.
4978/// The operation leaves the other part untouched.
4979///
4980/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular Cholesky factor `L`.
4981///
4982/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular Cholesky factor `U`.
4983///
4984/// Provide device and host workspace through `workspace`.
4985/// Use [`xpotrf_buffer_size`] to determine the required sizes for
4986/// `workspace.device` and `workspace.host`.
4987///
4988/// If Cholesky factorization fails, some leading minor of `A` is not positive
4989/// definite, or equivalently some diagonal element of `L` or `U` is not a real
4990/// number.
4991/// `dev_info` reports the smallest leading minor of `A` that is not positive definite.
4992///
4993/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
4994///
4995/// Currently, [`xpotrf`] supports only the default algorithm.
4996///
4997/// **Algorithms supported by [`xpotrf`]**
4998///
4999/// | Algorithm | Notes |
5000/// | --- | --- |
5001/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
5002///
5003/// List of input arguments for [`xpotrf_buffer_size`] and [`xpotrf`]:
5004///
5005/// The generic cuSOLVER routine separates matrix and compute data types: `data_type_a` is
5006/// the data type of matrix `A`, and `compute_type` is the operation's compute
5007/// type.
5008/// [`xpotrf`] only supports the following four combinations.
5009///
5010/// **Valid combination of data type and compute type**
5011///
5012/// | **data_type_a** | **compute_type** | **Meaning** |
5013/// | --- | --- | --- |
5014/// | [`DataType::F32`] | [`DataType::F32`] | `SPOTRF` |
5015/// | [`DataType::F64`] | [`DataType::F64`] | `DPOTRF` |
5016/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CPOTRF` |
5017/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZPOTRF` |
5018///
5019/// # Errors
5020///
5021/// Returns an error if cuSOLVER has not been initialized, if the
5022/// matrix dimensions or leading dimension are invalid, or if cuSOLVER reports
5023/// an internal failure.
5024pub fn xpotrf<TA: DataTypeLike>(
5025 ctx: &Context,
5026 params: &Params,
5027 fill_mode: FillMode,
5028 n: usize,
5029 a: MatrixMut<'_, TA>,
5030 compute_type: DataType,
5031 workspace: ByteWorkspaceMut<'_>,
5032 dev_info: &mut DeviceMemory<i32>,
5033) -> Result<()> {
5034 ctx.bind()?;
5035 let a_type = TA::data_type();
5036 validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5037 require_info_buffer(dev_info)?;
5038 let workspace_sizes = xpotrf_buffer_size(ctx, params, fill_mode, n, a.as_ref(), compute_type)?;
5039 require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5040 require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5041 unsafe {
5042 try_ffi!(sys::cusolverDnXpotrf(
5043 ctx.as_raw(),
5044 params.as_raw(),
5045 fill_mode.into(),
5046 to_i64(n, "n")?,
5047 a_type.into(),
5048 a.data.as_mut_ptr().cast(),
5049 to_i64(a.leading_dimension, "lda")?,
5050 compute_type.into(),
5051 workspace.device.as_mut_ptr().cast(),
5052 workspace_sizes.device_bytes as _,
5053 workspace.host.as_mut_ptr().cast(),
5054 workspace_sizes.host_bytes as _,
5055 dev_info.as_mut_ptr().cast(),
5056 ))?;
5057 }
5058 Ok(())
5059}
5060
5061/// Solves a system of linear equations.
5062///
5063/// Here `A` is an $n \times n$ Hermitian matrix; only its lower or upper
5064/// triangular part is meaningful.
5065/// `fill_mode` indicates which part of the matrix is used.
5066/// The operation leaves the other part untouched.
5067///
5068/// Call [`xpotrf`] first to factorize matrix `A`.
5069/// If `fill_mode` is [`FillMode::Lower`], `A` is lower triangular Cholesky factor `L` corresponding to $A = L\cdot L^{H}$.
5070/// If `fill_mode` is [`FillMode::Upper`], `A` is upper triangular Cholesky factor `U` corresponding to $A = U^{H}\cdot U$.
5071///
5072/// The operation is in-place, that is, matrix `X` overwrites matrix `B` with the same leading dimension `ldb`.
5073///
5074/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5075///
5076/// Currently, [`xpotrs`] supports only the default algorithm.
5077///
5078/// **Algorithms supported by [`xpotrs`]**
5079///
5080/// | Algorithm | Notes |
5081/// | --- | --- |
5082/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
5083///
5084/// List of input arguments for [`xpotrs`]:
5085///
5086/// The generic cuSOLVER routine separates matrix data types: `data_type_a` is the data type
5087/// of matrix `A`, and `data_type_b` is the data type of matrix `B`.
5088/// [`xpotrs`] only supports the following four combinations.
5089///
5090/// **Valid combination of data type and compute type**
5091///
5092/// | **data_type_a** | **data_type_b** | **Meaning** |
5093/// | --- | --- | --- |
5094/// | [`DataType::F32`] | [`DataType::F32`] | `SPOTRS` |
5095/// | [`DataType::F64`] | [`DataType::F64`] | `DPOTRS` |
5096/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CPOTRS` |
5097/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZPOTRS` |
5098///
5099/// # Errors
5100///
5101/// Returns an error if cuSOLVER has not been initialized, if the
5102/// matrix dimensions, right-hand-side count, or leading dimensions are
5103/// invalid, or if cuSOLVER reports an internal failure.
5104pub fn xpotrs<TA: DataTypeLike, TB: DataTypeLike>(
5105 ctx: &Context,
5106 params: &Params,
5107 fill_mode: FillMode,
5108 n: usize,
5109 nrhs: usize,
5110 a: MatrixRef<'_, TA>,
5111 b: MatrixMut<'_, TB>,
5112 dev_info: &mut DeviceMemory<i32>,
5113) -> Result<()> {
5114 ctx.bind()?;
5115 let a_type = TA::data_type();
5116 let b_type = TB::data_type();
5117 validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5118 validate_x_matrix(n, nrhs, b.data.byte_len(), b.leading_dimension, b_type)?;
5119 require_info_buffer(dev_info)?;
5120 unsafe {
5121 try_ffi!(sys::cusolverDnXpotrs(
5122 ctx.as_raw(),
5123 params.as_raw(),
5124 fill_mode.into(),
5125 to_i64(n, "n")?,
5126 to_i64(nrhs, "nrhs")?,
5127 a_type.into(),
5128 a.data.as_ptr().cast(),
5129 to_i64(a.leading_dimension, "lda")?,
5130 b_type.into(),
5131 b.data.as_mut_ptr().cast(),
5132 to_i64(b.leading_dimension, "ldb")?,
5133 dev_info.as_mut_ptr().cast(),
5134 ))?;
5135 }
5136 Ok(())
5137}
5138
5139pub fn xtrtri_buffer_size<TA: DataTypeLike>(
5140 ctx: &Context,
5141 fill_mode: FillMode,
5142 diagonal_type: DiagonalType,
5143 n: usize,
5144 a: MatrixRef<'_, TA>,
5145) -> Result<WorkspaceSizes> {
5146 ctx.bind()?;
5147 validate_x_matrix(
5148 n,
5149 n,
5150 a.data.byte_len(),
5151 a.leading_dimension,
5152 TA::data_type(),
5153 )?;
5154 let mut device_bytes = 0;
5155 let mut host_bytes = 0;
5156 unsafe {
5157 try_ffi!(sys::cusolverDnXtrtri_bufferSize(
5158 ctx.as_raw(),
5159 fill_mode.into(),
5160 diagonal_type.into(),
5161 to_i64(n, "n")?,
5162 TA::data_type().into(),
5163 a.data.as_ptr().cast_mut().cast(),
5164 to_i64(a.leading_dimension, "lda")?,
5165 &raw mut device_bytes,
5166 &raw mut host_bytes,
5167 ))?;
5168 }
5169 Ok(WorkspaceSizes::new(
5170 device_bytes as usize,
5171 host_bytes as usize,
5172 ))
5173}
5174
5175/// Use the matching buffer-size helper to calculate the sizes needed for pre-allocated workspace.
5176///
5177/// Computes the inverse of a triangular matrix through the generic cuSOLVER routine.
5178///
5179/// `A` is an $n \times n$ triangular matrix, only lower or upper part is meaningful.
5180/// `fill_mode` indicates which part of the matrix is used.
5181/// The other triangular part is left unchanged.
5182///
5183/// If `fill_mode` is [`FillMode::Lower`], only the lower triangular part of `A` is processed and replaced by the lower triangular inverse.
5184///
5185/// If `fill_mode` is [`FillMode::Upper`], only the upper triangular part of `A` is processed and replaced by the upper triangular inverse.
5186///
5187/// Provide device and host workspace through `workspace`.
5188/// Use [`xtrtri_buffer_size`] to determine the required sizes for
5189/// `workspace.device` and `workspace.host`.
5190///
5191/// If matrix inversion fails, `dev_info = i` shows `A(i, i) = 0`.
5192///
5193/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5194///
5195/// List of input arguments for [`xtrtri_buffer_size`] and [`xtrtri`]:
5196///
5197/// **Valid data types**
5198///
5199/// | Algorithm | Notes |
5200/// | --- | --- |
5201/// | data type | Meaning |
5202/// | [`DataType::F32`] | `STRTRI` |
5203/// | [`DataType::F64`] | `DTRTRI` |
5204/// | [`DataType::ComplexF32`] | `CTRTRI` |
5205/// | [`DataType::ComplexF64`] | `ZTRTRI` |
5206///
5207/// # Errors
5208///
5209/// Returns an error if cuSOLVER has not been initialized, if the
5210/// matrix dimensions or leading dimension are invalid, if the data type is not
5211/// supported, or if cuSOLVER reports an internal failure.
5212pub fn xtrtri<TA: DataTypeLike>(
5213 ctx: &Context,
5214 fill_mode: FillMode,
5215 diagonal_type: DiagonalType,
5216 n: usize,
5217 a: MatrixMut<'_, TA>,
5218 workspace: ByteWorkspaceMut<'_>,
5219 dev_info: &mut DeviceMemory<i32>,
5220) -> Result<()> {
5221 ctx.bind()?;
5222 validate_x_matrix(
5223 n,
5224 n,
5225 a.data.byte_len(),
5226 a.leading_dimension,
5227 TA::data_type(),
5228 )?;
5229 require_info_buffer(dev_info)?;
5230 let workspace_sizes = xtrtri_buffer_size(ctx, fill_mode, diagonal_type, n, a.as_ref())?;
5231 require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5232 require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5233 unsafe {
5234 try_ffi!(sys::cusolverDnXtrtri(
5235 ctx.as_raw(),
5236 fill_mode.into(),
5237 diagonal_type.into(),
5238 to_i64(n, "n")?,
5239 TA::data_type().into(),
5240 a.data.as_mut_ptr().cast(),
5241 to_i64(a.leading_dimension, "lda")?,
5242 workspace.device.as_mut_ptr().cast(),
5243 workspace_sizes.device_bytes as _,
5244 workspace.host.as_mut_ptr().cast(),
5245 workspace_sizes.host_bytes as _,
5246 dev_info.as_mut_ptr().cast(),
5247 ))?;
5248 }
5249 Ok(())
5250}
5251
5252pub fn xgetrf_buffer_size<TA: DataTypeLike>(
5253 ctx: &Context,
5254 params: &Params,
5255 m: usize,
5256 n: usize,
5257 a: MatrixRef<'_, TA>,
5258 compute_type: DataType,
5259) -> Result<WorkspaceSizes> {
5260 ctx.bind()?;
5261 let a_type = TA::data_type();
5262 validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5263 let mut device_bytes = 0;
5264 let mut host_bytes = 0;
5265 unsafe {
5266 try_ffi!(sys::cusolverDnXgetrf_bufferSize(
5267 ctx.as_raw(),
5268 params.as_raw(),
5269 to_i64(m, "m")?,
5270 to_i64(n, "n")?,
5271 a_type.into(),
5272 a.data.as_ptr().cast(),
5273 to_i64(a.leading_dimension, "lda")?,
5274 compute_type.into(),
5275 &raw mut device_bytes,
5276 &raw mut host_bytes,
5277 ))?;
5278 }
5279 Ok(WorkspaceSizes::new(
5280 device_bytes as usize,
5281 host_bytes as usize,
5282 ))
5283}
5284
5285/// Computes the LU factorization of an $m \times n$ matrix
5286///
5287/// where `A` is an $m \times n$ matrix, `P` is a permutation matrix, `L` is a lower triangular matrix with unit diagonal, and `U` is an upper triangular matrix.
5288///
5289/// If LU factorization failed, that is, matrix `A` (`U`) is singular, `dev_info = i` indicates `U(i,i) = 0`.
5290///
5291/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5292///
5293/// If `pivots` is `None`, no pivoting is performed.
5294/// The factorization is `A=L*U`, which is not numerically stable.
5295///
5296/// Whether LU factorization succeeds or fails, `pivots` contains the pivoting
5297/// sequence. Row `i` is interchanged with row `pivots[i]`.
5298///
5299/// Provide device and host workspace through `workspace`.
5300/// Use [`xgetrf_buffer_size`] to determine the required sizes for
5301/// `workspace.device` and `workspace.host`.
5302///
5303/// Callers can combine [`xgetrf`] and [`xgetrs`] to complete a linear solver.
5304///
5305/// Currently, [`xgetrf`] supports two algorithms.
5306/// To select the legacy implementation, call [`Params::set_adv_options`].
5307///
5308/// **Algorithms supported by [`xgetrf`]**
5309///
5310/// | Algorithm | Notes |
5311/// | --- | --- |
5312/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Fastest algorithm; requires a large workspace of `m*n` elements. |
5313/// | [`AlgorithmMode::Algorithm1`](crate::types::AlgorithmMode::Algorithm1) | Legacy implementation. |
5314///
5315/// List of input arguments for [`xgetrf_buffer_size`] and [`xgetrf`]:
5316///
5317/// The generic cuSOLVER routine has two data types: `data_type_a` is the data type of matrix `A`, and `compute_type` is the operation's compute type.
5318/// [`xgetrf`] only supports the following four combinations.
5319///
5320/// **Valid combination of data type and compute type**
5321///
5322/// | **data_type_a** | **compute_type** | **Meaning** |
5323/// | --- | --- | --- |
5324/// | [`DataType::F32`] | [`DataType::F32`] | `SGETRF` |
5325/// | [`DataType::F64`] | [`DataType::F64`] | `DGETRF` |
5326/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CGETRF` |
5327/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZGETRF` |
5328///
5329/// # Errors
5330///
5331/// Returns an error if cuSOLVER has not been initialized, if the
5332/// matrix dimensions or leading dimension are invalid, or if cuSOLVER reports
5333/// an internal failure.
5334pub fn xgetrf<TA: DataTypeLike>(
5335 ctx: &Context,
5336 params: &Params,
5337 m: usize,
5338 n: usize,
5339 a: MatrixMut<'_, TA>,
5340 pivots: Option<&mut DeviceMemory<i64>>,
5341 compute_type: DataType,
5342 workspace: ByteWorkspaceMut<'_>,
5343 dev_info: &mut DeviceMemory<i32>,
5344) -> Result<()> {
5345 ctx.bind()?;
5346 let a_type = TA::data_type();
5347 validate_x_matrix(m, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5348 if let Some(pivots) = pivots.as_ref() {
5349 require_pivot64_buffer(pivots, m.min(n))?;
5350 }
5351 require_info_buffer(dev_info)?;
5352 let workspace_sizes = xgetrf_buffer_size(ctx, params, m, n, a.as_ref(), compute_type)?;
5353 require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5354 require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5355 unsafe {
5356 try_ffi!(sys::cusolverDnXgetrf(
5357 ctx.as_raw(),
5358 params.as_raw(),
5359 to_i64(m, "m")?,
5360 to_i64(n, "n")?,
5361 a_type.into(),
5362 a.data.as_mut_ptr().cast(),
5363 to_i64(a.leading_dimension, "lda")?,
5364 pivots.map_or(std::ptr::null_mut(), |p| p.as_mut_ptr()),
5365 compute_type.into(),
5366 workspace.device.as_mut_ptr().cast(),
5367 workspace_sizes.device_bytes as _,
5368 workspace.host.as_mut_ptr().cast(),
5369 workspace_sizes.host_bytes as _,
5370 dev_info.as_mut_ptr().cast(),
5371 ))?;
5372 }
5373 Ok(())
5374}
5375
5376/// Solves a linear system of multiple right-hand sides
5377///
5378/// where `A` is an $n \times n$ matrix, and was LU-factored by [`xgetrf`], that is, lower triangular part of A is `L`, and upper triangular part (including diagonal elements) of `A` is `U`.
5379/// `B` is an $n \times {nrhs}$ right-hand side matrix.
5380///
5381/// The `operation` argument is described by [`Operation`].
5382///
5383/// `pivots` is an output of [`xgetrf`].
5384/// It contains the pivot indices used to permute the right-hand sides.
5385///
5386/// If the reported `info` value is `-i`, the `i`th parameter is invalid.
5387///
5388/// Callers can combine [`xgetrf`] and [`xgetrs`] to complete a linear solver.
5389///
5390/// Currently, [`xgetrs`] supports only the default algorithm.
5391///
5392/// **Algorithms supported by [`xgetrs`]**
5393///
5394/// | Algorithm | Notes |
5395/// | --- | --- |
5396/// | [`AlgorithmMode::Default`](crate::types::AlgorithmMode::Default) | Default algorithm. |
5397///
5398/// List of input arguments for [`xgetrs`]:
5399///
5400/// The generic cuSOLVER routine has two data types: `data_type_a` is the data type of matrix `A`, and `data_type_b` is the data type of matrix `B`.
5401/// [`xgetrs`] only supports the following four combinations:
5402///
5403/// **Valid combination of data type and compute type**
5404///
5405/// | **data_type_a** | **data_type_b** | **Meaning** |
5406/// | --- | --- | --- |
5407/// | [`DataType::F32`] | [`DataType::F32`] | `SGETRS` |
5408/// | [`DataType::F64`] | [`DataType::F64`] | `DGETRS` |
5409/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CGETRS` |
5410/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZGETRS` |
5411///
5412/// # Errors
5413///
5414/// Returns an error if cuSOLVER has not been initialized, if the
5415/// matrix dimensions or leading dimensions are invalid, or if cuSOLVER reports
5416/// an internal failure.
5417pub fn xgetrs<TA: DataTypeLike, TB: DataTypeLike>(
5418 ctx: &Context,
5419 params: &Params,
5420 operation: Operation,
5421 n: usize,
5422 nrhs: usize,
5423 a: MatrixRef<'_, TA>,
5424 pivots: &DeviceMemory<i64>,
5425 b: MatrixMut<'_, TB>,
5426 dev_info: &mut DeviceMemory<i32>,
5427) -> Result<()> {
5428 ctx.bind()?;
5429 let a_type = TA::data_type();
5430 let b_type = TB::data_type();
5431 validate_x_matrix(n, n, a.data.byte_len(), a.leading_dimension, a_type)?;
5432 require_pivot64_buffer(pivots, n)?;
5433 validate_x_matrix(n, nrhs, b.data.byte_len(), b.leading_dimension, b_type)?;
5434 require_info_buffer(dev_info)?;
5435 unsafe {
5436 try_ffi!(sys::cusolverDnXgetrs(
5437 ctx.as_raw(),
5438 params.as_raw(),
5439 operation.into(),
5440 to_i64(n, "n")?,
5441 to_i64(nrhs, "nrhs")?,
5442 a_type.into(),
5443 a.data.as_ptr().cast(),
5444 to_i64(a.leading_dimension, "lda")?,
5445 pivots.as_ptr().cast(),
5446 b_type.into(),
5447 b.data.as_mut_ptr().cast(),
5448 to_i64(b.leading_dimension, "ldb")?,
5449 dev_info.as_mut_ptr().cast(),
5450 ))?;
5451 }
5452 Ok(())
5453}
5454
5455pub fn xsytrs_buffer_size<TA: DataTypeLike, TB: DataTypeLike>(
5456 ctx: &Context,
5457 fill_mode: FillMode,
5458 n: usize,
5459 nrhs: usize,
5460 a: MatrixRef<'_, TA>,
5461 pivots: Option<&DeviceMemory<i64>>,
5462 b: MatrixRef<'_, TB>,
5463) -> Result<WorkspaceSizes> {
5464 ctx.bind()?;
5465 validate_x_matrix(
5466 n,
5467 n,
5468 a.data.byte_len(),
5469 a.leading_dimension,
5470 TA::data_type(),
5471 )?;
5472 validate_x_matrix(
5473 n,
5474 nrhs,
5475 b.data.byte_len(),
5476 b.leading_dimension,
5477 TB::data_type(),
5478 )?;
5479 if let Some(pivots) = pivots {
5480 require_pivot64_buffer(pivots, n)?;
5481 }
5482
5483 let mut device_bytes = 0;
5484 let mut host_bytes = 0;
5485 unsafe {
5486 try_ffi!(sys::cusolverDnXsytrs_bufferSize(
5487 ctx.as_raw(),
5488 fill_mode.into(),
5489 to_i64(n, "n")?,
5490 to_i64(nrhs, "nrhs")?,
5491 TA::data_type().into(),
5492 a.data.as_ptr().cast(),
5493 to_i64(a.leading_dimension, "lda")?,
5494 pivots.map_or(std::ptr::null(), DeviceMemory::as_ptr),
5495 TB::data_type().into(),
5496 b.data.as_ptr().cast_mut().cast(),
5497 to_i64(b.leading_dimension, "ldb")?,
5498 &raw mut device_bytes,
5499 &raw mut host_bytes,
5500 ))?;
5501 }
5502 Ok(WorkspaceSizes::new(
5503 device_bytes as usize,
5504 host_bytes as usize,
5505 ))
5506}
5507
5508/// Use the matching buffer-size helper to calculate the sizes needed for pre-allocated workspace.
5509///
5510/// Solves a system of linear equations through the generic cuSOLVER routine.
5511///
5512/// `A` contains the factorization produced by the typed `*sytrf` operations in this module.
5513/// Only the lower or upper part is meaningful; the other part is left untouched.
5514///
5515/// Provide the pivot indices returned by the matching `*sytrf` operation, along
5516/// with device and host workspace through `workspace`.
5517/// Use [`xsytrs_buffer_size`] to determine the required sizes for
5518/// `workspace.device` and `workspace.host`.
5519/// To factorize and solve the symmetric system without pivoting, pass `None`
5520/// for the pivot buffer to both the matching `*sytrf` operation and [`xsytrs`].
5521///
5522/// If the reported `dev_info` value is `-i`, the `i`th parameter is invalid.
5523///
5524/// List of input arguments for [`xsytrs_buffer_size`] and [`xsytrs`]:
5525///
5526/// The generic cuSOLVER routine has two data types: `data_type_a` is the data type of the
5527/// matrix `A`, and `data_type_b` is the data type of the matrix `B`.
5528/// [`xsytrs`] only supports the following four combinations:
5529///
5530/// **Valid combination of data type and compute type**
5531///
5532/// | **data_type_a** | **data_type_b** | **Meaning** |
5533/// | --- | --- | --- |
5534/// | [`DataType::F32`] | [`DataType::F32`] | `SSYTRS` |
5535/// | [`DataType::F64`] | [`DataType::F64`] | `DSYTRS` |
5536/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CSYTRS` |
5537/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZSYTRS` |
5538///
5539/// # Errors
5540///
5541/// Returns an error if cuSOLVER has not been initialized, if the
5542/// matrix dimensions or leading dimension are invalid, if the matrix data type
5543/// is not supported, or if cuSOLVER reports an internal failure.
5544pub fn xsytrs<TA: DataTypeLike, TB: DataTypeLike>(
5545 ctx: &Context,
5546 fill_mode: FillMode,
5547 n: usize,
5548 nrhs: usize,
5549 a: MatrixRef<'_, TA>,
5550 pivots: Option<&DeviceMemory<i64>>,
5551 b: MatrixMut<'_, TB>,
5552 workspace: ByteWorkspaceMut<'_>,
5553 dev_info: &mut DeviceMemory<i32>,
5554) -> Result<()> {
5555 ctx.bind()?;
5556 validate_x_matrix(
5557 n,
5558 n,
5559 a.data.byte_len(),
5560 a.leading_dimension,
5561 TA::data_type(),
5562 )?;
5563 validate_x_matrix(
5564 n,
5565 nrhs,
5566 b.data.byte_len(),
5567 b.leading_dimension,
5568 TB::data_type(),
5569 )?;
5570 if let Some(pivots) = pivots {
5571 require_pivot64_buffer(pivots, n)?;
5572 }
5573 require_info_buffer(dev_info)?;
5574 let workspace_sizes = xsytrs_buffer_size(ctx, fill_mode, n, nrhs, a, pivots, b.as_ref())?;
5575 require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5576 require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5577 unsafe {
5578 try_ffi!(sys::cusolverDnXsytrs(
5579 ctx.as_raw(),
5580 fill_mode.into(),
5581 to_i64(n, "n")?,
5582 to_i64(nrhs, "nrhs")?,
5583 TA::data_type().into(),
5584 a.data.as_ptr().cast(),
5585 to_i64(a.leading_dimension, "lda")?,
5586 pivots.map_or(std::ptr::null(), DeviceMemory::as_ptr),
5587 TB::data_type().into(),
5588 b.data.as_mut_ptr().cast(),
5589 to_i64(b.leading_dimension, "ldb")?,
5590 workspace.device.as_mut_ptr().cast(),
5591 workspace_sizes.device_bytes as _,
5592 workspace.host.as_mut_ptr().cast(),
5593 workspace_sizes.host_bytes as _,
5594 dev_info.as_mut_ptr().cast(),
5595 ))?;
5596 }
5597 Ok(())
5598}
5599
5600pub fn xlarft_buffer_size<TV: DataTypeLike, TTau: DataTypeLike, TT: DataTypeLike>(
5601 ctx: &Context,
5602 params: &Params,
5603 direct: DirectMode,
5604 storev: StorevMode,
5605 n: usize,
5606 k: usize,
5607 v: MatrixRef<'_, TV>,
5608 tau: VectorRef<'_, TTau>,
5609 t: MatrixRef<'_, TT>,
5610 compute_type: DataType,
5611) -> Result<WorkspaceSizes> {
5612 ctx.bind()?;
5613 let v_type = TV::data_type();
5614 let tau_type = TTau::data_type();
5615 let t_type = TT::data_type();
5616 validate_xlarft_inputs(
5617 n,
5618 k,
5619 storev,
5620 v.data.byte_len(),
5621 v.leading_dimension,
5622 v_type,
5623 tau.data.byte_len(),
5624 tau_type,
5625 t.data.byte_len(),
5626 t.leading_dimension,
5627 t_type,
5628 )?;
5629 let mut device_bytes = 0;
5630 let mut host_bytes = 0;
5631 unsafe {
5632 try_ffi!(sys::cusolverDnXlarft_bufferSize(
5633 ctx.as_raw(),
5634 params.as_raw(),
5635 direct.into(),
5636 storev.into(),
5637 to_i64(n, "n")?,
5638 to_i64(k, "k")?,
5639 v_type.into(),
5640 v.data.as_ptr().cast(),
5641 to_i64(v.leading_dimension, "ldv")?,
5642 tau_type.into(),
5643 tau.data.as_ptr().cast(),
5644 t_type.into(),
5645 t.data.as_ptr().cast_mut().cast(),
5646 to_i64(t.leading_dimension, "ldt")?,
5647 compute_type.into(),
5648 &raw mut device_bytes,
5649 &raw mut host_bytes,
5650 ))?;
5651 }
5652 Ok(WorkspaceSizes::new(
5653 device_bytes as usize,
5654 host_bytes as usize,
5655 ))
5656}
5657
5658/// Use the matching buffer-size helper to calculate the sizes needed for pre-allocated workspace.
5659///
5660/// Forms the triangular factor `T` of a real block reflector `H` of order `n`,
5661/// which is defined as a product of `k` elementary reflectors.
5662///
5663/// Only [`StorevMode::Columnwise`] storage is supported. This means the vector
5664/// defining the elementary reflector `H(i)` is stored in the `i`th column of
5665/// `V`, and $H = I - V \cdot T \cdot V^{T}$ ($H = I - V \cdot T \cdot V^{H}$
5666/// for complex types).
5667///
5668/// Provide device and host workspace through `workspace`.
5669/// Use [`xlarft_buffer_size`] to determine the required sizes for
5670/// `workspace.device` and `workspace.host`.
5671///
5672/// Currently, only the `n >= k` scenario is supported.
5673///
5674/// The generic cuSOLVER routine has four data types:
5675///
5676/// [`xlarft`] only supports the following four combinations.
5677///
5678/// **Valid combinations of data types and compute types**
5679///
5680/// | **data_type_v** | **data_type_tau** | **data_type_t** | **compute_type** | **Meaning** |
5681/// | --- | --- | --- | --- | --- |
5682/// | [`DataType::F32`] | [`DataType::F32`] | [`DataType::F32`] | [`DataType::F32`] | `SLARFT` |
5683/// | [`DataType::F64`] | [`DataType::F64`] | [`DataType::F64`] | [`DataType::F64`] | `DLARFT` |
5684/// | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | [`DataType::ComplexF32`] | `CLARFT` |
5685/// | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | [`DataType::ComplexF64`] | `ZLARFT` |
5686///
5687/// # Errors
5688///
5689/// Returns an error if cuSOLVER has not been initialized, if the
5690/// reflector dimensions or storage mode are invalid, or if cuSOLVER reports an
5691/// internal failure.
5692pub fn xlarft<TV: DataTypeLike, TTau: DataTypeLike, TT: DataTypeLike>(
5693 ctx: &Context,
5694 params: &Params,
5695 direct: DirectMode,
5696 storev: StorevMode,
5697 n: usize,
5698 k: usize,
5699 v: MatrixRef<'_, TV>,
5700 tau: VectorRef<'_, TTau>,
5701 t: MatrixMut<'_, TT>,
5702 compute_type: DataType,
5703 workspace: ByteWorkspaceMut<'_>,
5704) -> Result<()> {
5705 ctx.bind()?;
5706 let v_type = TV::data_type();
5707 let tau_type = TTau::data_type();
5708 let t_type = TT::data_type();
5709 validate_xlarft_inputs(
5710 n,
5711 k,
5712 storev,
5713 v.data.byte_len(),
5714 v.leading_dimension,
5715 v_type,
5716 tau.data.byte_len(),
5717 tau_type,
5718 t.data.byte_len(),
5719 t.leading_dimension,
5720 t_type,
5721 )?;
5722 let workspace_sizes = xlarft_buffer_size(
5723 ctx,
5724 params,
5725 direct,
5726 storev,
5727 n,
5728 k,
5729 v,
5730 tau,
5731 t.as_ref(),
5732 compute_type,
5733 )?;
5734 require_workspace_bytes(workspace.device.byte_len(), workspace_sizes.device_bytes)?;
5735 require_host_workspace(workspace.host.len(), workspace_sizes.host_bytes)?;
5736 unsafe {
5737 try_ffi!(sys::cusolverDnXlarft(
5738 ctx.as_raw(),
5739 params.as_raw(),
5740 direct.into(),
5741 storev.into(),
5742 to_i64(n, "n")?,
5743 to_i64(k, "k")?,
5744 v_type.into(),
5745 v.data.as_ptr().cast(),
5746 to_i64(v.leading_dimension, "ldv")?,
5747 tau_type.into(),
5748 tau.data.as_ptr().cast(),
5749 t_type.into(),
5750 t.data.as_mut_ptr().cast(),
5751 to_i64(t.leading_dimension, "ldt")?,
5752 compute_type.into(),
5753 workspace.device.as_mut_ptr().cast(),
5754 workspace_sizes.device_bytes as _,
5755 workspace.host.as_mut_ptr().cast(),
5756 workspace_sizes.host_bytes as _,
5757 ))?;
5758 }
5759 Ok(())
5760}
5761
5762fn validate_square_matrix(n: usize, len: usize, lda: usize) -> Result<()> {
5763 validate_matrix(n, n, len, lda)
5764}
5765
5766fn validate_matrix(rows: usize, cols: usize, len: usize, lda: usize) -> Result<()> {
5767 if rows == 0 || cols == 0 {
5768 return Err(Error::InvalidMatrixShape);
5769 }
5770 if lda < rows {
5771 return Err(Error::InvalidLeadingDimension);
5772 }
5773 let required = lda.checked_mul(cols).ok_or(Error::InvalidMatrixShape)?;
5774 if len < required {
5775 return Err(Error::InvalidMatrixShape);
5776 }
5777 Ok(())
5778}
5779
5780fn require_workspace(actual: usize, required: usize) -> Result<()> {
5781 if actual < required {
5782 return Err(Error::InsufficientWorkspaceSize { required, actual });
5783 }
5784 Ok(())
5785}
5786
5787fn require_workspace_bytes(actual: usize, required: usize) -> Result<()> {
5788 if actual < required {
5789 return Err(Error::InsufficientWorkspaceSize { required, actual });
5790 }
5791 Ok(())
5792}
5793
5794fn require_host_workspace(actual: usize, required: usize) -> Result<()> {
5795 if actual < required {
5796 return Err(Error::InsufficientWorkspaceSize { required, actual });
5797 }
5798 Ok(())
5799}
5800
5801fn require_info_buffer(dev_info: &DeviceMemory<i32>) -> Result<()> {
5802 if dev_info.is_empty() {
5803 return Err(Error::InvalidVectorShape);
5804 }
5805 Ok(())
5806}
5807
5808fn require_info_entries(dev_info: &DeviceMemory<i32>, required: usize) -> Result<()> {
5809 if dev_info.len() < required {
5810 return Err(Error::InvalidVectorShape);
5811 }
5812 Ok(())
5813}
5814
5815fn require_pivot_buffer(pivots: &DeviceMemory<i32>, required: usize) -> Result<()> {
5816 if pivots.len() < required {
5817 return Err(Error::InvalidVectorShape);
5818 }
5819 Ok(())
5820}
5821
5822fn require_pivot64_buffer(pivots: &DeviceMemory<i64>, required: usize) -> Result<()> {
5823 if pivots.len() < required {
5824 return Err(Error::InvalidVectorShape);
5825 }
5826 Ok(())
5827}
5828
5829fn require_tau_buffer<T>(tau: &DeviceMemory<T>, required: usize) -> Result<()> {
5830 if tau.len() < required {
5831 return Err(Error::InvalidVectorShape);
5832 }
5833 Ok(())
5834}
5835
5836fn qr_rows(side: SideMode, m: usize, n: usize) -> usize {
5837 match side {
5838 SideMode::Left => m,
5839 SideMode::Right => n,
5840 }
5841}
5842
5843fn tridiagonal_order(side: SideMode, m: usize, n: usize) -> usize {
5844 match side {
5845 SideMode::Left => m,
5846 SideMode::Right => n,
5847 }
5848}
5849
5850fn validate_bidiagonal_dims(m: usize, n: usize) -> Result<()> {
5851 if m == 0 || n == 0 || m < n {
5852 return Err(Error::InvalidMatrixShape);
5853 }
5854 Ok(())
5855}
5856
5857fn validate_bidiagonal_buffers(
5858 m: usize,
5859 n: usize,
5860 a_len: usize,
5861 lda: usize,
5862 d_len: usize,
5863 e_len: usize,
5864 tauq_len: usize,
5865 taup_len: usize,
5866) -> Result<()> {
5867 validate_bidiagonal_dims(m, n)?;
5868 validate_matrix(m, n, a_len, lda)?;
5869 if d_len < n || e_len < n || tauq_len < n || taup_len < n {
5870 return Err(Error::InvalidVectorShape);
5871 }
5872 Ok(())
5873}
5874
5875fn validate_orgbr_inputs(
5876 side: SideMode,
5877 m: usize,
5878 n: usize,
5879 k: usize,
5880 a_len: usize,
5881 lda: usize,
5882 tau_len: usize,
5883) -> Result<()> {
5884 if m == 0 || n == 0 || k == 0 {
5885 return Err(Error::InvalidMatrixShape);
5886 }
5887 validate_matrix(m, n, a_len, lda)?;
5888 if tau_len < k {
5889 return Err(Error::InvalidVectorShape);
5890 }
5891 match side {
5892 SideMode::Left if m < n || k > m => Err(Error::InvalidMatrixShape),
5893 SideMode::Right if n < m || k > n => Err(Error::InvalidMatrixShape),
5894 _ => Ok(()),
5895 }
5896}
5897
5898fn validate_sytrd_inputs(
5899 n: usize,
5900 a_len: usize,
5901 lda: usize,
5902 d_len: usize,
5903 e_len: usize,
5904 tau_len: usize,
5905) -> Result<()> {
5906 validate_square_matrix(n, a_len, lda)?;
5907 let reflectors = n.saturating_sub(1);
5908 if d_len < n || e_len < reflectors || tau_len < reflectors {
5909 return Err(Error::InvalidVectorShape);
5910 }
5911 Ok(())
5912}
5913
5914fn validate_orgtr_inputs(n: usize, a_len: usize, lda: usize, tau_len: usize) -> Result<()> {
5915 validate_square_matrix(n, a_len, lda)?;
5916 if tau_len < n.saturating_sub(1) {
5917 return Err(Error::InvalidVectorShape);
5918 }
5919 Ok(())
5920}
5921
5922fn validate_ormtr_inputs(
5923 side: SideMode,
5924 m: usize,
5925 n: usize,
5926 a_len: usize,
5927 lda: usize,
5928 tau_len: usize,
5929 c_len: usize,
5930 ldc: usize,
5931) -> Result<()> {
5932 let nq = tridiagonal_order(side, m, n);
5933 validate_square_matrix(nq, a_len, lda)?;
5934 validate_matrix(m, n, c_len, ldc)?;
5935 if tau_len < nq.saturating_sub(1) {
5936 return Err(Error::InvalidVectorShape);
5937 }
5938 Ok(())
5939}
5940
5941fn validate_batched_square_matrix_pointers<T>(
5942 n: usize,
5943 matrices: BatchedMatrixRef<'_, T>,
5944) -> Result<()> {
5945 if n == 0 || matrices.is_empty() {
5946 return Err(Error::InvalidMatrixShape);
5947 }
5948 if matrices.leading_dimension < n {
5949 return Err(Error::InvalidLeadingDimension);
5950 }
5951 Ok(())
5952}
5953
5954fn validate_batched_vector_pointers<T>(n: usize, vectors: BatchedVectorRef<'_, T>) -> Result<()> {
5955 if n == 0 || vectors.is_empty() {
5956 return Err(Error::InvalidVectorShape);
5957 }
5958 if vectors.leading_dimension < n {
5959 return Err(Error::InvalidLeadingDimension);
5960 }
5961 Ok(())
5962}
5963
5964fn validate_x_matrix(
5965 rows: usize,
5966 cols: usize,
5967 bytes: usize,
5968 lda: usize,
5969 data_type: DataType,
5970) -> Result<()> {
5971 if rows == 0 || cols == 0 {
5972 return Err(Error::InvalidMatrixShape);
5973 }
5974 if lda < rows {
5975 return Err(Error::InvalidLeadingDimension);
5976 }
5977 let required = lda
5978 .checked_mul(cols)
5979 .and_then(|count| count.checked_mul(data_type.size_in_bytes()))
5980 .ok_or(Error::InvalidMatrixShape)?;
5981 if bytes < required {
5982 return Err(Error::InvalidMatrixShape);
5983 }
5984 Ok(())
5985}
5986
5987fn validate_x_vector(len: usize, bytes: usize, data_type: DataType) -> Result<()> {
5988 let required = len
5989 .checked_mul(data_type.size_in_bytes())
5990 .ok_or(Error::InvalidVectorShape)?;
5991 if bytes < required {
5992 return Err(Error::InvalidVectorShape);
5993 }
5994 Ok(())
5995}
5996
5997fn validate_xlarft_inputs(
5998 n: usize,
5999 k: usize,
6000 storev: StorevMode,
6001 v_bytes: usize,
6002 ldv: usize,
6003 v_type: DataType,
6004 tau_bytes: usize,
6005 tau_type: DataType,
6006 t_bytes: usize,
6007 ldt: usize,
6008 t_type: DataType,
6009) -> Result<()> {
6010 if n == 0 || k == 0 || k > n {
6011 return Err(Error::InvalidMatrixShape);
6012 }
6013 if storev != StorevMode::Columnwise {
6014 return Err(Error::InvalidMatrixShape);
6015 }
6016 validate_x_matrix(n, k, v_bytes, ldv, v_type)?;
6017 validate_x_vector(k, tau_bytes, tau_type)?;
6018 validate_x_matrix(k, k, t_bytes, ldt, t_type)?;
6019 Ok(())
6020}