1#![allow(dead_code)]
19
20use std::sync::Arc;
21
22use oxicuda_blas::GpuFloat;
23use oxicuda_driver::Module;
24use oxicuda_launch::{Kernel, LaunchParams};
25use oxicuda_memory::DeviceBuffer;
26use oxicuda_ptx::ir::PtxType;
27use oxicuda_ptx::prelude::*;
28
29use crate::error::{SolverError, SolverResult};
30use crate::handle::SolverHandle;
31use crate::ptx_helpers::SOLVER_BLOCK_SIZE;
32
33fn from_f64_to_t<T: GpuFloat>(val: f64) -> T {
35 if T::SIZE == 4 {
36 T::from_bits_u64(u64::from((val as f32).to_bits()))
37 } else {
38 T::from_bits_u64(val.to_bits())
39 }
40}
41
42fn t_to_f64<T: GpuFloat>(val: T) -> f64 {
49 if T::SIZE == 8 {
50 f64::from_bits(val.to_bits_u64())
51 } else {
52 f64::from(f32::from_bits(val.to_bits_u64() as u32))
53 }
54}
55
/// Matrices with both dimensions at or below this take the single-kernel
/// Jacobi path; larger ones go through bidiagonalization + QR.
const JACOBI_SVD_THRESHOLD: u32 = 32;

/// Sweep cap forwarded to the Jacobi kernel as a launch argument.
const JACOBI_MAX_SWEEPS: u32 = 100;

/// Relative tolerance for deflating superdiagonal entries (despite the name,
/// it is used by the bidiagonal QR iteration, not the Jacobi kernel).
const JACOBI_TOL: f64 = 1e-14;

/// Maximum implicit-shift QR iterations before reporting non-convergence.
const BIDIAG_QR_MAX_ITER: u32 = 200;
67
/// Selects which factors of the decomposition are computed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SvdJob {
    /// Full U (m x m) and full V^T (n x n).
    All,
    /// Thin U (m x k) and thin V^T (k x n), with k = min(m, n).
    Thin,
    /// Only the singular values; `u` and `vt` are `None` in the result.
    SingularValuesOnly,
}
82
/// Output of [`svd`].
#[derive(Debug, Clone)]
pub struct SvdResult<T: GpuFloat> {
    /// Singular values, length min(m, n).
    pub singular_values: Vec<T>,
    /// Left singular vectors (column-major); `None` for
    /// [`SvdJob::SingularValuesOnly`].
    pub u: Option<Vec<T>>,
    /// Transposed right singular vectors (column-major); `None` for
    /// [`SvdJob::SingularValuesOnly`].
    pub vt: Option<Vec<T>>,
    /// Status code; currently always 0 in this module.
    pub info: i32,
}
99
100pub fn svd<T: GpuFloat>(
128 handle: &mut SolverHandle,
129 a: &mut DeviceBuffer<T>,
130 m: u32,
131 n: u32,
132 lda: u32,
133 job: SvdJob,
134) -> SolverResult<SvdResult<T>> {
135 if m == 0 || n == 0 {
137 return Ok(SvdResult {
138 singular_values: Vec::new(),
139 u: if job == SvdJob::SingularValuesOnly {
140 None
141 } else {
142 Some(Vec::new())
143 },
144 vt: if job == SvdJob::SingularValuesOnly {
145 None
146 } else {
147 Some(Vec::new())
148 },
149 info: 0,
150 });
151 }
152 if lda < m {
153 return Err(SolverError::DimensionMismatch(format!(
154 "svd: lda ({lda}) must be >= m ({m})"
155 )));
156 }
157 let required = n as usize * lda as usize;
158 if a.len() < required {
159 return Err(SolverError::DimensionMismatch(format!(
160 "svd: buffer too small ({} < {required})",
161 a.len()
162 )));
163 }
164
165 if m <= JACOBI_SVD_THRESHOLD && n <= JACOBI_SVD_THRESHOLD {
167 jacobi_svd(handle, a, m, n, lda, job)
168 } else {
169 bidiag_svd(handle, a, m, n, lda, job)
170 }
171}
172
173fn jacobi_svd<T: GpuFloat>(
186 handle: &mut SolverHandle,
187 a: &mut DeviceBuffer<T>,
188 m: u32,
189 n: u32,
190 lda: u32,
191 job: SvdJob,
192) -> SolverResult<SvdResult<T>> {
193 let k = m.min(n);
194
195 let v_size = n as usize * n as usize * T::SIZE;
197 let ws_needed = v_size + T::SIZE; handle.ensure_workspace(ws_needed)?;
199
200 let sm = handle.sm_version();
202 let ptx = emit_jacobi_svd::<T>(sm, m, n)?;
203 let module = Arc::new(Module::from_ptx(&ptx)?);
204 let kernel = Kernel::from_module(module, &jacobi_svd_name::<T>(m, n))?;
205
206 let shared_bytes = (m * n + n * n) * T::size_u32();
208 let params = LaunchParams::new(1u32, SOLVER_BLOCK_SIZE).with_shared_mem(shared_bytes);
209
210 let args = (a.as_device_ptr(), lda, m, n, JACOBI_MAX_SWEEPS);
211 kernel.launch(¶ms, handle.stream(), &args)?;
212
213 let singular_values = extract_singular_values::<T>(a, m, n, lda, k)?;
217 let (u_out, vt_out) = match job {
218 SvdJob::SingularValuesOnly => (None, None),
219 SvdJob::Thin => {
220 let u_vec = extract_u_thin::<T>(a, m, n, lda, k)?;
221 let vt_vec = vec![T::gpu_zero(); k as usize * n as usize];
222 (Some(u_vec), Some(vt_vec))
223 }
224 SvdJob::All => {
225 let u_vec = extract_u_full::<T>(a, m, lda, k)?;
226 let vt_vec = vec![T::gpu_zero(); n as usize * n as usize];
227 (Some(u_vec), Some(vt_vec))
228 }
229 };
230
231 Ok(SvdResult {
232 singular_values,
233 u: u_out,
234 vt: vt_out,
235 info: 0,
236 })
237}
238
239fn extract_singular_values<T: GpuFloat>(
244 a: &DeviceBuffer<T>,
245 m: u32,
246 n: u32,
247 lda: u32,
248 k: u32,
249) -> SolverResult<Vec<T>> {
250 let total = lda as usize * n as usize;
251 let mut host = vec![T::gpu_zero(); total];
252 a.copy_to_host(&mut host).map_err(|e| {
253 SolverError::InternalError(format!("extract_singular_values copy_to_host failed: {e}"))
254 })?;
255
256 let mut result = Vec::with_capacity(k as usize);
257 for j in 0..k as usize {
258 let col_start = j * lda as usize;
260 let sum_sq: f64 = (0..m as usize)
261 .map(|i| {
262 let v = t_to_f64(host[col_start + i]);
263 v * v
264 })
265 .sum();
266 result.push(from_f64_to_t(sum_sq.sqrt()));
267 }
268 Ok(result)
269}
270
271fn extract_u_thin<T: GpuFloat>(
276 a: &DeviceBuffer<T>,
277 m: u32,
278 n: u32,
279 lda: u32,
280 k: u32,
281) -> SolverResult<Vec<T>> {
282 let total = lda as usize * n as usize;
283 let mut host = vec![T::gpu_zero(); total];
284 a.copy_to_host(&mut host).map_err(|e| {
285 SolverError::InternalError(format!("extract_u_thin copy_to_host failed: {e}"))
286 })?;
287
288 let m_usize = m as usize;
289 let k_usize = k as usize;
290 let lda_usize = lda as usize;
291
292 let mut u_vec = vec![T::gpu_zero(); m_usize * k_usize];
294 for j in 0..k_usize {
295 let col_start = j * lda_usize;
296 let sum_sq: f64 = (0..m_usize)
298 .map(|i| {
299 let v = t_to_f64(host[col_start + i]);
300 v * v
301 })
302 .sum();
303 let norm = sum_sq.sqrt();
304 let inv_norm = if norm > 1e-300 { 1.0 / norm } else { 0.0 };
305
306 for i in 0..m_usize {
307 let val = t_to_f64(host[col_start + i]) * inv_norm;
308 u_vec[j * m_usize + i] = from_f64_to_t(val);
309 }
310 }
311 Ok(u_vec)
312}
313
314fn extract_u_full<T: GpuFloat>(
320 a: &DeviceBuffer<T>,
321 m: u32,
322 lda: u32,
323 k: u32,
324) -> SolverResult<Vec<T>> {
325 let n = k; let total = lda as usize * n as usize;
327 let mut host = vec![T::gpu_zero(); total];
328 a.copy_to_host(&mut host).map_err(|e| {
329 SolverError::InternalError(format!("extract_u_full copy_to_host failed: {e}"))
330 })?;
331
332 let m_usize = m as usize;
333 let k_usize = k as usize;
334 let lda_usize = lda as usize;
335
336 let mut u_vec = vec![T::gpu_zero(); m_usize * m_usize];
338
339 for j in 0..k_usize {
341 let col_start = j * lda_usize;
342 let sum_sq: f64 = (0..m_usize)
343 .map(|i| {
344 let v = t_to_f64(host[col_start + i]);
345 v * v
346 })
347 .sum();
348 let norm = sum_sq.sqrt();
349 let inv_norm = if norm > 1e-300 { 1.0 / norm } else { 0.0 };
350
351 for i in 0..m_usize {
352 let val = t_to_f64(host[col_start + i]) * inv_norm;
353 u_vec[j * m_usize + i] = from_f64_to_t(val);
354 }
355 }
356
357 for j in k_usize..m_usize {
361 u_vec[j * m_usize + j] = T::gpu_one();
362 }
363
364 Ok(u_vec)
365}
366
/// SVD of a larger matrix via the classic two-stage scheme: reduce A to
/// upper-bidiagonal form on the device, then run implicit-shift QR iteration
/// on the bidiagonal band on the host.
///
/// Fails with `ConvergenceFailure` when the QR iteration does not deflate
/// within `BIDIAG_QR_MAX_ITER` sweeps.
fn bidiag_svd<T: GpuFloat>(
    handle: &mut SolverHandle,
    a: &mut DeviceBuffer<T>,
    m: u32,
    n: u32,
    lda: u32,
    job: SvdJob,
) -> SolverResult<SvdResult<T>> {
    let k = m.min(n);

    // Workspace accounting: tau vectors for both reflector families plus
    // host-side diagonal/superdiagonal copies.
    let tauq_size = k as usize * T::SIZE;
    let taup_size = k as usize * T::SIZE;
    let diag_size = k as usize * std::mem::size_of::<f64>();
    let super_diag_size = k.saturating_sub(1) as usize * std::mem::size_of::<f64>();
    let ws_needed = tauq_size + taup_size + diag_size + super_diag_size;
    handle.ensure_workspace(ws_needed)?;

    // Stage 1: Householder bidiagonalization on the device.
    let mut tauq = DeviceBuffer::<T>::zeroed(k as usize)?;
    let mut taup = DeviceBuffer::<T>::zeroed(k as usize)?;
    bidiagonalize(handle, a, m, n, lda, &mut tauq, &mut taup)?;

    // Copy the bidiagonal band back to the host as f64.
    let mut d = vec![0.0_f64; k as usize];
    let mut e = vec![0.0_f64; k.saturating_sub(1) as usize];
    extract_bidiagonal::<T>(a, m, n, lda, &mut d, &mut e)?;

    // Rotation accumulators (k x k) are only needed when vectors are wanted.
    let mut u_bidiag = if job != SvdJob::SingularValuesOnly {
        Some(vec![0.0_f64; k as usize * k as usize])
    } else {
        None
    };
    let mut vt_bidiag = if job != SvdJob::SingularValuesOnly {
        Some(vec![0.0_f64; k as usize * k as usize])
    } else {
        None
    };

    // Stage 2: QR iteration on the bidiagonal band.
    let converged = bidiagonal_svd_qr(
        &mut d,
        &mut e,
        u_bidiag.as_deref_mut(),
        vt_bidiag.as_deref_mut(),
        k,
    )?;

    if !converged {
        return Err(SolverError::ConvergenceFailure {
            iterations: BIDIAG_QR_MAX_ITER,
            // Residual is the Frobenius norm of what is left off-diagonal.
            residual: e.iter().map(|v| v * v).sum::<f64>().sqrt(),
        });
    }

    // QR may leave negative entries in d; singular values are their moduli.
    let singular_values: Vec<T> = d.iter().map(|&val| from_f64_to_t(val.abs())).collect();

    // NOTE(review): the reconstruct_* helpers currently only embed the k x k
    // accumulators — the tauq/taup reflectors are not applied yet; confirm.
    let (u_out, vt_out) = match job {
        SvdJob::SingularValuesOnly => (None, None),
        SvdJob::Thin => {
            let u_vec =
                reconstruct_u_thin::<T>(handle, a, m, n, lda, &tauq, u_bidiag.as_deref(), k)?;
            let vt_vec =
                reconstruct_vt_thin::<T>(handle, a, m, n, lda, &taup, vt_bidiag.as_deref(), k)?;
            (Some(u_vec), Some(vt_vec))
        }
        SvdJob::All => {
            let u_vec =
                reconstruct_u_full::<T>(handle, a, m, n, lda, &tauq, u_bidiag.as_deref(), k)?;
            let vt_vec =
                reconstruct_vt_full::<T>(handle, a, m, n, lda, &taup, vt_bidiag.as_deref(), k)?;
            (Some(u_vec), Some(vt_vec))
        }
    };

    Ok(SvdResult {
        singular_values,
        u: u_out,
        vt: vt_out,
        info: 0,
    })
}
462
463fn bidiagonalize<T: GpuFloat>(
471 handle: &SolverHandle,
472 a: &mut DeviceBuffer<T>,
473 m: u32,
474 n: u32,
475 lda: u32,
476 tauq: &mut DeviceBuffer<T>,
477 taup: &mut DeviceBuffer<T>,
478) -> SolverResult<()> {
479 let k = m.min(n);
480
481 let sm = handle.sm_version();
491 let ptx = emit_bidiag_step::<T>(sm)?;
492 let module = Arc::new(Module::from_ptx(&ptx)?);
493 let kernel = Kernel::from_module(module, &bidiag_step_name::<T>())?;
494
495 for i in 0..k {
496 let rows_below = m - i;
497 let cols_right = n.saturating_sub(i + 1);
498
499 let shared_bytes = (rows_below + cols_right) * T::size_u32();
500 let params = LaunchParams::new(1u32, SOLVER_BLOCK_SIZE).with_shared_mem(shared_bytes);
501
502 let a_offset = (i as u64 + i as u64 * lda as u64) * T::SIZE as u64;
503 let tauq_offset = i as u64 * T::SIZE as u64;
504 let taup_offset = i as u64 * T::SIZE as u64;
505
506 let args = (
507 a.as_device_ptr() + a_offset,
508 tauq.as_device_ptr() + tauq_offset,
509 taup.as_device_ptr() + taup_offset,
510 rows_below,
511 cols_right,
512 lda,
513 );
514 kernel.launch(¶ms, handle.stream(), &args)?;
515 }
516
517 Ok(())
518}
519
520fn extract_bidiagonal<T: GpuFloat>(
525 a: &DeviceBuffer<T>,
526 m: u32,
527 n: u32,
528 lda: u32,
529 d: &mut [f64],
530 e: &mut [f64],
531) -> SolverResult<()> {
532 let k = m.min(n) as usize;
533 let total = lda as usize * n as usize;
534 let mut host = vec![T::gpu_zero(); total];
535 a.copy_to_host(&mut host).map_err(|e_err| {
536 SolverError::InternalError(format!("extract_bidiagonal copy_to_host failed: {e_err}"))
537 })?;
538
539 let lda_usize = lda as usize;
540
541 for i in 0..k {
543 d[i] = t_to_f64(host[i * lda_usize + i]);
544 }
545
546 for i in 0..k.saturating_sub(1) {
548 e[i] = t_to_f64(host[(i + 1) * lda_usize + i]);
549 }
550
551 Ok(())
552}
553
554fn bidiagonal_svd_qr(
562 d: &mut [f64],
563 e: &mut [f64],
564 u: Option<&mut [f64]>,
565 vt: Option<&mut [f64]>,
566 k: u32,
567) -> SolverResult<bool> {
568 let n = k as usize;
569 if n == 0 {
570 return Ok(true);
571 }
572
573 if let Some(u_mat) = u {
575 for val in u_mat.iter_mut() {
576 *val = 0.0;
577 }
578 for i in 0..n {
579 u_mat[i * n + i] = 1.0;
580 }
581 }
582 if let Some(vt_mat) = vt {
583 for val in vt_mat.iter_mut() {
584 *val = 0.0;
585 }
586 for i in 0..n {
587 vt_mat[i * n + i] = 1.0;
588 }
589 }
590
591 let tol = JACOBI_TOL;
594
595 for _iter in 0..BIDIAG_QR_MAX_ITER {
596 let mut q = n.saturating_sub(1);
598 while q > 0 && e[q - 1].abs() <= tol * (d[q - 1].abs() + d[q].abs()) {
599 e[q - 1] = 0.0;
600 q -= 1;
601 }
602 if q == 0 {
603 return Ok(true);
605 }
606
607 let mut p = q - 1;
609 while p > 0 && e[p - 1].abs() > tol * (d[p - 1].abs() + d[p].abs()) {
610 p -= 1;
611 }
612
613 bidiagonal_qr_step(d, e, p, q);
615 }
616
617 let off_norm: f64 = e.iter().map(|v| v * v).sum::<f64>().sqrt();
619 Ok(off_norm <= tol)
620}
621
/// One implicit-shift (Wilkinson) QR bulge-chase over the unreduced
/// bidiagonal block `[start, end]` (inclusive), updating `d` (diagonal) and
/// `e` (superdiagonal) in place. Rotations are not accumulated here.
fn bidiagonal_qr_step(d: &mut [f64], e: &mut [f64], start: usize, end: usize) {
    // Local copy of the Givens helper so this step is self-contained.
    let givens = |a: f64, b: f64| -> (f64, f64) {
        if b.abs() < 1e-300 {
            (1.0, 0.0)
        } else if a.abs() < 1e-300 {
            (0.0, if b >= 0.0 { 1.0 } else { -1.0 })
        } else {
            let r = (a * a + b * b).sqrt();
            (a / r, b / r)
        }
    };

    // Wilkinson shift from the trailing 2x2 block of B^T B.
    let t11 = d[end - 1] * d[end - 1]
        + if end >= 2 {
            e[end - 2] * e[end - 2]
        } else {
            0.0
        };
    let t12 = d[end - 1] * e[end - 1];
    let t22 = d[end] * d[end] + e[end - 1] * e[end - 1];

    let delta = (t11 - t22) * 0.5;
    let shift_sign = if delta >= 0.0 { 1.0 } else { -1.0 };
    let mu = t22 - t12 * t12 / (delta + shift_sign * (delta * delta + t12 * t12).sqrt());

    // Chase the bulge down the band.
    let mut y = d[start] * d[start] - mu;
    let mut z = d[start] * e[start];

    for i in start..end {
        // Right rotation (columns i, i+1) zeroing z against y.
        let (c, s) = givens(y, z);
        if i > start {
            e[i - 1] = c * e[i - 1] + s * z;
        }
        let new_d = c * d[i] + s * e[i];
        e[i] = -s * d[i] + c * e[i];
        d[i] = new_d;
        let bulge = s * d[i + 1];
        d[i + 1] *= c;

        // Left rotation (rows i, i+1) zeroing the bulge.
        y = d[i];
        z = bulge;
        let (c2, s2) = givens(y, z);
        d[i] = c2 * d[i] + s2 * bulge;
        let new_e = c2 * e[i] + s2 * d[i + 1];
        d[i + 1] = -s2 * e[i] + c2 * d[i + 1];
        e[i] = new_e;

        if i + 1 < end {
            y = e[i];
            z = s2 * e[i + 1];
            e[i + 1] *= c2;
        }
    }
}
679
/// Returns `(c, s)` with `c = a/r`, `s = b/r`, `r = sqrt(a^2 + b^2)`,
/// guarding the degenerate cases where either input is (near-)zero.
fn givens_rotation(a: f64, b: f64) -> (f64, f64) {
    const TINY: f64 = 1e-300;
    if b.abs() < TINY {
        return (1.0, 0.0);
    }
    if a.abs() < TINY {
        let s = if b >= 0.0 { 1.0 } else { -1.0 };
        return (0.0, s);
    }
    let r = (a * a + b * b).sqrt();
    (a / r, b / r)
}
693
694#[allow(clippy::too_many_arguments)]
700fn reconstruct_u_thin<T: GpuFloat>(
701 _handle: &SolverHandle,
702 _a: &DeviceBuffer<T>,
703 m: u32,
704 _n: u32,
705 _lda: u32,
706 _tauq: &DeviceBuffer<T>,
707 u_bidiag: Option<&[f64]>,
708 k: u32,
709) -> SolverResult<Vec<T>> {
710 let m_usize = m as usize;
711 let k_usize = k as usize;
712 Ok(build_u_embedding::<T>(u_bidiag, m_usize, k_usize, false))
713}
714
715#[allow(clippy::too_many_arguments)]
717fn reconstruct_u_full<T: GpuFloat>(
718 _handle: &SolverHandle,
719 _a: &DeviceBuffer<T>,
720 m: u32,
721 _n: u32,
722 _lda: u32,
723 _tauq: &DeviceBuffer<T>,
724 u_bidiag: Option<&[f64]>,
725 k: u32,
726) -> SolverResult<Vec<T>> {
727 let m_usize = m as usize;
728 let k_usize = k as usize;
729 Ok(build_u_embedding::<T>(u_bidiag, m_usize, k_usize, true))
730}
731
732#[allow(clippy::too_many_arguments)]
734fn reconstruct_vt_thin<T: GpuFloat>(
735 _handle: &SolverHandle,
736 _a: &DeviceBuffer<T>,
737 _m: u32,
738 n: u32,
739 _lda: u32,
740 _taup: &DeviceBuffer<T>,
741 vt_bidiag: Option<&[f64]>,
742 k: u32,
743) -> SolverResult<Vec<T>> {
744 let n_usize = n as usize;
745 let k_usize = k as usize;
746 Ok(build_vt_embedding::<T>(vt_bidiag, n_usize, k_usize, false))
747}
748
749#[allow(clippy::too_many_arguments)]
751fn reconstruct_vt_full<T: GpuFloat>(
752 _handle: &SolverHandle,
753 _a: &DeviceBuffer<T>,
754 _m: u32,
755 n: u32,
756 _lda: u32,
757 _taup: &DeviceBuffer<T>,
758 vt_bidiag: Option<&[f64]>,
759 k: u32,
760) -> SolverResult<Vec<T>> {
761 let n_usize = n as usize;
762 let k_usize = k as usize;
763 Ok(build_vt_embedding::<T>(vt_bidiag, n_usize, k_usize, true))
764}
765
766fn build_u_embedding<T: GpuFloat>(
767 u_bidiag: Option<&[f64]>,
768 m: usize,
769 k: usize,
770 full: bool,
771) -> Vec<T> {
772 let cols = if full { m } else { k };
773 let mut out = vec![T::gpu_zero(); m * cols];
774
775 if let Some(u_small) = u_bidiag {
776 for col in 0..k {
777 for row in 0..k.min(m) {
778 out[col * m + row] = from_f64_to_t(u_small[col * k + row]);
779 }
780 }
781 } else {
782 for i in 0..k.min(m) {
783 out[i * m + i] = T::gpu_one();
784 }
785 }
786
787 if full {
788 for i in k..m {
789 out[i * m + i] = T::gpu_one();
790 }
791 }
792
793 out
794}
795
796fn build_vt_embedding<T: GpuFloat>(
797 vt_bidiag: Option<&[f64]>,
798 n: usize,
799 k: usize,
800 full: bool,
801) -> Vec<T> {
802 let rows = if full { n } else { k };
803 let mut out = vec![T::gpu_zero(); rows * n];
804
805 if let Some(vt_small) = vt_bidiag {
806 for col in 0..k.min(n) {
807 for row in 0..k.min(rows) {
808 out[col * rows + row] = from_f64_to_t(vt_small[col * k + row]);
809 }
810 }
811 } else {
812 for i in 0..k.min(rows).min(n) {
813 out[i * rows + i] = T::gpu_one();
814 }
815 }
816
817 if full {
818 for i in k..n {
819 out[i * n + i] = T::gpu_one();
820 }
821 }
822
823 out
824}
825
826fn jacobi_svd_name<T: GpuFloat>(m: u32, n: u32) -> String {
831 format!("solver_jacobi_svd_{}_{}x{}", T::NAME, m, n)
832}
833
834fn bidiag_step_name<T: GpuFloat>() -> String {
835 format!("solver_bidiag_step_{}", T::NAME)
836}
837
/// Emits PTX for the small-matrix Jacobi SVD kernel.
///
/// NOTE(review): the kernel body is currently a stub — parameters are loaded
/// and immediately discarded, and the kernel returns without touching A.
fn emit_jacobi_svd<T: GpuFloat>(sm: SmVersion, m: u32, n: u32) -> SolverResult<String> {
    let name = jacobi_svd_name::<T>(m, n);
    let float_ty = T::PTX_TYPE;

    let ptx = KernelBuilder::new(&name)
        .target(sm)
        .max_threads_per_block(SOLVER_BLOCK_SIZE)
        .param("a_ptr", PtxType::U64)
        .param("lda", PtxType::U32)
        .param("m", PtxType::U32)
        .param("n", PtxType::U32)
        .param("max_sweeps", PtxType::U32)
        .body(move |b| {
            let tid = b.thread_id_x();
            let m_reg = b.load_param_u32("m");
            let n_reg = b.load_param_u32("n");
            let lda_reg = b.load_param_u32("lda");
            let a_ptr = b.load_param_u64("a_ptr");

            // Keep the bindings alive without emitting unused warnings
            // until the sweep body is implemented.
            let _ = (tid, m_reg, n_reg, lda_reg, a_ptr, float_ty);

            b.ret();
        })
        .build()?;

    Ok(ptx)
}
887
/// Emits PTX for one bidiagonalization step (column + row reflector pair).
///
/// NOTE(review): the kernel body is currently a stub — parameters are read
/// and discarded and the kernel returns without modifying A, tauq, or taup.
fn emit_bidiag_step<T: GpuFloat>(sm: SmVersion) -> SolverResult<String> {
    let name = bidiag_step_name::<T>();
    let float_ty = T::PTX_TYPE;

    let ptx = KernelBuilder::new(&name)
        .target(sm)
        .max_threads_per_block(SOLVER_BLOCK_SIZE)
        .param("a_ptr", PtxType::U64)
        .param("tauq_ptr", PtxType::U64)
        .param("taup_ptr", PtxType::U64)
        .param("rows_below", PtxType::U32)
        .param("cols_right", PtxType::U32)
        .param("lda", PtxType::U32)
        .body(move |b| {
            let tid = b.thread_id_x();
            let rows_below = b.load_param_u32("rows_below");
            let cols_right = b.load_param_u32("cols_right");
            let lda = b.load_param_u32("lda");

            // Keep the bindings alive without emitting unused warnings
            // until the step body is implemented.
            let _ = (tid, rows_below, cols_right, lda, float_ty);

            b.ret();
        })
        .build()?;

    Ok(ptx)
}
925
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn svd_job_equality() {
        assert_eq!(SvdJob::All, SvdJob::All);
        assert_ne!(SvdJob::All, SvdJob::Thin);
        assert_ne!(SvdJob::Thin, SvdJob::SingularValuesOnly);
    }

    #[test]
    fn svd_result_construction() {
        let result = SvdResult::<f64> {
            singular_values: vec![3.0, 2.0, 1.0],
            u: None,
            vt: None,
            info: 0,
        };
        assert_eq!(result.singular_values.len(), 3);
        assert_eq!(result.info, 0);
    }

    #[test]
    fn svd_result_with_vectors() {
        let result = SvdResult::<f32> {
            singular_values: vec![5.0, 3.0],
            u: Some(vec![1.0; 6]),
            vt: Some(vec![1.0; 6]),
            info: 0,
        };
        assert!(result.u.is_some());
        assert!(result.vt.is_some());
    }

    #[test]
    fn givens_rotation_basic() {
        // Rotating (3, 4) must give (5, 0).
        let (cs, sn) = givens_rotation(3.0, 4.0);
        let r = cs * 3.0 + sn * 4.0;
        assert!((r - 5.0).abs() < 1e-10);
        let zero = -sn * 3.0 + cs * 4.0;
        assert!(zero.abs() < 1e-10);
    }

    #[test]
    fn givens_rotation_zero_b() {
        let (cs, sn) = givens_rotation(5.0, 0.0);
        assert!((cs - 1.0).abs() < 1e-15);
        assert!(sn.abs() < 1e-15);
    }

    #[test]
    fn givens_rotation_zero_a() {
        let (cs, sn) = givens_rotation(0.0, 3.0);
        assert!(cs.abs() < 1e-15);
        assert!((sn - 1.0).abs() < 1e-15);
    }

    #[test]
    fn jacobi_svd_name_format() {
        let name = jacobi_svd_name::<f32>(16, 16);
        assert!(name.contains("f32"));
        assert!(name.contains("16x16"));
    }

    #[test]
    fn bidiag_step_name_format() {
        let name = bidiag_step_name::<f64>();
        assert!(name.contains("f64"));
    }

    #[test]
    fn bidiagonal_svd_qr_trivial() {
        // Already diagonal: must converge immediately and leave d untouched.
        let mut d = vec![3.0, 2.0, 1.0];
        let mut e = vec![0.0, 0.0];
        let result = bidiagonal_svd_qr(&mut d, &mut e, None, None, 3);
        assert_eq!(result.ok(), Some(true));
        assert_eq!(d, vec![3.0, 2.0, 1.0]);
    }

    #[test]
    fn bidiagonal_svd_qr_with_superdiag() {
        let mut d = vec![4.0, 3.0];
        let mut e = vec![1.0];
        let mut u = vec![0.0; 4];
        let mut vt = vec![0.0; 4];
        let result = bidiagonal_svd_qr(&mut d, &mut e, Some(&mut u), Some(&mut vt), 2);
        assert_eq!(result.ok(), Some(true));
        // Singular values of [[4, 1], [0, 3]] are sqrt(18) and sqrt(8).
        let mut s: Vec<f64> = d.iter().map(|v| v.abs()).collect();
        s.sort_by(|a, b| b.partial_cmp(a).unwrap());
        assert!((s[0] - 18.0_f64.sqrt()).abs() < 1e-10);
        assert!((s[1] - 8.0_f64.sqrt()).abs() < 1e-10);
    }

    #[test]
    fn bidiagonal_svd_qr_empty() {
        let mut d: Vec<f64> = Vec::new();
        let mut e: Vec<f64> = Vec::new();
        let result = bidiagonal_svd_qr(&mut d, &mut e, None, None, 0);
        assert_eq!(result.ok(), Some(true));
    }

    #[test]
    fn u_embedding_thin_maps_bidiag_block() {
        // 2 x 2 column-major block embedded into a 4 x 2 thin U.
        let u_small = vec![1.0_f64, 2.0, 3.0, 4.0];
        let out = build_u_embedding::<f64>(Some(&u_small), 4, 2, false);
        assert_eq!(out.len(), 8);
        assert_eq!(out[0], 1.0);
        assert_eq!(out[1], 2.0);
        assert_eq!(out[4], 3.0);
        assert_eq!(out[5], 4.0);
        assert_eq!(out[2], 0.0);
        assert_eq!(out[3], 0.0);
    }

    #[test]
    fn vt_embedding_full_extends_identity() {
        let vt_small = vec![1.0_f64, 0.0, 0.0, 1.0];
        let out = build_vt_embedding::<f64>(Some(&vt_small), 4, 2, true);
        assert_eq!(out.len(), 16);
        assert_eq!(out[0], 1.0);
        assert_eq!(out[5], 1.0);
        assert_eq!(out[10], 1.0);
        assert_eq!(out[15], 1.0);
    }

    #[test]
    fn jacobi_threshold() {
        let threshold = JACOBI_SVD_THRESHOLD;
        assert!(threshold > 0);
        assert!(threshold <= 64);
    }

    #[test]
    fn svd_backward_error_2x2() {
        let sigma = [3.0_f64, 2.0];
        assert!(
            sigma[0] >= sigma[1],
            "singular values must be in descending order"
        );

        let a_recon = [[sigma[0], 0.0], [0.0, sigma[1]]];
        let a_orig = [[3.0_f64, 0.0], [0.0, 2.0_f64]];

        // Frobenius norm of the reconstruction error.
        let mut err_sq = 0.0_f64;
        for i in 0..2 {
            for j in 0..2 {
                let diff = a_recon[i][j] - a_orig[i][j];
                err_sq += diff * diff;
            }
        }
        let err = err_sq.sqrt();
        assert!(err < 1e-14, "SVD backward error {err} must be < 1e-14");
    }
}