1use faer::Par;
38use faer::linalg::matmul::triangular::{self as tri_matmul, BlockStructure};
39use faer::linalg::triangular_solve;
40use faer::perm::Perm;
41use faer::prelude::*;
42use faer::{Accum, Conj, Mat, MatMut, MatRef};
43
44use super::diagonal::MixedDiagonal;
45use super::pivot::{Block2x2, PivotType};
46use crate::error::SparseError;
47use crate::ordering::perm_from_forward;
48
49const BUNCH_KAUFMAN_ALPHA: f64 = 0.5;
51
52#[cfg(test)]
55const COMPLETE_PIVOTING_GROWTH_BOUND: f64 = 4.0;
56
57#[derive(Debug, Clone)]
86pub struct AptpOptions {
87 pub threshold: f64,
90 pub small: f64,
93 pub fallback: AptpFallback,
95 pub outer_block_size: usize,
98 pub inner_block_size: usize,
101 pub failed_pivot_method: FailedPivotMethod,
103 pub par: Par,
105 pub nemin: usize,
111 pub small_leaf_threshold: usize,
114}
115
116impl Default for AptpOptions {
117 fn default() -> Self {
118 Self {
119 threshold: 0.01,
120 small: 1e-20,
121 fallback: AptpFallback::BunchKaufman,
122 outer_block_size: 256,
123 inner_block_size: 32,
124 failed_pivot_method: FailedPivotMethod::Tpp,
125 par: Par::Seq,
126 nemin: 32,
127 small_leaf_threshold: 256,
128 }
129 }
130}
131
132#[derive(Debug, Clone, Copy, PartialEq, Eq)]
138pub enum AptpFallback {
139 BunchKaufman,
141 Delay,
143}
144
145#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub enum FailedPivotMethod {
156 Tpp,
161 Pass,
163}
164
165pub(crate) struct AptpKernelWorkspace {
179 backup: Mat<f64>,
181 l11_buf: Mat<f64>,
183 ld_buf: Mat<f64>,
185 copy_buf: Mat<f64>,
187}
188
189impl AptpKernelWorkspace {
190 pub(crate) fn new(max_front: usize, inner_block_size: usize) -> Self {
193 Self {
194 backup: Mat::zeros(max_front, inner_block_size),
195 l11_buf: Mat::zeros(inner_block_size, inner_block_size),
196 ld_buf: Mat::zeros(max_front, inner_block_size),
197 copy_buf: Mat::zeros(max_front, inner_block_size),
198 }
199 }
200}
201
202#[derive(Debug)]
211pub struct AptpFactorResult {
212 pub d: MixedDiagonal,
214 pub perm: Vec<usize>,
216 pub num_eliminated: usize,
218 pub delayed_cols: Vec<usize>,
220 pub stats: AptpStatistics,
222 pub pivot_log: Vec<AptpPivotRecord>,
224}
225
226#[derive(Debug)]
228pub struct AptpFactorization {
229 pub l: Mat<f64>,
231 pub d: MixedDiagonal,
233 pub perm: Perm<usize>,
235 pub delayed_cols: Vec<usize>,
237 pub stats: AptpStatistics,
239 pub pivot_log: Vec<AptpPivotRecord>,
241}
242
243#[derive(Debug, Clone, Default)]
247pub struct AptpStatistics {
248 pub num_1x1: usize,
250 pub num_2x2: usize,
252 pub num_delayed: usize,
254 pub max_l_entry: f64,
256}
257
258#[derive(Debug, Clone)]
260pub struct AptpPivotRecord {
261 pub col: usize,
263 pub pivot_type: PivotType,
265 pub max_l_entry: f64,
267 pub was_fallback: bool,
269}
270
271pub fn aptp_factor_in_place(
302 mut a: MatMut<'_, f64>,
303 num_fully_summed: usize,
304 options: &AptpOptions,
305) -> Result<AptpFactorResult, SparseError> {
306 let m = a.nrows();
307 if a.ncols() != m {
308 return Err(SparseError::InvalidInput {
309 reason: format!("matrix must be square, got {}x{}", m, a.ncols()),
310 });
311 }
312 if num_fully_summed > m {
313 return Err(SparseError::InvalidInput {
314 reason: format!(
315 "num_fully_summed ({}) > matrix dimension ({})",
316 num_fully_summed, m
317 ),
318 });
319 }
320 if options.threshold <= 0.0 || options.threshold > 1.0 {
321 return Err(SparseError::InvalidInput {
322 reason: format!("threshold must be in (0, 1], got {}", options.threshold),
323 });
324 }
325 if options.small < 0.0 {
326 return Err(SparseError::InvalidInput {
327 reason: format!("small must be >= 0.0, got {}", options.small),
328 });
329 }
330 if options.outer_block_size == 0 {
331 return Err(SparseError::InvalidInput {
332 reason: "outer_block_size must be > 0".to_string(),
333 });
334 }
335 if options.inner_block_size == 0 {
336 return Err(SparseError::InvalidInput {
337 reason: "inner_block_size must be > 0".to_string(),
338 });
339 }
340 if options.inner_block_size > options.outer_block_size {
341 return Err(SparseError::InvalidInput {
342 reason: format!(
343 "inner_block_size ({}) must be <= outer_block_size ({})",
344 options.inner_block_size, options.outer_block_size
345 ),
346 });
347 }
348
349 let mut kernel_ws = AptpKernelWorkspace::new(m, options.inner_block_size);
359
360 let mut result = if num_fully_summed < options.inner_block_size {
361 tpp_factor_rect(a.rb_mut(), num_fully_summed, options)?
363 } else if num_fully_summed > options.outer_block_size {
364 two_level_factor(a.rb_mut(), num_fully_summed, options, &mut kernel_ws)?
365 } else {
366 factor_inner(
367 a.rb_mut(),
368 num_fully_summed,
369 num_fully_summed,
370 options,
371 &mut kernel_ws,
372 )?
373 };
374
375 if result.num_eliminated < num_fully_summed
377 && options.failed_pivot_method == FailedPivotMethod::Tpp
378 {
379 let q = result.num_eliminated;
380
381 result.d.grow(num_fully_summed);
383
384 let additional = tpp_factor(
385 a.rb_mut(),
386 q,
387 num_fully_summed,
388 &mut result.perm,
389 &mut result.d,
390 &mut result.stats,
391 &mut result.pivot_log,
392 options,
393 );
394
395 result.num_eliminated = q + additional;
396 result.d.truncate(result.num_eliminated);
397 result.delayed_cols = (result.num_eliminated..num_fully_summed)
398 .map(|i| result.perm[i])
399 .collect();
400 result.stats.num_delayed = num_fully_summed - result.num_eliminated;
401 }
402
403 Ok(result)
404}
405
406pub fn aptp_factor(
418 a: MatRef<'_, f64>,
419 options: &AptpOptions,
420) -> Result<AptpFactorization, SparseError> {
421 let n = a.nrows();
422 if a.ncols() != n {
423 return Err(SparseError::InvalidInput {
424 reason: format!("matrix must be square, got {}x{}", n, a.ncols()),
425 });
426 }
427
428 let mut a_copy = a.to_owned();
429 let result = aptp_factor_in_place(a_copy.as_mut(), n, options)?;
430 let l = extract_l(a_copy.as_ref(), &result.d, result.num_eliminated);
431 let perm = perm_from_forward(result.perm.clone())?;
432
433 Ok(AptpFactorization {
434 l,
435 d: result.d,
436 perm,
437 delayed_cols: result.delayed_cols,
438 stats: result.stats,
439 pivot_log: result.pivot_log,
440 })
441}
442
443fn swap_symmetric(mut a: MatMut<'_, f64>, i: usize, j: usize) {
453 let m = a.nrows();
454 swap_symmetric_block(a.rb_mut(), i, j, 0, m);
455}
456
457fn swap_symmetric_block(
466 mut a: MatMut<'_, f64>,
467 i: usize,
468 j: usize,
469 col_start: usize,
470 row_limit: usize,
471) {
472 if i == j {
473 return;
474 }
475 let (i, j) = if i < j { (i, j) } else { (j, i) };
476
477 let tmp = a[(i, i)];
479 a[(i, i)] = a[(j, j)];
480 a[(j, j)] = tmp;
481
482 for k in col_start..i {
485 let tmp = a[(i, k)];
486 a[(i, k)] = a[(j, k)];
487 a[(j, k)] = tmp;
488 }
489
490 for k in (i + 1)..j {
492 let tmp = a[(k, i)];
493 a[(k, i)] = a[(j, k)];
494 a[(j, k)] = tmp;
495 }
496
497 for k in (j + 1)..row_limit {
499 let tmp = a[(k, i)];
500 a[(k, i)] = a[(k, j)];
501 a[(k, j)] = tmp;
502 }
503
504 }
506
507#[cfg(test)]
509#[allow(dead_code)] fn update_schur_1x1(mut a: MatMut<'_, f64>, col: usize, d_value: f64) {
511 let m = a.nrows();
512 let n_trail = m - col - 1;
513 if n_trail == 0 {
514 return;
515 }
516
517 let l_col: Vec<f64> = (0..n_trail).map(|i| a[(col + 1 + i, col)]).collect();
518
519 for j in 0..n_trail {
520 let ldlj = l_col[j] * d_value;
521 for i in j..n_trail {
522 let old = a[(col + 1 + i, col + 1 + j)];
523 a[(col + 1 + i, col + 1 + j)] = old - l_col[i] * ldlj;
524 }
525 }
526}
527
528#[cfg(test)]
530#[allow(dead_code)] fn update_schur_2x2(mut a: MatMut<'_, f64>, col: usize, partner: usize, block: &Block2x2) {
532 let m = a.nrows();
533 let start = col.max(partner) + 1;
534 let n_trail = m - start;
535 if n_trail == 0 {
536 return;
537 }
538
539 let l1: Vec<f64> = (0..n_trail).map(|i| a[(start + i, col)]).collect();
540 let l2: Vec<f64> = (0..n_trail).map(|i| a[(start + i, partner)]).collect();
541
542 let d_a = block.a;
543 let d_b = block.b;
544 let d_c = block.c;
545
546 for j in 0..n_trail {
547 let w_j1 = l1[j] * d_a + l2[j] * d_b;
548 let w_j2 = l1[j] * d_b + l2[j] * d_c;
549 for i in j..n_trail {
550 let update = l1[i] * w_j1 + l2[i] * w_j2;
551 let old = a[(start + i, start + j)];
552 a[(start + i, start + j)] = old - update;
553 }
554 }
555}
556
557fn extract_l(a: MatRef<'_, f64>, d: &MixedDiagonal, num_eliminated: usize) -> Mat<f64> {
563 let n = a.nrows();
564 let mut l = Mat::zeros(n, n);
565
566 for i in 0..n {
567 l[(i, i)] = 1.0;
568 }
569
570 let mut col = 0;
571 while col < num_eliminated {
572 match d.pivot_type(col) {
573 PivotType::OneByOne => {
574 for i in (col + 1)..n {
575 l[(i, col)] = a[(i, col)];
576 }
577 col += 1;
578 }
579 PivotType::TwoByTwo { partner } if partner > col => {
580 for i in (col + 2)..n {
583 l[(i, col)] = a[(i, col)];
584 l[(i, col + 1)] = a[(i, col + 1)];
585 }
586 col += 2;
587 }
588 _ => {
589 col += 1;
590 }
591 }
592 }
593
594 l
595}
596
597#[cfg(test)]
619fn complete_pivoting_factor(mut a: MatMut<'_, f64>, small: f64) -> AptpFactorResult {
620 let n = a.nrows();
621 debug_assert_eq!(
622 n,
623 a.ncols(),
624 "complete_pivoting_factor requires square matrix"
625 );
626
627 let mut col_order: Vec<usize> = (0..n).collect();
628 let mut d = MixedDiagonal::new(n);
629 let mut stats = AptpStatistics::default();
630 let mut pivot_log = Vec::with_capacity(n);
631 let mut k = 0; while k < n {
634 let remaining = n - k;
635
636 let mut max_val = 0.0_f64;
638 let mut max_row = k;
639 let mut max_col = k;
640 for j in k..n {
641 for i in j..n {
642 let v = a[(i, j)].abs();
643 if v > max_val {
644 max_val = v;
645 max_row = i;
646 max_col = j;
647 }
648 }
649 }
650
651 if max_val < small {
653 for &orig_col in &col_order[k..n] {
655 stats.num_delayed += 1;
656 pivot_log.push(AptpPivotRecord {
657 col: orig_col,
658 pivot_type: PivotType::Delayed,
659 max_l_entry: 0.0,
660 was_fallback: false,
661 });
662 }
663 break;
664 }
665
666 if max_row == max_col {
668 if max_row != k {
671 swap_symmetric(a.rb_mut(), k, max_row);
672 col_order.swap(k, max_row);
673 }
674
675 let d_kk = a[(k, k)];
676 let inv_d = 1.0 / d_kk;
677
678 let mut max_l = 0.0_f64;
680 for i in (k + 1)..n {
681 let l_ik = a[(i, k)] * inv_d;
682 a[(i, k)] = l_ik;
683 let abs_l = l_ik.abs();
684 if abs_l > max_l {
685 max_l = abs_l;
686 }
687 }
688
689 update_schur_1x1(a.rb_mut(), k, d_kk);
691
692 d.set_1x1(k, d_kk);
693 stats.num_1x1 += 1;
694 if max_l > stats.max_l_entry {
695 stats.max_l_entry = max_l;
696 }
697 pivot_log.push(AptpPivotRecord {
698 col: col_order[k],
699 pivot_type: PivotType::OneByOne,
700 max_l_entry: max_l,
701 was_fallback: false,
702 });
703 k += 1;
704 } else {
705 let t = max_row;
708 let m = max_col;
709
710 let a_mm = a[(m, m)];
713 let a_tt = a[(t, t)];
714 let a_tm = a[(t, m)]; let delta = a_mm * a_tt - a_tm * a_tm;
716
717 if remaining < 2 {
718 let d_kk = a[(k, k)];
721 if d_kk.abs() < small {
722 stats.num_delayed += 1;
723 pivot_log.push(AptpPivotRecord {
724 col: col_order[k],
725 pivot_type: PivotType::Delayed,
726 max_l_entry: 0.0,
727 was_fallback: false,
728 });
729 break;
730 }
731 let inv_d = 1.0 / d_kk;
732 for i in (k + 1)..n {
733 a[(i, k)] *= inv_d;
734 }
735 d.set_1x1(k, d_kk);
736 stats.num_1x1 += 1;
737 k += 1;
738 continue;
739 }
740
741 if delta.abs() >= BUNCH_KAUFMAN_ALPHA * a_tm * a_tm {
742 if m != k {
745 swap_symmetric(a.rb_mut(), k, m);
746 col_order.swap(k, m);
747 }
748 let new_t = if t == k { m } else { t };
750 if new_t != k + 1 {
751 swap_symmetric(a.rb_mut(), k + 1, new_t);
752 col_order.swap(k + 1, new_t);
753 }
754
755 let a11 = a[(k, k)];
756 let a22 = a[(k + 1, k + 1)];
757 let a21 = a[(k + 1, k)];
758 let det = a11 * a22 - a21 * a21;
759 let inv_det = 1.0 / det;
760
761 let block = Block2x2 {
762 first_col: k,
763 a: a11,
764 b: a21,
765 c: a22,
766 };
767
768 let mut max_l = 0.0_f64;
770 let rows_start = k + 2;
771 for i in rows_start..n {
772 let ai1 = a[(i, k)];
773 let ai2 = a[(i, k + 1)];
774 let l_i1 = (ai1 * a22 - ai2 * a21) * inv_det;
775 let l_i2 = (ai2 * a11 - ai1 * a21) * inv_det;
776 a[(i, k)] = l_i1;
777 a[(i, k + 1)] = l_i2;
778 if l_i1.abs() > max_l {
779 max_l = l_i1.abs();
780 }
781 if l_i2.abs() > max_l {
782 max_l = l_i2.abs();
783 }
784 }
785
786 update_schur_2x2(a.rb_mut(), k, k + 1, &block);
788
789 d.set_2x2(block);
790 stats.num_2x2 += 1;
791 if max_l > stats.max_l_entry {
792 stats.max_l_entry = max_l;
793 }
794 pivot_log.push(AptpPivotRecord {
795 col: col_order[k],
796 pivot_type: PivotType::TwoByTwo {
797 partner: col_order[k + 1],
798 },
799 max_l_entry: max_l,
800 was_fallback: false,
801 });
802 pivot_log.push(AptpPivotRecord {
803 col: col_order[k + 1],
804 pivot_type: PivotType::TwoByTwo {
805 partner: col_order[k],
806 },
807 max_l_entry: max_l,
808 was_fallback: false,
809 });
810 k += 2;
811 } else {
812 let pivot_pos = if a_mm.abs() >= a_tt.abs() { m } else { t };
814 if pivot_pos != k {
815 swap_symmetric(a.rb_mut(), k, pivot_pos);
816 col_order.swap(k, pivot_pos);
817 }
818
819 let d_kk = a[(k, k)];
820 let inv_d = 1.0 / d_kk;
821
822 let mut max_l = 0.0_f64;
823 for i in (k + 1)..n {
824 let l_ik = a[(i, k)] * inv_d;
825 a[(i, k)] = l_ik;
826 let abs_l = l_ik.abs();
827 if abs_l > max_l {
828 max_l = abs_l;
829 }
830 }
831
832 update_schur_1x1(a.rb_mut(), k, d_kk);
833
834 d.set_1x1(k, d_kk);
835 stats.num_1x1 += 1;
836 if max_l > stats.max_l_entry {
837 stats.max_l_entry = max_l;
838 }
839 pivot_log.push(AptpPivotRecord {
840 col: col_order[k],
841 pivot_type: PivotType::OneByOne,
842 max_l_entry: max_l,
843 was_fallback: true, });
845 k += 1;
846 }
847 }
848 }
849
850 let num_eliminated = k;
851
852 d.truncate(num_eliminated);
853
854 let delayed_cols: Vec<usize> = (num_eliminated..n).map(|i| col_order[i]).collect();
855
856 AptpFactorResult {
857 d,
858 perm: col_order,
859 num_eliminated,
860 delayed_cols,
861 stats,
862 pivot_log,
863 }
864}
865
866fn factor_block_diagonal(
892 mut a: MatMut<'_, f64>,
893 col_start: usize,
894 block_size: usize,
895 small: f64,
896 row_limit: usize,
897) -> (
898 MixedDiagonal,
899 Vec<usize>,
900 usize,
901 AptpStatistics,
902 Vec<AptpPivotRecord>,
903) {
904 let block_end = col_start + block_size;
905
906 let mut local_perm: Vec<usize> = (0..block_size).collect();
907 let mut d = MixedDiagonal::new(block_size);
908 let mut stats = AptpStatistics::default();
909 let mut pivot_log = Vec::with_capacity(block_size);
910 let mut k = 0; while k < block_size {
913 let cur = col_start + k; let search_end = block_end;
915 let remaining = block_size - k;
916
917 let mut max_val = 0.0_f64;
919 let mut max_row = cur;
920 let mut max_col = cur;
921 for j in cur..search_end {
922 for i in j..search_end {
923 let v = a[(i, j)].abs();
924 if v > max_val {
925 max_val = v;
926 max_row = i;
927 max_col = j;
928 }
929 }
930 }
931
932 if max_val < small {
934 stats.num_delayed += remaining;
936 for &perm_val in &local_perm[k..block_size] {
937 pivot_log.push(AptpPivotRecord {
938 col: perm_val,
939 pivot_type: PivotType::Delayed,
940 max_l_entry: 0.0,
941 was_fallback: false,
942 });
943 }
944 break;
945 }
946
947 if max_row == max_col {
949 if max_row != cur {
951 swap_symmetric_block(a.rb_mut(), cur, max_row, col_start, row_limit);
952 local_perm.swap(k, max_row - col_start);
953 }
954
955 let d_kk = a[(cur, cur)];
956 let inv_d = 1.0 / d_kk;
957
958 let mut max_l = 0.0_f64;
960 for i in (cur + 1)..block_end {
961 let l_ik = a[(i, cur)] * inv_d;
962 a[(i, cur)] = l_ik;
963 let abs_l = l_ik.abs();
964 if abs_l > max_l {
965 max_l = abs_l;
966 }
967 }
968
969 for j in (cur + 1)..block_end {
971 let l_j = a[(j, cur)];
972 let ldl_j = l_j * d_kk;
973 for i in j..block_end {
974 a[(i, j)] -= a[(i, cur)] * ldl_j;
975 }
976 }
977
978 d.set_1x1(k, d_kk);
979 stats.num_1x1 += 1;
980 if max_l > stats.max_l_entry {
981 stats.max_l_entry = max_l;
982 }
983 pivot_log.push(AptpPivotRecord {
984 col: local_perm[k],
985 pivot_type: PivotType::OneByOne,
986 max_l_entry: max_l,
987 was_fallback: false,
988 });
989 k += 1;
990 } else {
991 let t = max_row; let m_idx = max_col;
994
995 let a_mm = a[(m_idx, m_idx)];
996 let a_tt = a[(t, t)];
997 let a_tm = a[(t, m_idx)]; let delta = a_mm * a_tt - a_tm * a_tm;
999
1000 if remaining < 2 {
1001 let pivot_pos = if a_mm.abs() >= a_tt.abs() { m_idx } else { t };
1003 if pivot_pos != cur {
1004 swap_symmetric_block(a.rb_mut(), cur, pivot_pos, col_start, row_limit);
1005 local_perm.swap(k, pivot_pos - col_start);
1006 }
1007 let d_kk = a[(cur, cur)];
1008 if d_kk.abs() < small {
1009 stats.num_delayed += 1;
1010 pivot_log.push(AptpPivotRecord {
1011 col: local_perm[k],
1012 pivot_type: PivotType::Delayed,
1013 max_l_entry: 0.0,
1014 was_fallback: false,
1015 });
1016 break;
1017 }
1018 let inv_d = 1.0 / d_kk;
1019 for i in (cur + 1)..block_end {
1020 a[(i, cur)] *= inv_d;
1021 }
1022 d.set_1x1(k, d_kk);
1023 stats.num_1x1 += 1;
1024 k += 1;
1025 continue;
1026 }
1027
1028 if delta.abs() >= BUNCH_KAUFMAN_ALPHA * a_tm * a_tm {
1029 if m_idx != cur {
1031 swap_symmetric_block(a.rb_mut(), cur, m_idx, col_start, row_limit);
1032 local_perm.swap(k, m_idx - col_start);
1033 }
1034 let new_t = if t == cur { m_idx } else { t };
1035 if new_t != cur + 1 {
1036 swap_symmetric_block(a.rb_mut(), cur + 1, new_t, col_start, row_limit);
1037 local_perm.swap(k + 1, new_t - col_start);
1038 }
1039
1040 let a11 = a[(cur, cur)];
1041 let a22 = a[(cur + 1, cur + 1)];
1042 let a21 = a[(cur + 1, cur)];
1043 let det = a11 * a22 - a21 * a21;
1044 let inv_det = 1.0 / det;
1045
1046 let block = Block2x2 {
1047 first_col: k,
1048 a: a11,
1049 b: a21,
1050 c: a22,
1051 };
1052
1053 let mut max_l = 0.0_f64;
1055 let rows_start = cur + 2;
1056 for i in rows_start..block_end {
1057 let ai1 = a[(i, cur)];
1058 let ai2 = a[(i, cur + 1)];
1059 let l_i1 = (ai1 * a22 - ai2 * a21) * inv_det;
1060 let l_i2 = (ai2 * a11 - ai1 * a21) * inv_det;
1061 a[(i, cur)] = l_i1;
1062 a[(i, cur + 1)] = l_i2;
1063 if l_i1.abs() > max_l {
1064 max_l = l_i1.abs();
1065 }
1066 if l_i2.abs() > max_l {
1067 max_l = l_i2.abs();
1068 }
1069 }
1070
1071 for j in rows_start..block_end {
1073 let l1j = a[(j, cur)];
1074 let l2j = a[(j, cur + 1)];
1075 let w_j1 = l1j * a11 + l2j * a21;
1076 let w_j2 = l1j * a21 + l2j * a22;
1077 for i in j..block_end {
1078 let l1i = a[(i, cur)];
1079 let l2i = a[(i, cur + 1)];
1080 a[(i, j)] -= l1i * w_j1 + l2i * w_j2;
1081 }
1082 }
1083
1084 d.set_2x2(block);
1085 stats.num_2x2 += 1;
1086 if max_l > stats.max_l_entry {
1087 stats.max_l_entry = max_l;
1088 }
1089 pivot_log.push(AptpPivotRecord {
1090 col: local_perm[k],
1091 pivot_type: PivotType::TwoByTwo {
1092 partner: local_perm[k + 1],
1093 },
1094 max_l_entry: max_l,
1095 was_fallback: false,
1096 });
1097 pivot_log.push(AptpPivotRecord {
1098 col: local_perm[k + 1],
1099 pivot_type: PivotType::TwoByTwo {
1100 partner: local_perm[k],
1101 },
1102 max_l_entry: max_l,
1103 was_fallback: false,
1104 });
1105 k += 2;
1106 } else {
1107 let pivot_pos = if a_mm.abs() >= a_tt.abs() { m_idx } else { t };
1109 if pivot_pos != cur {
1110 swap_symmetric_block(a.rb_mut(), cur, pivot_pos, col_start, row_limit);
1111 local_perm.swap(k, pivot_pos - col_start);
1112 }
1113
1114 let d_kk = a[(cur, cur)];
1115 let inv_d = 1.0 / d_kk;
1116
1117 let mut max_l = 0.0_f64;
1118 for i in (cur + 1)..block_end {
1119 let l_ik = a[(i, cur)] * inv_d;
1120 a[(i, cur)] = l_ik;
1121 let abs_l = l_ik.abs();
1122 if abs_l > max_l {
1123 max_l = abs_l;
1124 }
1125 }
1126
1127 for j in (cur + 1)..block_end {
1128 let l_j = a[(j, cur)];
1129 let ldl_j = l_j * d_kk;
1130 for i in j..block_end {
1131 a[(i, j)] -= a[(i, cur)] * ldl_j;
1132 }
1133 }
1134
1135 d.set_1x1(k, d_kk);
1136 stats.num_1x1 += 1;
1137 if max_l > stats.max_l_entry {
1138 stats.max_l_entry = max_l;
1139 }
1140 pivot_log.push(AptpPivotRecord {
1141 col: local_perm[k],
1142 pivot_type: PivotType::OneByOne,
1143 max_l_entry: max_l,
1144 was_fallback: true,
1145 });
1146 k += 1;
1147 }
1148 }
1149 }
1150
1151 let nelim = k;
1152 (d, local_perm, nelim, stats, pivot_log)
1153}
1154
1155fn adjust_for_2x2_boundary(effective_nelim: usize, d: &MixedDiagonal) -> usize {
1161 if effective_nelim == 0 {
1162 return 0;
1163 }
1164 let last = effective_nelim - 1;
1165 match d.pivot_type(last) {
1166 PivotType::TwoByTwo { partner } if partner > last => {
1167 effective_nelim - 1
1169 }
1170 _ => effective_nelim,
1171 }
1172}
1173
1174struct BlockBackup<'a> {
1184 data: MatMut<'a, f64>,
1185}
1186
1187impl<'a> BlockBackup<'a> {
1188 fn create(
1191 a: MatRef<'_, f64>,
1192 col_start: usize,
1193 block_cols: usize,
1194 m: usize,
1195 buf: &'a mut Mat<f64>,
1196 ) -> Self {
1197 let rows = m - col_start;
1198 let mut data = buf.as_mut().submatrix_mut(0, 0, rows, block_cols);
1199 for j in 0..block_cols {
1200 for i in 0..rows {
1201 data[(i, j)] = a[(col_start + i, col_start + j)];
1202 }
1203 }
1204 BlockBackup { data }
1205 }
1206
1207 fn restore_failed(
1210 &self,
1211 mut a: MatMut<'_, f64>,
1212 col_start: usize,
1213 nelim: usize,
1214 block_cols: usize,
1215 m: usize,
1216 ) {
1217 let rows = m - col_start;
1218 for j in nelim..block_cols {
1219 for i in 0..rows {
1220 a[(col_start + i, col_start + j)] = self.data[(i, j)];
1221 }
1222 }
1223 }
1224
1225 fn restore_diagonal_with_perm(
1238 &self,
1239 mut a: MatMut<'_, f64>,
1240 col_start: usize,
1241 nelim: usize,
1242 block_cols: usize,
1243 m: usize,
1244 block_perm: &[usize],
1245 ) {
1246 for j in nelim..block_cols {
1252 let c = block_perm[j]; for i in nelim..block_cols {
1254 let r = block_perm[i]; let val = if r >= c {
1257 self.data[(r, c)]
1258 } else {
1259 self.data[(c, r)]
1260 };
1261 a[(col_start + i, col_start + j)] = val;
1262 }
1263 }
1264
1265 for j in nelim..block_cols {
1270 let c = block_perm[j]; for i in block_cols..(m - col_start) {
1272 a[(col_start + i, col_start + j)] = self.data[(i, c)];
1273 }
1274 }
1275 }
1276}
1277
1278#[allow(clippy::too_many_arguments)]
1295fn apply_and_check(
1296 mut a: MatMut<'_, f64>,
1297 col_start: usize,
1298 block_nelim: usize,
1299 block_cols: usize,
1300 m: usize,
1301 d: &MixedDiagonal,
1302 threshold: f64,
1303 par: Par,
1304 l11_buf: &mut Mat<f64>,
1305) -> usize {
1306 if block_nelim == 0 {
1307 return 0;
1308 }
1309
1310 let panel_rows = m - col_start - block_cols;
1311 if panel_rows == 0 {
1312 return block_nelim;
1313 }
1314
1315 let panel_start = col_start + block_cols;
1323
1324 {
1326 let src = a
1327 .rb()
1328 .submatrix(col_start, col_start, block_nelim, block_nelim);
1329 let mut dst = l11_buf
1330 .as_mut()
1331 .submatrix_mut(0, 0, block_nelim, block_nelim);
1332 dst.copy_from(src);
1333 }
1334 let l11_ref = l11_buf.as_ref().submatrix(0, 0, block_nelim, block_nelim);
1335 let panel = a
1336 .rb_mut()
1337 .submatrix_mut(panel_start, col_start, panel_rows, block_nelim);
1338 triangular_solve::solve_unit_lower_triangular_in_place(l11_ref, panel.transpose_mut(), par);
1339
1340 let mut col = 0;
1344 while col < block_nelim {
1345 match d.pivot_type(col) {
1346 PivotType::OneByOne => {
1347 let d_val = d.diagonal_1x1(col);
1348 let inv_d = 1.0 / d_val;
1349 for i in 0..panel_rows {
1350 a[(panel_start + i, col_start + col)] *= inv_d;
1351 }
1352 col += 1;
1353 }
1354 PivotType::TwoByTwo { partner } if partner > col => {
1355 let block = d.diagonal_2x2(col);
1356 let det = block.a * block.c - block.b * block.b;
1357 let inv_det = 1.0 / det;
1358 for i in 0..panel_rows {
1359 let x1 = a[(panel_start + i, col_start + col)];
1360 let x2 = a[(panel_start + i, col_start + col + 1)];
1361 a[(panel_start + i, col_start + col)] = (x1 * block.c - x2 * block.b) * inv_det;
1362 a[(panel_start + i, col_start + col + 1)] =
1363 (x2 * block.a - x1 * block.b) * inv_det;
1364 }
1365 col += 2;
1366 }
1367 _ => {
1368 col += 1;
1369 }
1370 }
1371 }
1372
1373 let stability_bound = 1.0 / threshold;
1375 let mut effective_nelim = block_nelim;
1376
1377 let mut scan_col = 0;
1378 while scan_col < block_nelim {
1379 let pivot_width = match d.pivot_type(scan_col) {
1380 PivotType::TwoByTwo { partner } if partner > scan_col => 2,
1381 _ => 1,
1382 };
1383
1384 let mut col_ok = true;
1385 for c in scan_col..scan_col + pivot_width {
1386 if c >= block_nelim {
1387 break;
1388 }
1389 for i in 0..panel_rows {
1390 if a[(panel_start + i, col_start + c)].abs() >= stability_bound {
1391 col_ok = false;
1392 break;
1393 }
1394 }
1395 if !col_ok {
1396 break;
1397 }
1398 }
1399
1400 if !col_ok {
1401 effective_nelim = scan_col;
1402 break;
1403 }
1404 scan_col += pivot_width;
1405 }
1406
1407 effective_nelim
1408}
1409
1410#[allow(clippy::too_many_arguments)]
1418fn update_trailing(
1419 mut a: MatMut<'_, f64>,
1420 col_start: usize,
1421 nelim: usize,
1422 block_cols: usize,
1423 m: usize,
1424 num_fully_summed: usize,
1425 d: &MixedDiagonal,
1426 par: Par,
1427 ld_buf: &mut Mat<f64>,
1428 copy_buf: &mut Mat<f64>,
1429) {
1430 if nelim == 0 {
1431 return;
1432 }
1433
1434 let trailing_start = col_start + block_cols;
1435 let trailing_size = m - trailing_start;
1436 if trailing_size == 0 {
1437 return;
1438 }
1439
1440 let p = num_fully_summed;
1442
1443 let mut w = ld_buf.as_mut().submatrix_mut(0, 0, trailing_size, nelim);
1447 let mut col = 0;
1448 while col < nelim {
1449 match d.pivot_type(col) {
1450 PivotType::OneByOne => {
1451 let d_val = d.diagonal_1x1(col);
1452 for i in 0..trailing_size {
1453 w[(i, col)] = a[(trailing_start + i, col_start + col)] * d_val;
1454 }
1455 col += 1;
1456 }
1457 PivotType::TwoByTwo { partner } if partner > col => {
1458 let block = d.diagonal_2x2(col);
1459 for i in 0..trailing_size {
1460 let l1 = a[(trailing_start + i, col_start + col)];
1461 let l2 = a[(trailing_start + i, col_start + col + 1)];
1462 w[(i, col)] = l1 * block.a + l2 * block.b;
1463 w[(i, col + 1)] = l1 * block.b + l2 * block.c;
1464 }
1465 col += 2;
1466 }
1467 _ => {
1468 col += 1;
1469 }
1470 }
1471 }
1472
1473 {
1475 let src = a
1476 .rb()
1477 .submatrix(trailing_start, col_start, trailing_size, nelim);
1478 let mut dst = copy_buf.as_mut().submatrix_mut(0, 0, trailing_size, nelim);
1479 dst.copy_from(src);
1480 }
1481
1482 let fs_size = p.saturating_sub(trailing_start);
1484 if fs_size > 0 {
1485 let w_fs = ld_buf.as_ref().submatrix(0, 0, fs_size, nelim);
1486 let l21_fs = copy_buf.as_ref().submatrix(0, 0, fs_size, nelim);
1487 let mut a_fs = a
1488 .rb_mut()
1489 .submatrix_mut(trailing_start, trailing_start, fs_size, fs_size);
1490
1491 tri_matmul::matmul_with_conj(
1492 a_fs.rb_mut(),
1493 BlockStructure::TriangularLower,
1494 Accum::Add,
1495 w_fs,
1496 BlockStructure::Rectangular,
1497 Conj::No,
1498 l21_fs.transpose(),
1499 BlockStructure::Rectangular,
1500 Conj::No,
1501 -1.0,
1502 par,
1503 );
1504 }
1505
1506 let nfs_size = m - p;
1508 if nfs_size > 0 && fs_size > 0 {
1509 let w_nfs = ld_buf.as_ref().submatrix(fs_size, 0, nfs_size, nelim);
1510 let l21_fs = copy_buf.as_ref().submatrix(0, 0, fs_size, nelim);
1511 let mut a_cross = a.submatrix_mut(p, trailing_start, nfs_size, fs_size);
1512
1513 faer::linalg::matmul::matmul_with_conj(
1514 a_cross.rb_mut(),
1515 Accum::Add,
1516 w_nfs,
1517 Conj::No,
1518 l21_fs.transpose(),
1519 Conj::No,
1520 -1.0,
1521 par,
1522 );
1523 }
1524
1525 }
1527
1528fn compute_ld_into(l: MatRef<'_, f64>, d: &MixedDiagonal, nelim: usize, mut dst: MatMut<'_, f64>) {
1537 let nrows = l.nrows();
1538 let mut col = 0;
1539 while col < nelim {
1540 match d.pivot_type(col) {
1541 PivotType::OneByOne => {
1542 let d_val = d.diagonal_1x1(col);
1543 for i in 0..nrows {
1544 dst[(i, col)] = l[(i, col)] * d_val;
1545 }
1546 col += 1;
1547 }
1548 PivotType::TwoByTwo { partner } if partner > col => {
1549 let block = d.diagonal_2x2(col);
1550 for i in 0..nrows {
1551 let l1 = l[(i, col)];
1552 let l2 = l[(i, col + 1)];
1553 dst[(i, col)] = l1 * block.a + l2 * block.b;
1554 dst[(i, col + 1)] = l1 * block.b + l2 * block.c;
1555 }
1556 col += 2;
1557 }
1558 _ => {
1559 col += 1;
1560 }
1561 }
1562 }
1563}
1564
1565#[allow(clippy::too_many_arguments)]
1590pub(crate) fn compute_contribution_gemm(
1591 frontal_data: &Mat<f64>,
1592 num_fully_summed: usize,
1593 num_eliminated: usize,
1594 m: usize,
1595 d: &MixedDiagonal,
1596 contrib_buffer: &mut Mat<f64>,
1597 ld_workspace: &mut Mat<f64>,
1598 par: Par,
1599) {
1600 let p = num_fully_summed;
1601 let ne = num_eliminated;
1602 let nfs = m - p;
1603
1604 if nfs == 0 {
1605 return; }
1607
1608 for j in 0..nfs {
1616 let col_len = nfs - j;
1617 let src = &frontal_data.col_as_slice(p + j)[p + j..m];
1618 contrib_buffer.col_as_slice_mut(j)[j..j + col_len].copy_from_slice(src);
1619 }
1620
1621 if ne == 0 {
1622 return; }
1624
1625 let l21_nfs = frontal_data.as_ref().submatrix(p, 0, nfs, ne);
1628
1629 if ld_workspace.nrows() < nfs || ld_workspace.ncols() < ne {
1632 *ld_workspace = Mat::zeros(nfs.max(ld_workspace.nrows()), ne.max(ld_workspace.ncols()));
1633 }
1634 let mut w = ld_workspace.as_mut().submatrix_mut(0, 0, nfs, ne);
1635 w.fill(0.0);
1636 compute_ld_into(l21_nfs, d, ne, w.rb_mut());
1637
1638 tri_matmul::matmul_with_conj(
1640 contrib_buffer.as_mut().submatrix_mut(0, 0, nfs, nfs),
1641 BlockStructure::TriangularLower,
1642 Accum::Add,
1643 w.as_ref(),
1644 BlockStructure::Rectangular,
1645 Conj::No,
1646 l21_nfs.transpose(),
1647 BlockStructure::Rectangular,
1648 Conj::No,
1649 -1.0,
1650 par,
1651 );
1652}
1653
1654#[allow(clippy::too_many_arguments)]
1669fn update_cross_terms(
1670 mut a: MatMut<'_, f64>,
1671 col_start: usize,
1672 nelim: usize,
1673 block_cols: usize,
1674 m: usize,
1675 d: &MixedDiagonal,
1676 ld_buf: &mut Mat<f64>,
1677 copy_buf: &mut Mat<f64>,
1678) {
1679 if nelim == 0 || nelim >= block_cols {
1680 return; }
1682
1683 let n_failed = block_cols - nelim;
1684 let trailing_start = col_start + block_cols;
1685 let trailing_size = m - trailing_start;
1686
1687 {
1690 let src = a
1691 .rb()
1692 .submatrix(col_start + nelim, col_start, n_failed, nelim);
1693 let mut dst = copy_buf.as_mut().submatrix_mut(0, 0, n_failed, nelim);
1694 dst.copy_from(src);
1695 }
1696
1697 {
1699 let l_blk = copy_buf.as_ref().submatrix(0, 0, n_failed, nelim);
1700 compute_ld_into(
1701 l_blk,
1702 d,
1703 nelim,
1704 ld_buf.as_mut().submatrix_mut(0, 0, n_failed, nelim),
1705 );
1706 }
1707
1708 {
1711 let l_blk = copy_buf.as_ref().submatrix(0, 0, n_failed, nelim);
1712 let w_blk = ld_buf.as_ref().submatrix(0, 0, n_failed, nelim);
1713 for j in 0..n_failed {
1714 for i in j..n_failed {
1715 let mut sum = 0.0;
1716 for c in 0..nelim {
1717 sum += w_blk[(i, c)] * l_blk[(j, c)];
1718 }
1719 a[(col_start + nelim + i, col_start + nelim + j)] -= sum;
1720 }
1721 }
1722 }
1723
1724 if trailing_size > 0 {
1727 {
1729 let src = a
1730 .rb()
1731 .submatrix(trailing_start, col_start, trailing_size, nelim);
1732 let mut dst = copy_buf
1733 .as_mut()
1734 .submatrix_mut(n_failed, 0, trailing_size, nelim);
1735 dst.copy_from(src);
1736 }
1737
1738 let l_blk = copy_buf.as_ref().submatrix(0, 0, n_failed, nelim);
1741 let l_panel = copy_buf
1742 .as_ref()
1743 .submatrix(n_failed, 0, trailing_size, nelim);
1744
1745 compute_ld_into(
1747 l_panel,
1748 d,
1749 nelim,
1750 ld_buf
1751 .as_mut()
1752 .submatrix_mut(n_failed, 0, trailing_size, nelim),
1753 );
1754 let w_panel = ld_buf.as_ref().submatrix(n_failed, 0, trailing_size, nelim);
1755
1756 for j in 0..n_failed {
1757 for i in 0..trailing_size {
1758 let mut sum = 0.0;
1759 for c in 0..nelim {
1760 sum += w_panel[(i, c)] * l_blk[(j, c)];
1761 }
1762 a[(trailing_start + i, col_start + nelim + j)] -= sum;
1763 }
1764 }
1765 }
1766}
1767
1768fn factor_inner(
1812 mut a: MatMut<'_, f64>,
1813 num_fully_summed: usize,
1814 nfs_boundary: usize,
1815 options: &AptpOptions,
1816 kernel_ws: &mut AptpKernelWorkspace,
1817) -> Result<AptpFactorResult, SparseError> {
1818 let m = a.nrows();
1819 let ib = options.inner_block_size;
1820 let small = options.small;
1821 let threshold = options.threshold;
1822 let p = num_fully_summed;
1823
1824 let mut col_order: Vec<usize> = (0..m).collect();
1825 let mut d = MixedDiagonal::new(p);
1826 let mut stats = AptpStatistics::default();
1827 let mut pivot_log = Vec::with_capacity(p);
1828 let mut k = 0;
1829 let mut end_pos = p;
1830
1831 let mut panel_perm_buf = vec![0.0f64; ib];
1834 let mut row_perm_buf = vec![0.0f64; ib];
1835 let mut col_order_buf = vec![0usize; ib];
1836
1837 while k < end_pos {
1856 let block_size = (end_pos - k).min(ib);
1857 let block_end = k + block_size;
1858
1859 let backup = BlockBackup::create(a.as_ref(), k, block_size, m, &mut kernel_ws.backup);
1861
1862 let (block_d, block_perm, block_nelim, block_stats, block_log) =
1866 factor_block_diagonal(a.rb_mut(), k, block_size, small, block_end);
1867
1868 if block_nelim == 0 {
1869 backup.restore_failed(a.rb_mut(), k, 0, block_size, m);
1871 for offset in 0..block_size {
1872 let delayed_orig = col_order[k + block_perm[offset]];
1873 end_pos -= 1;
1874 if k + offset < end_pos {
1875 swap_symmetric(a.rb_mut(), k + offset, end_pos);
1876 col_order.swap(k + offset, end_pos);
1877 }
1878 stats.num_delayed += 1;
1879 pivot_log.push(AptpPivotRecord {
1880 col: delayed_orig,
1881 pivot_type: PivotType::Delayed,
1882 max_l_entry: 0.0,
1883 was_fallback: false,
1884 });
1885 }
1886 continue;
1887 }
1888
1889 let panel_start = block_end;
1892 for r in panel_start..m {
1893 for i in 0..block_size {
1894 panel_perm_buf[i] = a[(r, k + block_perm[i])];
1895 }
1896 for i in 0..block_size {
1897 a[(r, k + i)] = panel_perm_buf[i];
1898 }
1899 }
1900
1901 {
1904 let mut bc = 0;
1905 while bc < block_nelim {
1906 match block_d.pivot_type(bc) {
1907 PivotType::TwoByTwo { partner } if partner > bc => {
1908 a[(k + bc + 1, k + bc)] = 0.0;
1909 bc += 2;
1910 }
1911 _ => {
1912 bc += 1;
1913 }
1914 }
1915 }
1916 }
1917
1918 if k > 0 {
1922 for c in 0..k {
1923 for i in 0..block_size {
1924 row_perm_buf[i] = a[(k + block_perm[i], c)];
1925 }
1926 for i in 0..block_size {
1927 a[(k + i, c)] = row_perm_buf[i];
1928 }
1929 }
1930 }
1931
1932 col_order_buf[..block_size].copy_from_slice(&col_order[k..k + block_size]);
1934 for i in 0..block_size {
1935 col_order[k + i] = col_order_buf[block_perm[i]];
1936 }
1937
1938 let mut effective_nelim = apply_and_check(
1940 a.rb_mut(),
1941 k,
1942 block_nelim,
1943 block_size,
1944 m,
1945 &block_d,
1946 threshold,
1947 options.par,
1948 &mut kernel_ws.l11_buf,
1949 );
1950
1951 effective_nelim = adjust_for_2x2_boundary(effective_nelim, &block_d);
1953
1954 if effective_nelim < block_nelim {
1958 backup.restore_diagonal_with_perm(
1959 a.rb_mut(),
1960 k,
1961 effective_nelim,
1962 block_size,
1963 m,
1964 &block_perm,
1965 );
1966 }
1967
1968 let nelim = effective_nelim;
1970
1971 if nelim > 0 {
1978 update_trailing(
1979 a.rb_mut(),
1980 k,
1981 nelim,
1982 block_size,
1983 m,
1984 nfs_boundary,
1985 &block_d,
1986 options.par,
1987 &mut kernel_ws.ld_buf,
1988 &mut kernel_ws.copy_buf,
1989 );
1990 update_cross_terms(
1991 a.rb_mut(),
1992 k,
1993 nelim,
1994 block_size,
1995 m,
1996 &block_d,
1997 &mut kernel_ws.ld_buf,
1998 &mut kernel_ws.copy_buf,
1999 );
2000 }
2001
2002 d.copy_from_offset(&block_d, k, nelim);
2004
2005 let mut passed_1x1 = 0;
2007 let mut passed_2x2 = 0;
2008 let mut sc = 0;
2009 while sc < nelim {
2010 match block_d.pivot_type(sc) {
2011 PivotType::OneByOne => {
2012 passed_1x1 += 1;
2013 sc += 1;
2014 }
2015 PivotType::TwoByTwo { partner } if partner > sc => {
2016 passed_2x2 += 1;
2017 sc += 2;
2018 }
2019 _ => {
2020 sc += 1;
2021 }
2022 }
2023 }
2024 stats.num_1x1 += passed_1x1;
2025 stats.num_2x2 += passed_2x2;
2026 if block_stats.max_l_entry > stats.max_l_entry {
2027 stats.max_l_entry = block_stats.max_l_entry;
2028 }
2029 for c in 0..nelim {
2031 for i in panel_start..m {
2032 let v = a[(i, k + c)].abs();
2033 if v > stats.max_l_entry {
2034 stats.max_l_entry = v;
2035 }
2036 }
2037 }
2038
2039 for record in &block_log {
2041 if !matches!(record.pivot_type, PivotType::Delayed) && record.col < nelim {
2042 let global_col = col_order[k + record.col];
2043 let global_pivot_type = match record.pivot_type {
2044 PivotType::TwoByTwo { partner } => PivotType::TwoByTwo {
2045 partner: col_order[k + partner],
2046 },
2047 other => other,
2048 };
2049 pivot_log.push(AptpPivotRecord {
2050 col: global_col,
2051 pivot_type: global_pivot_type,
2052 max_l_entry: record.max_l_entry,
2053 was_fallback: record.was_fallback,
2054 });
2055 }
2056 }
2057
2058 if nelim < block_size {
2060 let n_delayed = block_size - nelim;
2061 for i in (0..n_delayed).rev() {
2062 let delayed_pos = k + nelim + i;
2063 end_pos -= 1;
2064 if delayed_pos < end_pos {
2065 swap_symmetric(a.rb_mut(), delayed_pos, end_pos);
2066 col_order.swap(delayed_pos, end_pos);
2067 }
2068 stats.num_delayed += 1;
2069 pivot_log.push(AptpPivotRecord {
2070 col: col_order[end_pos],
2071 pivot_type: PivotType::Delayed,
2072 max_l_entry: 0.0,
2073 was_fallback: false,
2074 });
2075 }
2076 }
2077
2078 k += nelim;
2079 }
2080
2081 let num_eliminated = k;
2082
2083 d.truncate(num_eliminated);
2084
2085 let delayed_cols: Vec<usize> = (num_eliminated..p).map(|i| col_order[i]).collect();
2086
2087 Ok(AptpFactorResult {
2088 d,
2089 perm: col_order,
2090 num_eliminated,
2091 delayed_cols,
2092 stats,
2093 pivot_log,
2094 })
2095}
2096
2097fn two_level_factor(
2109 mut a: MatMut<'_, f64>,
2110 num_fully_summed: usize,
2111 options: &AptpOptions,
2112 kernel_ws: &mut AptpKernelWorkspace,
2113) -> Result<AptpFactorResult, SparseError> {
2114 let m = a.nrows();
2115 let nb = options.outer_block_size;
2116 let p = num_fully_summed;
2117
2118 let mut col_order: Vec<usize> = (0..m).collect();
2119 let mut global_d = MixedDiagonal::new(p);
2120 let mut stats = AptpStatistics::default();
2121 let mut pivot_log = Vec::with_capacity(p);
2122
2123 let mut global_nelim = 0;
2124 let mut remaining_fully_summed = p;
2125
2126 while remaining_fully_summed > 0 {
2127 let col_start = global_nelim;
2128 let block_cols = remaining_fully_summed.min(nb);
2129
2130 let block_m = m - col_start;
2142 let block_result = {
2143 let block_view = a
2144 .rb_mut()
2145 .submatrix_mut(col_start, col_start, block_m, block_m);
2146 factor_inner(block_view, block_cols, p - col_start, options, kernel_ws)?
2150 };
2151 let block_nelim = block_result.num_eliminated;
2152
2153 if col_start > 0 {
2162 let block_perm = &block_result.perm;
2163 let mut temp = vec![0.0f64; block_cols];
2164 for c in 0..col_start {
2165 for i in 0..block_cols {
2167 temp[i] = a[(col_start + block_perm[i], c)];
2168 }
2169 for i in 0..block_cols {
2171 a[(col_start + i, c)] = temp[i];
2172 }
2173 }
2174 }
2175
2176 {
2182 let block_perm = &block_result.perm;
2183 let orig_order: Vec<usize> = col_order[col_start..col_start + block_cols].to_vec();
2184 for i in 0..block_cols {
2185 if block_perm[i] < block_cols {
2188 col_order[col_start + i] = orig_order[block_perm[i]];
2189 }
2190 }
2191 }
2192
2193 if block_nelim < block_cols {
2196 let n_failed = block_cols - block_nelim;
2197 for i in 0..n_failed {
2198 let failed_pos = col_start + block_nelim + i;
2199 let end = col_start + remaining_fully_summed - 1 - i;
2200 if failed_pos < end {
2201 swap_symmetric(a.rb_mut(), failed_pos, end);
2202 col_order.swap(failed_pos, end);
2203 }
2204 }
2205 stats.num_delayed += n_failed;
2206 }
2207
2208 global_d.copy_from_offset(&block_result.d, global_nelim, block_nelim);
2210
2211 stats.num_1x1 += block_result.stats.num_1x1;
2213 stats.num_2x2 += block_result.stats.num_2x2;
2214 if block_result.stats.max_l_entry > stats.max_l_entry {
2215 stats.max_l_entry = block_result.stats.max_l_entry;
2216 }
2217
2218 for record in &block_result.pivot_log {
2220 if !matches!(record.pivot_type, PivotType::Delayed) {
2221 pivot_log.push(record.clone());
2222 }
2223 }
2224 let n_failed = block_cols - block_nelim;
2226 for i in 0..n_failed {
2227 let delayed_pos = col_start + remaining_fully_summed - 1 - i;
2228 pivot_log.push(AptpPivotRecord {
2229 col: col_order[delayed_pos],
2230 pivot_type: PivotType::Delayed,
2231 max_l_entry: 0.0,
2232 was_fallback: false,
2233 });
2234 }
2235
2236 global_nelim += block_nelim;
2237 remaining_fully_summed -= block_cols;
2238 }
2239
2240 global_d.truncate(global_nelim);
2241
2242 let delayed_cols: Vec<usize> = (global_nelim..p).map(|i| col_order[i]).collect();
2243
2244 Ok(AptpFactorResult {
2245 d: global_d,
2246 perm: col_order,
2247 num_eliminated: global_nelim,
2248 delayed_cols,
2249 stats,
2250 pivot_log,
2251 })
2252}
2253
2254fn tpp_test_2x2(
2269 a: MatRef<'_, f64>,
2270 t: usize,
2271 p: usize,
2272 maxt: f64,
2273 maxp: f64,
2274 u: f64,
2275 small: f64,
2276) -> Option<(f64, f64, f64)> {
2277 debug_assert!(t < p, "tpp_test_2x2 requires t < p");
2278
2279 let a11 = a[(t, t)];
2280 let a21 = a[(p, t)]; let a22 = a[(p, p)];
2282
2283 let maxpiv = a11.abs().max(a21.abs()).max(a22.abs());
2285 if maxpiv < small {
2286 return None;
2287 }
2288
2289 let detscale = 1.0 / maxpiv;
2291 let detpiv0 = (a11 * detscale) * a22;
2292 let detpiv1 = (a21 * detscale) * a21;
2293 let detpiv = detpiv0 - detpiv1;
2294 if detpiv.abs() < small.max((detpiv0 / 2.0).abs()).max((detpiv1 / 2.0).abs()) {
2295 return None;
2296 }
2297
2298 let d_inv_11 = (a22 * detscale) / detpiv;
2300 let d_inv_12 = (-a21 * detscale) / detpiv;
2301 let d_inv_22 = (a11 * detscale) / detpiv;
2302
2303 if maxt.max(maxp) < small {
2304 return Some((a11, a21, a22));
2306 }
2307
2308 let x1 = d_inv_11.abs() * maxt + d_inv_12.abs() * maxp;
2309 let x2 = d_inv_12.abs() * maxt + d_inv_22.abs() * maxp;
2310 if u * x1.max(x2) < 1.0 {
2311 Some((a11, a21, a22))
2312 } else {
2313 None
2314 }
2315}
2316
2317#[allow(clippy::too_many_arguments)]
2335fn tpp_factor(
2336 mut a: MatMut<'_, f64>,
2337 start_col: usize,
2338 num_fully_summed: usize,
2339 col_order: &mut [usize],
2340 global_d: &mut MixedDiagonal,
2341 stats: &mut AptpStatistics,
2342 pivot_log: &mut Vec<AptpPivotRecord>,
2343 options: &AptpOptions,
2344) -> usize {
2345 let m = a.nrows();
2346 let n = num_fully_summed;
2347 let u = options.threshold;
2348 let small = options.small;
2349
2350 let mut nelim = start_col;
2351
2352 while nelim < n {
2353 if tpp_is_col_small(a.as_ref(), nelim, nelim, m, small) {
2355 global_d.set_1x1(nelim, 0.0);
2357 stats.num_1x1 += 1;
2358 pivot_log.push(AptpPivotRecord {
2359 col: col_order[nelim],
2360 pivot_type: PivotType::OneByOne,
2361 max_l_entry: 0.0,
2362 was_fallback: true,
2363 });
2364 for r in (nelim + 1)..m {
2366 a[(r, nelim)] = 0.0;
2367 }
2368 nelim += 1;
2369 continue;
2370 }
2371
2372 let mut found = false;
2374 for p in (nelim + 1)..n {
2375 if tpp_is_col_small(a.as_ref(), p, nelim, m, small) {
2377 if p != nelim {
2379 swap_symmetric(a.rb_mut(), p, nelim);
2380 col_order.swap(p, nelim);
2381 }
2382 global_d.set_1x1(nelim, 0.0);
2383 stats.num_1x1 += 1;
2384 pivot_log.push(AptpPivotRecord {
2385 col: col_order[nelim],
2386 pivot_type: PivotType::OneByOne,
2387 max_l_entry: 0.0,
2388 was_fallback: true,
2389 });
2390 for r in (nelim + 1)..m {
2391 a[(r, nelim)] = 0.0;
2392 }
2393 nelim += 1;
2394 found = true;
2395 break;
2396 }
2397
2398 let t = tpp_find_row_abs_max(a.as_ref(), p, nelim, p);
2400
2401 let maxt = tpp_find_rc_abs_max_exclude(a.as_ref(), t, nelim, m, p);
2403 let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), p, nelim, m, t);
2404 if tpp_test_2x2(a.as_ref(), t, p, maxt, maxp, u, small).is_some() {
2405 if t != nelim {
2407 swap_symmetric(a.rb_mut(), t, nelim);
2408 col_order.swap(t, nelim);
2409 }
2410 let new_p = if p == nelim { t } else { p };
2412 if new_p != nelim + 1 {
2413 swap_symmetric(a.rb_mut(), new_p, nelim + 1);
2414 col_order.swap(new_p, nelim + 1);
2415 }
2416
2417 let d11 = a[(nelim, nelim)];
2419 let d21 = a[(nelim + 1, nelim)];
2420 let d22 = a[(nelim + 1, nelim + 1)];
2421
2422 let max_l = tpp_apply_2x2(a.rb_mut(), nelim, m, n);
2424
2425 global_d.set_2x2(Block2x2 {
2427 first_col: nelim,
2428 a: d11,
2429 b: d21,
2430 c: d22,
2431 });
2432 stats.num_2x2 += 1;
2433 stats.max_l_entry = stats.max_l_entry.max(max_l);
2434 pivot_log.push(AptpPivotRecord {
2435 col: col_order[nelim],
2436 pivot_type: PivotType::TwoByTwo {
2437 partner: col_order[nelim + 1],
2438 },
2439 max_l_entry: max_l,
2440 was_fallback: true,
2441 });
2442 pivot_log.push(AptpPivotRecord {
2443 col: col_order[nelim + 1],
2444 pivot_type: PivotType::TwoByTwo {
2445 partner: col_order[nelim],
2446 },
2447 max_l_entry: max_l,
2448 was_fallback: true,
2449 });
2450 nelim += 2;
2451 found = true;
2452 break;
2453 }
2454
2455 let maxp_with_t = maxp.max(tpp_sym_entry(a.as_ref(), p, t).abs());
2458 if a[(p, p)].abs() >= u * maxp_with_t {
2459 if p != nelim {
2461 swap_symmetric(a.rb_mut(), p, nelim);
2462 col_order.swap(p, nelim);
2463 }
2464
2465 let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
2466
2467 global_d.set_1x1(nelim, a[(nelim, nelim)]);
2468 stats.num_1x1 += 1;
2469 stats.max_l_entry = stats.max_l_entry.max(max_l);
2470 pivot_log.push(AptpPivotRecord {
2471 col: col_order[nelim],
2472 pivot_type: PivotType::OneByOne,
2473 max_l_entry: max_l,
2474 was_fallback: true,
2475 });
2476 nelim += 1;
2477 found = true;
2478 break;
2479 }
2480 }
2481
2482 if !found {
2483 let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), nelim, nelim, m, usize::MAX);
2485 if a[(nelim, nelim)].abs() >= u * maxp {
2486 let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
2487
2488 global_d.set_1x1(nelim, a[(nelim, nelim)]);
2489 stats.num_1x1 += 1;
2490 stats.max_l_entry = stats.max_l_entry.max(max_l);
2491 pivot_log.push(AptpPivotRecord {
2492 col: col_order[nelim],
2493 pivot_type: PivotType::OneByOne,
2494 max_l_entry: max_l,
2495 was_fallback: true,
2496 });
2497 nelim += 1;
2498 } else {
2499 break;
2501 }
2502 }
2503 }
2504
2505 nelim - start_col
2506}
2507
2508fn swap_rect(mut a: MatMut<'_, f64>, i: usize, j: usize) {
2522 if i == j {
2523 return;
2524 }
2525 let (i, j) = if i < j { (i, j) } else { (j, i) };
2526 let n = a.ncols();
2527
2528 if i < n && j < n {
2530 let tmp = a[(i, i)];
2531 a[(i, i)] = a[(j, j)];
2532 a[(j, j)] = tmp;
2533 }
2534
2535 for k in 0..i.min(n) {
2537 let tmp = a[(i, k)];
2538 a[(i, k)] = a[(j, k)];
2539 a[(j, k)] = tmp;
2540 }
2541
2542 for k in (i + 1)..j {
2545 if k < n {
2546 let tmp = a[(k, i)];
2547 a[(k, i)] = a[(j, k)];
2548 a[(j, k)] = tmp;
2549 } else if i < n {
2550 break;
2554 }
2555 }
2556
2557 if i < n && j < n {
2560 for k in (j + 1)..a.nrows() {
2561 let tmp = a[(k, i)];
2562 a[(k, i)] = a[(k, j)];
2563 a[(k, j)] = tmp;
2564 }
2565 }
2566
2567 }
2569
2570#[inline]
2577fn tpp_sym_entry(a: MatRef<'_, f64>, row: usize, col: usize) -> f64 {
2578 let n = a.ncols();
2579 if col < n && row >= col {
2580 a[(row, col)]
2581 } else if row < n && col >= row {
2582 a[(col, row)]
2583 } else {
2584 panic!("tpp_sym_entry: row={} col={} both >= ncols={}", row, col, n);
2585 }
2586}
2587
2588fn tpp_apply_1x1(mut a: MatMut<'_, f64>, nelim: usize, m: usize, num_fully_summed: usize) -> f64 {
2596 let d = a[(nelim, nelim)];
2597 let inv_d = 1.0 / d;
2598 let p = num_fully_summed;
2599
2600 let mut max_l = 0.0_f64;
2602 for i in (nelim + 1)..m {
2603 let l_ik = a[(i, nelim)] * inv_d;
2604 a[(i, nelim)] = l_ik;
2605 max_l = max_l.max(l_ik.abs());
2606 }
2607
2608 let schur_col_end = p.min(a.ncols());
2611 for j in (nelim + 1)..schur_col_end {
2612 let ldlj = a[(j, nelim)] * d;
2613 for i in j..m {
2614 a[(i, j)] -= a[(i, nelim)] * ldlj;
2615 }
2616 }
2617
2618 max_l
2619}
2620
2621fn tpp_apply_2x2(mut a: MatMut<'_, f64>, nelim: usize, m: usize, num_fully_summed: usize) -> f64 {
2629 let a11 = a[(nelim, nelim)];
2630 let a21 = a[(nelim + 1, nelim)];
2631 let a22 = a[(nelim + 1, nelim + 1)];
2632 let det = a11 * a22 - a21 * a21;
2633 let inv_det = 1.0 / det;
2634 let p = num_fully_summed;
2635
2636 let mut max_l = 0.0_f64;
2638 let start = nelim + 2;
2639 for i in start..m {
2640 let ai1 = a[(i, nelim)];
2641 let ai2 = a[(i, nelim + 1)];
2642 let l_i1 = (ai1 * a22 - ai2 * a21) * inv_det;
2643 let l_i2 = (ai2 * a11 - ai1 * a21) * inv_det;
2644 a[(i, nelim)] = l_i1;
2645 a[(i, nelim + 1)] = l_i2;
2646 max_l = max_l.max(l_i1.abs()).max(l_i2.abs());
2647 }
2648
2649 let schur_col_end = p.min(a.ncols());
2652 for j in start..schur_col_end {
2653 let wj1 = a[(j, nelim)] * a11 + a[(j, nelim + 1)] * a21;
2654 let wj2 = a[(j, nelim)] * a21 + a[(j, nelim + 1)] * a22;
2655 for i in j..m {
2656 a[(i, j)] -= a[(i, nelim)] * wj1 + a[(i, nelim + 1)] * wj2;
2657 }
2658 }
2659
2660 a[(nelim + 1, nelim)] = 0.0;
2662
2663 max_l
2664}
2665
2666fn tpp_is_col_small(a: MatRef<'_, f64>, idx: usize, from: usize, to: usize, small: f64) -> bool {
2671 let n = a.ncols();
2672 for c in from..idx.min(n) {
2674 if a[(idx, c)].abs() >= small {
2675 return false;
2676 }
2677 }
2678 if idx < n {
2680 for r in idx..to {
2681 if a[(r, idx)].abs() >= small {
2682 return false;
2683 }
2684 }
2685 }
2686 true
2687}
2688
2689fn tpp_find_row_abs_max(a: MatRef<'_, f64>, p: usize, from: usize, to: usize) -> usize {
2693 if from >= to {
2694 return from;
2695 }
2696 let mut best_idx = from;
2697 let mut best_val = tpp_sym_entry(a, p, from).abs();
2698 for c in (from + 1)..to {
2699 let v = tpp_sym_entry(a, p, c).abs();
2700 if v > best_val {
2701 best_idx = c;
2702 best_val = v;
2703 }
2704 }
2705 best_idx
2706}
2707
2708fn tpp_find_rc_abs_max_exclude(
2712 a: MatRef<'_, f64>,
2713 col: usize,
2714 nelim: usize,
2715 m: usize,
2716 exclude: usize,
2717) -> f64 {
2718 let n = a.ncols();
2719 let mut best = 0.0_f64;
2720 for c in nelim..col.min(n) {
2722 if c == exclude {
2723 continue;
2724 }
2725 best = best.max(a[(col, c)].abs());
2726 }
2727 if col < n {
2729 for r in (col + 1)..m {
2730 if r == exclude {
2731 continue;
2732 }
2733 best = best.max(a[(r, col)].abs());
2734 }
2735 }
2736 best
2737}
2738
2739pub(super) fn tpp_factor_rect(
2762 mut a: MatMut<'_, f64>,
2763 num_fully_summed: usize,
2764 options: &AptpOptions,
2765) -> Result<AptpFactorResult, SparseError> {
2766 let m = a.nrows();
2767 let n = num_fully_summed;
2768
2769 debug_assert!(
2770 a.ncols() >= n,
2771 "tpp_factor_rect: ncols {} < num_fully_summed {}",
2772 a.ncols(),
2773 n
2774 );
2775 debug_assert!(m >= n, "tpp_factor_rect: nrows {} < ncols {}", m, n);
2776
2777 let u = options.threshold;
2778 let small = options.small;
2779
2780 let mut col_order: Vec<usize> = (0..m).collect();
2781 let mut d = MixedDiagonal::new(n);
2782 let mut stats = AptpStatistics::default();
2783 let mut pivot_log = Vec::with_capacity(n);
2784
2785 let mut nelim = 0;
2786
2787 while nelim < n {
2788 if tpp_is_col_small(a.as_ref(), nelim, nelim, m, small) {
2790 d.set_1x1(nelim, 0.0);
2791 stats.num_1x1 += 1;
2792 pivot_log.push(AptpPivotRecord {
2793 col: col_order[nelim],
2794 pivot_type: PivotType::OneByOne,
2795 max_l_entry: 0.0,
2796 was_fallback: false,
2797 });
2798 if nelim < a.ncols() {
2799 for r in (nelim + 1)..m {
2800 a[(r, nelim)] = 0.0;
2801 }
2802 }
2803 nelim += 1;
2804 continue;
2805 }
2806
2807 let mut found = false;
2809 for p in (nelim + 1)..n {
2810 if tpp_is_col_small(a.as_ref(), p, nelim, m, small) {
2812 if p != nelim {
2813 swap_rect(a.rb_mut(), p, nelim);
2814 col_order.swap(p, nelim);
2815 }
2816 d.set_1x1(nelim, 0.0);
2817 stats.num_1x1 += 1;
2818 pivot_log.push(AptpPivotRecord {
2819 col: col_order[nelim],
2820 pivot_type: PivotType::OneByOne,
2821 max_l_entry: 0.0,
2822 was_fallback: false,
2823 });
2824 for r in (nelim + 1)..m {
2825 a[(r, nelim)] = 0.0;
2826 }
2827 nelim += 1;
2828 found = true;
2829 break;
2830 }
2831
2832 let t = tpp_find_row_abs_max(a.as_ref(), p, nelim, p);
2834
2835 let maxt = tpp_find_rc_abs_max_exclude(a.as_ref(), t, nelim, m, p);
2837 let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), p, nelim, m, t);
2838 if tpp_test_2x2(a.as_ref(), t, p, maxt, maxp, u, small).is_some() {
2841 if t != nelim {
2843 swap_rect(a.rb_mut(), t, nelim);
2844 col_order.swap(t, nelim);
2845 }
2846 let new_p = if p == nelim { t } else { p };
2847 if new_p != nelim + 1 {
2848 swap_rect(a.rb_mut(), new_p, nelim + 1);
2849 col_order.swap(new_p, nelim + 1);
2850 }
2851
2852 let d11 = a[(nelim, nelim)];
2853 let d21 = a[(nelim + 1, nelim)];
2854 let d22 = a[(nelim + 1, nelim + 1)];
2855
2856 let max_l = tpp_apply_2x2(a.rb_mut(), nelim, m, n);
2857
2858 d.set_2x2(Block2x2 {
2859 first_col: nelim,
2860 a: d11,
2861 b: d21,
2862 c: d22,
2863 });
2864 stats.num_2x2 += 1;
2865 stats.max_l_entry = stats.max_l_entry.max(max_l);
2866 pivot_log.push(AptpPivotRecord {
2867 col: col_order[nelim],
2868 pivot_type: PivotType::TwoByTwo {
2869 partner: col_order[nelim + 1],
2870 },
2871 max_l_entry: max_l,
2872 was_fallback: false,
2873 });
2874 pivot_log.push(AptpPivotRecord {
2875 col: col_order[nelim + 1],
2876 pivot_type: PivotType::TwoByTwo {
2877 partner: col_order[nelim],
2878 },
2879 max_l_entry: max_l,
2880 was_fallback: false,
2881 });
2882 nelim += 2;
2883 found = true;
2884 break;
2885 }
2886
2887 let maxp_with_t = maxp.max(tpp_sym_entry(a.as_ref(), p, t).abs());
2889 if a[(p, p)].abs() >= u * maxp_with_t {
2890 if p != nelim {
2891 swap_rect(a.rb_mut(), p, nelim);
2892 col_order.swap(p, nelim);
2893 }
2894
2895 let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
2896
2897 d.set_1x1(nelim, a[(nelim, nelim)]);
2898 stats.num_1x1 += 1;
2899 stats.max_l_entry = stats.max_l_entry.max(max_l);
2900 pivot_log.push(AptpPivotRecord {
2901 col: col_order[nelim],
2902 pivot_type: PivotType::OneByOne,
2903 max_l_entry: max_l,
2904 was_fallback: false,
2905 });
2906 nelim += 1;
2907 found = true;
2908 break;
2909 }
2910 }
2911
2912 if !found {
2913 let maxp = tpp_find_rc_abs_max_exclude(a.as_ref(), nelim, nelim, m, usize::MAX);
2915 if a[(nelim, nelim)].abs() >= u * maxp {
2916 let max_l = tpp_apply_1x1(a.rb_mut(), nelim, m, n);
2917
2918 d.set_1x1(nelim, a[(nelim, nelim)]);
2919 stats.num_1x1 += 1;
2920 stats.max_l_entry = stats.max_l_entry.max(max_l);
2921 pivot_log.push(AptpPivotRecord {
2922 col: col_order[nelim],
2923 pivot_type: PivotType::OneByOne,
2924 max_l_entry: max_l,
2925 was_fallback: false,
2926 });
2927 nelim += 1;
2928 } else {
2929 break;
2930 }
2931 }
2932 }
2933
2934 d.truncate(nelim);
2935 stats.num_delayed = n - nelim;
2936
2937 let delayed_cols: Vec<usize> = (nelim..n).map(|i| col_order[i]).collect();
2938
2939 Ok(AptpFactorResult {
2940 d,
2941 perm: col_order,
2942 num_eliminated: nelim,
2943 delayed_cols,
2944 stats,
2945 pivot_log,
2946 })
2947}
2948
2949#[allow(clippy::too_many_arguments)]
2965pub(super) fn compute_contribution_gemm_rect(
2966 l_storage: &Mat<f64>,
2967 num_fully_summed: usize,
2968 num_eliminated: usize,
2969 m: usize,
2970 d: &MixedDiagonal,
2971 contrib_buffer: &mut Mat<f64>,
2972 ld_workspace: &mut Mat<f64>,
2973 par: Par,
2974) {
2975 let p = num_fully_summed;
2976 let ne = num_eliminated;
2977 let nfs = m - p;
2978
2979 if nfs == 0 || ne == 0 {
2980 return;
2981 }
2982
2983 let l21_nfs = l_storage.as_ref().submatrix(p, 0, nfs, ne);
2985
2986 if ld_workspace.nrows() < nfs || ld_workspace.ncols() < ne {
2988 *ld_workspace = Mat::zeros(nfs.max(ld_workspace.nrows()), ne.max(ld_workspace.ncols()));
2989 }
2990 let mut w = ld_workspace.as_mut().submatrix_mut(0, 0, nfs, ne);
2991 w.fill(0.0);
2992 compute_ld_into(l21_nfs, d, ne, w.rb_mut());
2993
2994 tri_matmul::matmul_with_conj(
2996 contrib_buffer.as_mut().submatrix_mut(0, 0, nfs, nfs),
2997 BlockStructure::TriangularLower,
2998 Accum::Add,
2999 w.as_ref(),
3000 BlockStructure::Rectangular,
3001 Conj::No,
3002 l21_nfs.transpose(),
3003 BlockStructure::Rectangular,
3004 Conj::No,
3005 -1.0,
3006 par,
3007 );
3008}
3009
3010pub(super) fn extract_front_factors_rect(
3016 l_storage: &Mat<f64>,
3017 m: usize,
3018 k: usize,
3019 frontal_row_indices: &[usize],
3020 result: &AptpFactorResult,
3021) -> super::numeric::FrontFactors {
3022 let ne = result.num_eliminated;
3023
3024 let l11 = if ne > 0 {
3026 let mut l = Mat::zeros(ne, ne);
3027 let mut col = 0;
3028 while col < ne {
3029 l[(col, col)] = 1.0;
3030 match result.d.pivot_type(col) {
3031 PivotType::OneByOne => {
3032 let n_entries = ne - (col + 1);
3033 if n_entries > 0 {
3034 let src = &l_storage.col_as_slice(col)[col + 1..ne];
3035 l.col_as_slice_mut(col)[col + 1..ne].copy_from_slice(src);
3036 }
3037 col += 1;
3038 }
3039 PivotType::TwoByTwo { partner } if partner > col => {
3040 l[(col + 1, col + 1)] = 1.0;
3041 let n_entries = ne - (col + 2);
3042 if n_entries > 0 {
3043 let src0 = &l_storage.col_as_slice(col)[col + 2..ne];
3044 l.col_as_slice_mut(col)[col + 2..ne].copy_from_slice(src0);
3045 let src1 = &l_storage.col_as_slice(col + 1)[col + 2..ne];
3046 l.col_as_slice_mut(col + 1)[col + 2..ne].copy_from_slice(src1);
3047 }
3048 col += 2;
3049 }
3050 PivotType::TwoByTwo { .. } => {
3051 col += 1;
3052 }
3053 PivotType::Delayed => {
3054 unreachable!("unexpected Delayed pivot at col {} in 0..ne", col);
3055 }
3056 }
3057 }
3058 l
3059 } else {
3060 Mat::zeros(0, 0)
3061 };
3062
3063 let mut d11 = MixedDiagonal::new(ne);
3065 let mut col = 0;
3066 while col < ne {
3067 match result.d.pivot_type(col) {
3068 PivotType::OneByOne => {
3069 d11.set_1x1(col, result.d.diagonal_1x1(col));
3070 col += 1;
3071 }
3072 PivotType::TwoByTwo { partner: _ } => {
3073 let block = result.d.diagonal_2x2(col);
3074 d11.set_2x2(Block2x2 {
3075 first_col: col,
3076 a: block.a,
3077 b: block.b,
3078 c: block.c,
3079 });
3080 col += 2;
3081 }
3082 PivotType::Delayed => {
3083 unreachable!("unexpected Delayed pivot at col {} in 0..ne", col);
3084 }
3085 }
3086 }
3087
3088 let r = m - ne;
3090 let l21 = if ne > 0 && r > 0 {
3091 let mut l = Mat::zeros(r, ne);
3092 for j in 0..ne {
3093 let src = &l_storage.col_as_slice(j)[ne..m];
3094 l.col_as_slice_mut(j)[..r].copy_from_slice(src);
3095 }
3096 l
3097 } else {
3098 Mat::zeros(r, ne)
3099 };
3100
3101 let local_perm = result.perm[..k].to_vec();
3103 let col_indices: Vec<usize> = local_perm[..ne]
3104 .iter()
3105 .map(|&lp| frontal_row_indices[lp])
3106 .collect();
3107
3108 let mut row_indices = Vec::with_capacity(m - ne);
3109 for &lp in &result.perm[ne..k] {
3110 row_indices.push(frontal_row_indices[lp]);
3111 }
3112 row_indices.extend_from_slice(&frontal_row_indices[k..m]);
3113
3114 super::numeric::FrontFactors {
3115 l11,
3116 d11,
3117 l21,
3118 local_perm,
3119 num_eliminated: ne,
3120 col_indices,
3121 row_indices,
3122 }
3123}
3124
3125pub(super) fn extract_contribution_rect(
3132 l_storage: &Mat<f64>,
3133 m: usize,
3134 k: usize,
3135 frontal_row_indices: &[usize],
3136 result: &AptpFactorResult,
3137 mut contrib_buffer: Mat<f64>,
3138) -> super::numeric::ContributionBlock {
3139 let ne = result.num_eliminated;
3140 let size = m - ne;
3141 let num_delayed = k - ne;
3142 let nfs = m - k;
3143
3144 if num_delayed > 0 {
3145 let mut data = Mat::zeros(size, size);
3146
3147 for j in 0..num_delayed {
3149 let col_len = num_delayed - j;
3150 let src = &l_storage.col_as_slice(ne + j)[ne + j..ne + j + col_len];
3151 data.col_as_slice_mut(j)[j..j + col_len].copy_from_slice(src);
3152 }
3153
3154 for j in 0..num_delayed {
3156 let src = &l_storage.col_as_slice(ne + j)[k..m];
3157 data.col_as_slice_mut(j)[num_delayed..size].copy_from_slice(src);
3158 }
3159
3160 for j in 0..nfs {
3162 let col_len = nfs - j;
3163 let src = &contrib_buffer.col_as_slice(j)[j..j + col_len];
3164 data.col_as_slice_mut(num_delayed + j)[num_delayed + j..size].copy_from_slice(src);
3165 }
3166
3167 contrib_buffer = data;
3168 }
3169
3170 let mut row_indices = Vec::with_capacity(size);
3171 for &lp in &result.perm[ne..k] {
3172 row_indices.push(frontal_row_indices[lp]);
3173 }
3174 row_indices.extend_from_slice(&frontal_row_indices[k..m]);
3175
3176 super::numeric::ContributionBlock {
3177 data: contrib_buffer,
3178 row_indices,
3179 num_delayed,
3180 }
3181}
3182
3183#[cfg(test)]
3184mod tests {
3185 use super::*;
3186 use faer::Mat;
3187
3188 fn reconstruct_dense_ldlt(l: &Mat<f64>, d: &MixedDiagonal, perm: &[usize]) -> Mat<f64> {
3192 let n = l.nrows();
3193
3194 let mut d_mat = Mat::zeros(n, n);
3196 let nd = d.dimension();
3197 let mut col = 0;
3198 while col < nd {
3199 match d.pivot_type(col) {
3200 PivotType::OneByOne => {
3201 d_mat[(col, col)] = d.diagonal_1x1(col);
3202 col += 1;
3203 }
3204 PivotType::TwoByTwo { partner } if partner > col => {
3205 let block = d.diagonal_2x2(col);
3206 d_mat[(col, col)] = block.a;
3207 d_mat[(col, col + 1)] = block.b;
3208 d_mat[(col + 1, col)] = block.b;
3209 d_mat[(col + 1, col + 1)] = block.c;
3210 col += 2;
3211 }
3212 _ => {
3213 col += 1;
3214 }
3215 }
3216 }
3217
3218 let mut w = Mat::zeros(n, n);
3220 for i in 0..n {
3221 for j in 0..n {
3222 let mut sum = 0.0;
3223 for k in 0..n {
3224 sum += l[(i, k)] * d_mat[(k, j)];
3225 }
3226 w[(i, j)] = sum;
3227 }
3228 }
3229
3230 let mut ldlt = Mat::zeros(n, n);
3232 for i in 0..n {
3233 for j in 0..n {
3234 let mut sum = 0.0;
3235 for k in 0..n {
3236 sum += w[(i, k)] * l[(j, k)];
3237 }
3238 ldlt[(i, j)] = sum;
3239 }
3240 }
3241
3242 let mut result = Mat::zeros(n, n);
3244 for i in 0..n {
3245 for j in 0..n {
3246 result[(perm[i], perm[j])] = ldlt[(i, j)];
3247 }
3248 }
3249
3250 result
3251 }
3252
3253 fn dense_reconstruction_error(
3255 original: &Mat<f64>,
3256 l: &Mat<f64>,
3257 d: &MixedDiagonal,
3258 perm: &[usize],
3259 ) -> f64 {
3260 let reconstructed = reconstruct_dense_ldlt(l, d, perm);
3261 let n = original.nrows();
3262
3263 let mut diff_norm_sq = 0.0_f64;
3264 let mut orig_norm_sq = 0.0_f64;
3265
3266 for i in 0..n {
3267 for j in 0..n {
3268 let diff = original[(i, j)] - reconstructed[(i, j)];
3269 diff_norm_sq += diff * diff;
3270 orig_norm_sq += original[(i, j)] * original[(i, j)];
3271 }
3272 }
3273
3274 if orig_norm_sq == 0.0 {
3275 return diff_norm_sq.sqrt();
3276 }
3277 (diff_norm_sq / orig_norm_sq).sqrt()
3278 }
3279
3280 fn symmetric_matrix(n: usize, f: impl Fn(usize, usize) -> f64) -> Mat<f64> {
3281 Mat::from_fn(n, n, |i, j| if i >= j { f(i, j) } else { f(j, i) })
3282 }
3283
3284 #[test]
3287 fn test_reconstruction_trivial_identity() {
3288 let l = Mat::identity(2, 2);
3289 let mut d = MixedDiagonal::new(2);
3290 d.set_1x1(0, 1.0);
3291 d.set_1x1(1, 1.0);
3292 let perm = vec![0, 1];
3293
3294 let result = reconstruct_dense_ldlt(&l, &d, &perm);
3295 for i in 0..2 {
3296 for j in 0..2 {
3297 let expected = if i == j { 1.0 } else { 0.0 };
3298 assert!(
3299 (result[(i, j)] - expected).abs() < 1e-14,
3300 "({},{}) = {}, expected {}",
3301 i,
3302 j,
3303 result[(i, j)],
3304 expected
3305 );
3306 }
3307 }
3308 }
3309
3310 fn complete_pivoting_factor_and_extract(
3314 a: &Mat<f64>,
3315 ) -> (Mat<f64>, MixedDiagonal, Vec<usize>, AptpStatistics) {
3316 let mut a_copy = a.clone();
3317 let result = complete_pivoting_factor(a_copy.as_mut(), 1e-20);
3318 let l = extract_l(a_copy.as_ref(), &result.d, result.num_eliminated);
3319 (l, result.d, result.perm, result.stats)
3320 }
3321
3322 #[test]
3323 fn test_cp_identity() {
3324 let a = Mat::identity(3, 3);
3326 let mut a_copy = a.clone();
3327 let result = complete_pivoting_factor(a_copy.as_mut(), 1e-20);
3328
3329 assert_eq!(result.num_eliminated, 3);
3330 assert_eq!(result.stats.num_1x1, 3);
3331 assert_eq!(result.stats.num_2x2, 0);
3332 assert_eq!(result.stats.num_delayed, 0);
3333
3334 for i in 0..3 {
3336 assert!((result.d.diagonal_1x1(i) - 1.0).abs() < 1e-14);
3337 }
3338
3339 assert!(result.stats.max_l_entry < 1e-14);
3341 }
3342
3343 #[test]
3344 fn test_cp_diagonal_pivot_ordering() {
3345 let a = symmetric_matrix(3, |i, j| if i == j { [2.0, 5.0, 3.0][i] } else { 0.0 });
3347 let mut a_copy = a.clone();
3348 let result = complete_pivoting_factor(a_copy.as_mut(), 1e-20);
3349
3350 assert_eq!(result.num_eliminated, 3);
3351 assert_eq!(result.stats.num_1x1, 3);
3352
3353 assert!(
3355 (result.d.diagonal_1x1(0) - 5.0).abs() < 1e-14,
3356 "first pivot should be 5.0, got {}",
3357 result.d.diagonal_1x1(0)
3358 );
3359 assert!(
3360 (result.d.diagonal_1x1(1) - 3.0).abs() < 1e-14,
3361 "second pivot should be 3.0, got {}",
3362 result.d.diagonal_1x1(1)
3363 );
3364 assert!(
3365 (result.d.diagonal_1x1(2) - 2.0).abs() < 1e-14,
3366 "third pivot should be 2.0, got {}",
3367 result.d.diagonal_1x1(2)
3368 );
3369 }
3370
3371 #[test]
3372 fn test_cp_2x2_pivot() {
3373 let a = symmetric_matrix(4, |i, j| {
3376 let vals = [
3377 [0.1, 10.0, 0.0, 0.0],
3378 [10.0, 0.1, 0.0, 0.0],
3379 [0.0, 0.0, 5.0, 1.0],
3380 [0.0, 0.0, 1.0, 3.0],
3381 ];
3382 vals[i][j]
3383 });
3384
3385 let (l, d, perm, stats) = complete_pivoting_factor_and_extract(&a);
3386
3387 assert!(
3388 stats.num_2x2 >= 1,
3389 "expected at least one 2×2 pivot, got {}",
3390 stats.num_2x2
3391 );
3392 assert!(
3394 stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
3395 "L entries should be bounded by 4, got {}",
3396 stats.max_l_entry
3397 );
3398
3399 let error = dense_reconstruction_error(&a, &l, &d, &perm);
3401 assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
3402 }
3403
3404 #[test]
3405 fn test_cp_failed_2x2_fallback() {
3406 let a = symmetric_matrix(4, |i, j| {
3423 let vals = [
3424 [2.0, 2.0, 0.1, 0.1],
3425 [2.0, 2.0, 0.1, 0.1],
3426 [0.1, 0.1, 5.0, 0.0],
3427 [0.1, 0.1, 0.0, 3.0],
3428 ];
3429 vals[i][j]
3430 });
3431
3432 let (l, d, perm, stats) = complete_pivoting_factor_and_extract(&a);
3433
3434 assert!(
3437 stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
3438 "L entries should be bounded by 4"
3439 );
3440
3441 let error = dense_reconstruction_error(&a, &l, &d, &perm);
3443 assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
3444 }
3445
3446 #[test]
3447 fn test_cp_singular_block() {
3448 let mut a = Mat::zeros(3, 3);
3450 a[(0, 0)] = 1e-25;
3451 a[(1, 1)] = 1e-25;
3452 a[(2, 2)] = 1e-25;
3453
3454 let result = complete_pivoting_factor(a.as_mut(), 1e-20);
3455
3456 assert_eq!(
3458 result.num_eliminated, 0,
3459 "near-singular block should have 0 eliminations"
3460 );
3461 assert_eq!(result.stats.num_delayed, 3);
3462 }
3463
3464 #[test]
3465 fn test_cp_reconstruction_random() {
3466 let sizes = [8, 16, 32];
3469 for &n in &sizes {
3470 let a = symmetric_matrix(n, |i, j| {
3471 let seed = (i * 1000 + j * 7 + 13) as f64;
3473 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
3474 if i == j {
3475 val * 10.0 } else {
3477 val
3478 }
3479 });
3480
3481 let (l, d, perm, stats) = complete_pivoting_factor_and_extract(&a);
3482
3483 assert_eq!(
3485 stats.num_1x1 + 2 * stats.num_2x2 + stats.num_delayed,
3486 n,
3487 "statistics invariant for {}x{}",
3488 n,
3489 n
3490 );
3491
3492 if stats.num_delayed == 0 {
3493 let error = dense_reconstruction_error(&a, &l, &d, &perm);
3494 assert!(
3495 error < 1e-12,
3496 "complete pivoting {}x{}: reconstruction error {:.2e} >= 1e-12",
3497 n,
3498 n,
3499 error
3500 );
3501 }
3502
3503 assert!(
3505 stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
3506 "complete pivoting {}x{}: max_l_entry {:.2e} > 4",
3507 n,
3508 n,
3509 stats.max_l_entry
3510 );
3511 }
3512 }
3513
3514 #[test]
3515 fn test_1x1_trivial_diagonal() {
3516 let a = symmetric_matrix(2, |i, j| if i == j { [4.0, 9.0][i] } else { 0.0 });
3517
3518 let opts = AptpOptions {
3519 fallback: AptpFallback::Delay,
3520 ..AptpOptions::default()
3521 };
3522 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3523
3524 assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 2);
3526 assert_eq!(result.stats.num_delayed, 0);
3527 assert!(result.delayed_cols.is_empty());
3528
3529 let error =
3530 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
3531 assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
3532 }
3533
3534 #[test]
3535 fn test_1x1_positive_definite_3x3() {
3536 let a = symmetric_matrix(3, |i, j| {
3537 let vals = [[4.0, 2.0, 1.0], [2.0, 5.0, 3.0], [1.0, 3.0, 6.0]];
3538 vals[i][j]
3539 });
3540
3541 let opts = AptpOptions::default();
3542 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3543
3544 assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 3);
3546 assert_eq!(result.stats.num_delayed, 0);
3547
3548 let error =
3549 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
3550 assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
3551 }
3552
3553 #[test]
3554 fn test_all_delayed_zero_matrix() {
3555 let n = 4;
3556 let a = Mat::zeros(n, n);
3557
3558 let opts = AptpOptions {
3559 failed_pivot_method: FailedPivotMethod::Pass,
3560 ..AptpOptions::default()
3561 };
3562 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3563
3564 let total = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
3569 assert_eq!(total, n, "total pivots + delays should equal n");
3570 }
3571
3572 #[test]
3573 fn test_1x1_singularity_detection() {
3574 let a = symmetric_matrix(3, |i, j| if i == j { [4.0, 1e-25, 9.0][i] } else { 0.0 });
3575
3576 let opts = AptpOptions {
3577 fallback: AptpFallback::Delay,
3578 failed_pivot_method: FailedPivotMethod::Pass,
3579 ..AptpOptions::default()
3580 };
3581 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3582
3583 let eliminated = result.stats.num_1x1 + 2 * result.stats.num_2x2;
3587 assert!(
3588 eliminated >= 2,
3589 "should eliminate at least 2 columns, got {}",
3590 eliminated
3591 );
3592 let total = eliminated + result.stats.num_delayed;
3593 assert_eq!(total, 3, "total pivots + delays should equal n");
3594 }
3595
3596 #[test]
3597 fn test_stability_bound_enforced() {
3598 let a = symmetric_matrix(2, |i, j| {
3602 let vals = [[1e-4, 1.0], [1.0, 1.0]];
3603 vals[i][j]
3604 });
3605
3606 let opts = AptpOptions {
3607 fallback: AptpFallback::Delay,
3608 ..AptpOptions::default()
3609 };
3610 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3611
3612 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
3614 assert_eq!(sum, 2);
3615 let error =
3617 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
3618 assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
3619 }
3620
3621 #[test]
3622 fn test_1x1_matrix() {
3623 let a = Mat::from_fn(1, 1, |_, _| 5.0);
3624
3625 let opts = AptpOptions::default();
3626 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3627
3628 assert_eq!(result.stats.num_1x1, 1);
3629 assert_eq!(result.stats.num_delayed, 0);
3630
3631 let error =
3632 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
3633 assert!(error < 1e-14, "reconstruction error {:.2e}", error);
3634 }
3635
3636 #[test]
3637 fn test_statistics_sum_invariant() {
3638 let a = symmetric_matrix(5, |i, j| {
3639 let vals = [
3640 [10.0, 1.0, 0.0, 0.0, 0.0],
3641 [1.0, 20.0, 2.0, 0.0, 0.0],
3642 [0.0, 2.0, 30.0, 3.0, 0.0],
3643 [0.0, 0.0, 3.0, 40.0, 4.0],
3644 [0.0, 0.0, 0.0, 4.0, 50.0],
3645 ];
3646 vals[i][j]
3647 });
3648
3649 let opts = AptpOptions::default();
3650 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3651
3652 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
3653 assert_eq!(sum, 5, "statistics sum {} != n=5", sum);
3654 }
3655
3656 #[test]
3657 fn test_2x2_pivot_known_indefinite() {
3658 let a = symmetric_matrix(4, |i, j| {
3659 let vals = [
3660 [0.01, 10.0, 0.0, 0.0],
3661 [10.0, 0.01, 0.0, 0.0],
3662 [0.0, 0.0, 5.0, 1.0],
3663 [0.0, 0.0, 1.0, 3.0],
3664 ];
3665 vals[i][j]
3666 });
3667
3668 let opts = AptpOptions {
3669 fallback: AptpFallback::BunchKaufman,
3670 ..AptpOptions::default()
3671 };
3672 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3673
3674 assert!(
3675 result.stats.num_2x2 >= 1,
3676 "expected 2x2 pivot, got num_2x2={}",
3677 result.stats.num_2x2
3678 );
3679
3680 let error =
3681 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
3682 assert!(error < 1e-12, "reconstruction error {:.2e} >= 1e-12", error);
3683 }
3684
3685 #[test]
3686 fn test_2x2_stability_test() {
3687 let a_good = symmetric_matrix(2, |i, j| {
3688 let vals = [[1.0, 5.0], [5.0, 1.0]];
3689 vals[i][j]
3690 });
3691 let opts = AptpOptions {
3692 fallback: AptpFallback::BunchKaufman,
3693 ..AptpOptions::default()
3694 };
3695 let result = aptp_factor(a_good.as_ref(), &opts).unwrap();
3696 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
3697 assert_eq!(sum, 2);
3698 }
3699
3700 #[test]
3701 fn test_bk_vs_delay_fallback() {
3702 let a = symmetric_matrix(4, |i, j| {
3703 let vals = [
3704 [0.01, 10.0, 0.0, 0.0],
3705 [10.0, 0.01, 0.0, 0.0],
3706 [0.0, 0.0, 5.0, 1.0],
3707 [0.0, 0.0, 1.0, 3.0],
3708 ];
3709 vals[i][j]
3710 });
3711
3712 let bk_opts = AptpOptions {
3713 fallback: AptpFallback::BunchKaufman,
3714 ..AptpOptions::default()
3715 };
3716 let delay_opts = AptpOptions {
3717 fallback: AptpFallback::Delay,
3718 ..AptpOptions::default()
3719 };
3720
3721 let bk_result = aptp_factor(a.as_ref(), &bk_opts).unwrap();
3722 let delay_result = aptp_factor(a.as_ref(), &delay_opts).unwrap();
3723
3724 assert!(
3725 bk_result.stats.num_delayed <= delay_result.stats.num_delayed,
3726 "BK delayed {} > Delay delayed {}",
3727 bk_result.stats.num_delayed,
3728 delay_result.stats.num_delayed
3729 );
3730
3731 if bk_result.stats.num_delayed == 0 {
3732 let error = dense_reconstruction_error(
3733 &a,
3734 &bk_result.l,
3735 &bk_result.d,
3736 bk_result.perm.as_ref().arrays().0,
3737 );
3738 assert!(error < 1e-12, "BK reconstruction error {:.2e}", error);
3739 }
3740 }
3741
3742 #[test]
3743 fn test_strict_threshold() {
3744 let a = symmetric_matrix(4, |i, j| {
3745 let vals = [
3746 [4.0, 2.0, 1.0, 0.5],
3747 [2.0, 5.0, 2.0, 1.0],
3748 [1.0, 2.0, 6.0, 2.0],
3749 [0.5, 1.0, 2.0, 7.0],
3750 ];
3751 vals[i][j]
3752 });
3753
3754 let loose = AptpOptions {
3755 threshold: 0.01,
3756 fallback: AptpFallback::Delay,
3757 ..AptpOptions::default()
3758 };
3759 let strict = AptpOptions {
3760 threshold: 0.5,
3761 fallback: AptpFallback::Delay,
3762 ..AptpOptions::default()
3763 };
3764
3765 let loose_result = aptp_factor(a.as_ref(), &loose).unwrap();
3766 let strict_result = aptp_factor(a.as_ref(), &strict).unwrap();
3767
3768 assert!(
3769 strict_result.stats.num_delayed >= loose_result.stats.num_delayed,
3770 "strict delayed {} < loose delayed {}",
3771 strict_result.stats.num_delayed,
3772 loose_result.stats.num_delayed,
3773 );
3774 }
3775
3776 #[test]
3777 fn test_permutation_valid() {
3778 let a = symmetric_matrix(4, |i, j| {
3779 let vals = [
3780 [0.01, 10.0, 0.0, 0.0],
3781 [10.0, 0.01, 0.0, 0.0],
3782 [0.0, 0.0, 5.0, 1.0],
3783 [0.0, 0.0, 1.0, 3.0],
3784 ];
3785 vals[i][j]
3786 });
3787
3788 let opts = AptpOptions {
3789 fallback: AptpFallback::BunchKaufman,
3790 ..AptpOptions::default()
3791 };
3792 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3793
3794 let (fwd, inv) = result.perm.as_ref().arrays();
3795 let n = fwd.len();
3796 assert_eq!(n, 4);
3797 let mut seen = vec![false; n];
3798 for &v in fwd {
3799 assert!(v < n, "perm value {} >= n={}", v, n);
3800 assert!(!seen[v], "duplicate perm value {}", v);
3801 seen[v] = true;
3802 }
3803 for i in 0..n {
3804 assert_eq!(inv[fwd[i]], i);
3805 assert_eq!(fwd[inv[i]], i);
3806 }
3807 }
3808
3809 #[test]
3810 fn test_pd_statistics() {
3811 let a = symmetric_matrix(4, |i, j| {
3812 let vals = [
3813 [10.0, 1.0, 0.0, 0.0],
3814 [1.0, 20.0, 2.0, 0.0],
3815 [0.0, 2.0, 30.0, 3.0],
3816 [0.0, 0.0, 3.0, 40.0],
3817 ];
3818 vals[i][j]
3819 });
3820
3821 let opts = AptpOptions::default();
3822 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3823
3824 assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 4);
3826 assert_eq!(result.stats.num_delayed, 0);
3827 assert!(result.stats.max_l_entry < 1.0 / opts.threshold);
3828 }
3829
3830 #[test]
3831 fn test_max_l_entry_accuracy() {
3832 let a = symmetric_matrix(4, |i, j| {
3833 let vals = [
3834 [4.0, 2.0, 1.0, 0.5],
3835 [2.0, 5.0, 2.0, 1.0],
3836 [1.0, 2.0, 6.0, 2.0],
3837 [0.5, 1.0, 2.0, 7.0],
3838 ];
3839 vals[i][j]
3840 });
3841
3842 let opts = AptpOptions::default();
3843 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3844
3845 let n = result.l.nrows();
3846 let mut actual_max = 0.0_f64;
3847 for j in 0..n {
3848 for i in (j + 1)..n {
3849 let val = result.l[(i, j)].abs();
3850 if val > actual_max {
3851 actual_max = val;
3852 }
3853 }
3854 }
3855
3856 assert!(
3857 (result.stats.max_l_entry - actual_max).abs() < 1e-14,
3858 "stats.max_l_entry={:.6e}, actual={:.6e}",
3859 result.stats.max_l_entry,
3860 actual_max
3861 );
3862 }
3863
3864 #[test]
3865 fn test_pivot_log_completeness() {
3866 let a = symmetric_matrix(4, |i, j| {
3867 let vals = [
3868 [4.0, 2.0, 1.0, 0.5],
3869 [2.0, 5.0, 2.0, 1.0],
3870 [1.0, 2.0, 6.0, 2.0],
3871 [0.5, 1.0, 2.0, 7.0],
3872 ];
3873 vals[i][j]
3874 });
3875
3876 let opts = AptpOptions::default();
3877 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3878
3879 assert_eq!(result.pivot_log.len(), 4);
3880 }
3881
3882 #[test]
3883 fn test_inertia_from_d() {
3884 let a = symmetric_matrix(5, |i, j| {
3885 if i == j {
3886 [3.0, -2.0, 1.0, -4.0, 5.0][i]
3887 } else {
3888 0.0
3889 }
3890 });
3891
3892 let opts = AptpOptions {
3893 fallback: AptpFallback::Delay,
3894 ..AptpOptions::default()
3895 };
3896 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3897
3898 assert_eq!(result.stats.num_delayed, 0, "should have no delays");
3899
3900 let inertia = result.d.compute_inertia();
3901 assert_eq!(inertia.positive, 3, "expected 3 positive");
3902 assert_eq!(inertia.negative, 2, "expected 2 negative");
3903 assert_eq!(inertia.zero, 0, "expected 0 zero");
3904 }
3905
3906 #[test]
3909 fn test_partial_factorization() {
3910 let n = 8;
3911 let p = 4;
3912 let a = symmetric_matrix(n, |i, j| {
3913 if i == j {
3914 10.0 + i as f64
3915 } else {
3916 1.0 / (1.0 + (i as f64 - j as f64).abs())
3917 }
3918 });
3919
3920 let opts = AptpOptions::default();
3921 let mut a_copy = a.clone();
3922 let result = aptp_factor_in_place(a_copy.as_mut(), p, &opts).unwrap();
3923
3924 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
3925 assert_eq!(sum, p, "statistics should sum to num_fully_summed={}", p);
3926 }
3927
3928 #[test]
3929 fn test_edge_case_extreme_thresholds() {
3930 let a = symmetric_matrix(3, |i, j| {
3931 let vals = [[4.0, 2.0, 1.0], [2.0, 5.0, 2.0], [1.0, 2.0, 6.0]];
3932 vals[i][j]
3933 });
3934
3935 let loose = AptpOptions {
3936 threshold: 0.001,
3937 fallback: AptpFallback::Delay,
3938 ..AptpOptions::default()
3939 };
3940 let result_loose = aptp_factor(a.as_ref(), &loose).unwrap();
3941 assert_eq!(result_loose.stats.num_delayed, 0);
3942
3943 let strict = AptpOptions {
3944 threshold: 1.0,
3945 fallback: AptpFallback::Delay,
3946 ..AptpOptions::default()
3947 };
3948 let result_strict = aptp_factor(a.as_ref(), &strict).unwrap();
3949 let sum = result_strict.stats.num_1x1
3950 + 2 * result_strict.stats.num_2x2
3951 + result_strict.stats.num_delayed;
3952 assert_eq!(sum, 3);
3953 }
3954
3955 #[test]
3956 fn test_both_fallback_strategies_valid() {
3957 let matrices = [
3958 symmetric_matrix(3, |i, j| {
3959 let vals = [[1.0, 2.0, 0.0], [2.0, -1.0, 1.0], [0.0, 1.0, 3.0]];
3960 vals[i][j]
3961 }),
3962 symmetric_matrix(4, |i, j| {
3963 let vals = [
3964 [0.01, 10.0, 0.0, 0.0],
3965 [10.0, 0.01, 0.0, 0.0],
3966 [0.0, 0.0, 5.0, 1.0],
3967 [0.0, 0.0, 1.0, 3.0],
3968 ];
3969 vals[i][j]
3970 }),
3971 ];
3972
3973 for (idx, a) in matrices.iter().enumerate() {
3974 for fallback in [AptpFallback::BunchKaufman, AptpFallback::Delay] {
3975 let opts = AptpOptions {
3976 fallback,
3977 ..AptpOptions::default()
3978 };
3979 let result = aptp_factor(a.as_ref(), &opts).unwrap();
3980
3981 if result.stats.num_delayed == 0 {
3982 let error = dense_reconstruction_error(
3983 a,
3984 &result.l,
3985 &result.d,
3986 result.perm.as_ref().arrays().0,
3987 );
3988 assert!(
3989 error < 1e-12,
3990 "matrix {} {:?}: error {:.2e}",
3991 idx,
3992 fallback,
3993 error
3994 );
3995 }
3996
3997 let sum =
3998 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
3999 let n = a.nrows();
4000 assert_eq!(sum, n, "statistics invariant broken for matrix {}", idx);
4001 }
4002 }
4003 }
4004
4005 #[test]
4008 fn test_invalid_non_square() {
4009 let a = Mat::zeros(3, 4);
4010 let opts = AptpOptions::default();
4011 let result = aptp_factor(a.as_ref(), &opts);
4012 assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
4013 }
4014
4015 #[test]
4016 fn test_invalid_threshold() {
4017 let a = Mat::zeros(2, 2);
4018 let opts = AptpOptions {
4019 threshold: 0.0,
4020 ..AptpOptions::default()
4021 };
4022 let result = aptp_factor(a.as_ref(), &opts);
4023 assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
4024
4025 let opts2 = AptpOptions {
4026 threshold: 1.5,
4027 ..AptpOptions::default()
4028 };
4029 let result2 = aptp_factor(a.as_ref(), &opts2);
4030 assert!(matches!(result2, Err(SparseError::InvalidInput { .. })));
4031 }
4032
4033 #[test]
4034 fn test_invalid_num_fully_summed_too_large() {
4035 let mut a = Mat::zeros(3, 3);
4036 let opts = AptpOptions::default();
4037 let result = aptp_factor_in_place(a.as_mut(), 5, &opts);
4038 assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
4039 }
4040
4041 #[test]
4044 fn test_zero_fully_summed() {
4045 let a = symmetric_matrix(3, |i, j| {
4046 let vals = [[4.0, 2.0, 1.0], [2.0, 5.0, 3.0], [1.0, 3.0, 6.0]];
4047 vals[i][j]
4048 });
4049
4050 let opts = AptpOptions::default();
4051 let mut a_copy = a.clone();
4052 let result = aptp_factor_in_place(a_copy.as_mut(), 0, &opts).unwrap();
4053
4054 assert_eq!(result.num_eliminated, 0);
4055 assert_eq!(result.stats.num_1x1, 0);
4056 assert_eq!(result.stats.num_2x2, 0);
4057 assert_eq!(result.stats.num_delayed, 0);
4058 assert!(result.pivot_log.is_empty());
4059 }
4060
4061 #[test]
4062 fn test_2x2_fallback_also_fails() {
4063 let a = symmetric_matrix(3, |i, j| {
4067 let vals = [[0.001, 0.11, 0.0], [0.11, 12.0, 0.1], [0.0, 0.1, 5.0]];
4068 vals[i][j]
4069 });
4070
4071 let opts = AptpOptions {
4072 fallback: AptpFallback::BunchKaufman,
4073 ..AptpOptions::default()
4074 };
4075 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4076
4077 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
4079 assert_eq!(sum, 3, "statistics sum {} != n=3", sum);
4080
4081 if result.stats.num_delayed == 0 {
4083 let error = dense_reconstruction_error(
4084 &a,
4085 &result.l,
4086 &result.d,
4087 result.perm.as_ref().arrays().0,
4088 );
4089 assert!(error < 1e-12, "reconstruction error {:.2e}", error);
4090 }
4091
4092 assert!(
4094 result.stats.max_l_entry <= COMPLETE_PIVOTING_GROWTH_BOUND + 1e-10,
4095 "max_l_entry {} > 4",
4096 result.stats.max_l_entry
4097 );
4098 }
4099
4100 #[test]
4101 fn test_invalid_small_negative() {
4102 let a = Mat::zeros(2, 2);
4103 let opts = AptpOptions {
4104 small: -1.0,
4105 ..AptpOptions::default()
4106 };
4107 let result = aptp_factor(a.as_ref(), &opts);
4108 assert!(matches!(result, Err(SparseError::InvalidInput { .. })));
4109 }
4110
4111 #[cfg(feature = "test-util")]
4114 mod stress_tests {
4115 use super::*;
4116 use crate::testing::generators::{RandomMatrixConfig, generate_random_symmetric};
4117 use rand::SeedableRng;
4118 use rand::rngs::StdRng;
4119
4120 fn random_dense_pd(n: usize, rng: &mut impl rand::Rng) -> Mat<f64> {
4122 let config = RandomMatrixConfig {
4123 size: n,
4124 target_nnz: n * n / 2,
4125 positive_definite: true,
4126 };
4127 generate_random_symmetric(&config, rng).unwrap().to_dense()
4128 }
4129
4130 fn random_dense_indefinite(n: usize, rng: &mut impl rand::Rng) -> Mat<f64> {
4132 let config = RandomMatrixConfig {
4133 size: n,
4134 target_nnz: n * n / 2,
4135 positive_definite: false,
4136 };
4137 generate_random_symmetric(&config, rng).unwrap().to_dense()
4138 }
4139
4140 #[test]
4141 fn test_random_pd_matrices() {
4142 let mut rng = StdRng::seed_from_u64(42);
4143 let opts = AptpOptions::default();
4144 let sizes = [5, 10, 20, 50];
4145 let mut total = 0;
4146
4147 for &n in &sizes {
4148 for _ in 0..25 {
4149 let a = random_dense_pd(n, &mut rng);
4150 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4151
4152 assert_eq!(
4153 result.stats.num_delayed, 0,
4154 "PD matrix {}x{} should have zero delays",
4155 n, n
4156 );
4157 let total_elim = result.stats.num_1x1 + 2 * result.stats.num_2x2;
4158 assert_eq!(
4159 total_elim, n,
4160 "PD matrix {}x{} should eliminate all columns (1x1={}, 2x2={})",
4161 n, n, result.stats.num_1x1, result.stats.num_2x2
4162 );
4163
4164 let error = dense_reconstruction_error(
4165 &a,
4166 &result.l,
4167 &result.d,
4168 result.perm.as_ref().arrays().0,
4169 );
4170 assert!(
4171 error < 1e-12,
4172 "PD {}x{}: reconstruction error {:.2e}",
4173 n,
4174 n,
4175 error
4176 );
4177 total += 1;
4178 }
4179 }
4180 assert!(total >= 100, "ran {} PD tests, need >= 100", total);
4181 }
4182
4183 #[test]
4184 fn test_random_indefinite_matrices() {
4185 let mut rng = StdRng::seed_from_u64(123);
4186 let opts = AptpOptions {
4187 fallback: AptpFallback::BunchKaufman,
4188 ..AptpOptions::default()
4189 };
4190 let sizes = [5, 10, 20, 50];
4191 let mut total = 0;
4192
4193 for &n in &sizes {
4194 for _ in 0..25 {
4195 let a = random_dense_indefinite(n, &mut rng);
4196 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4197
4198 let sum =
4200 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
4201 assert_eq!(sum, n, "stats invariant for {}x{}", n, n);
4202
4203 assert!(
4205 result.stats.max_l_entry < 1.0 / opts.threshold,
4206 "stability bound violated for {}x{}",
4207 n,
4208 n
4209 );
4210
4211 if result.stats.num_delayed == 0 {
4213 let error = dense_reconstruction_error(
4214 &a,
4215 &result.l,
4216 &result.d,
4217 result.perm.as_ref().arrays().0,
4218 );
4219 assert!(
4220 error < 1e-12,
4221 "indefinite {}x{}: reconstruction error {:.2e}",
4222 n,
4223 n,
4224 error
4225 );
4226 }
4227 total += 1;
4228 }
4229 }
4230 assert!(total >= 100, "ran {} indefinite tests, need >= 100", total);
4231 }
4232 }
4233
4234 #[cfg(feature = "test-util")]
4237 mod integration_tests {
4238 use super::*;
4239 use crate::testing::cases::{TestCaseFilter, load_test_cases};
4240
4241 #[test]
4242 #[ignore] fn test_hand_constructed_matrices() {
4244 let cases = load_test_cases(&TestCaseFilter::hand_constructed())
4245 .expect("failed to load hand-constructed matrices");
4246 assert_eq!(cases.len(), 15, "expected 15 hand-constructed matrices");
4247
4248 let opts = AptpOptions::default();
4249 let mut passed = 0;
4250
4251 for case in &cases {
4252 let dense = case.matrix.to_dense();
4253 let n = dense.nrows();
4254
4255 if n > 500 {
4257 continue;
4258 }
4259
4260 let result = aptp_factor(dense.as_ref(), &opts).unwrap();
4261
4262 let sum =
4263 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
4264 assert_eq!(sum, n, "{}: stats invariant failed", case.name);
4265
4266 if result.stats.num_delayed == 0 {
4267 let error = dense_reconstruction_error(
4268 &dense,
4269 &result.l,
4270 &result.d,
4271 result.perm.as_ref().arrays().0,
4272 );
4273 assert!(
4274 error < 1e-12,
4275 "{}: reconstruction error {:.2e}",
4276 case.name,
4277 error
4278 );
4279 }
4280 passed += 1;
4281 }
4282 assert!(passed >= 10, "only {} hand-constructed passed", passed);
4283 }
4284
4285 #[test]
4286 #[ignore] fn test_suitesparse_ci_dense() {
4288 let cases = load_test_cases(&TestCaseFilter::ci_subset())
4289 .expect("failed to load CI-subset matrices");
4290
4291 let opts = AptpOptions::default();
4292 let mut tested = 0;
4293
4294 for case in &cases {
4295 let n = case.matrix.nrows();
4296
4297 if n > 200 {
4300 continue;
4301 }
4302
4303 let dense = case.matrix.to_dense();
4304
4305 let result = aptp_factor(dense.as_ref(), &opts).unwrap();
4306
4307 let sum =
4308 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
4309 assert_eq!(sum, n, "{}: stats invariant failed", case.name);
4310
4311 if result.stats.num_delayed == 0 {
4312 let error = dense_reconstruction_error(
4313 &dense,
4314 &result.l,
4315 &result.d,
4316 result.perm.as_ref().arrays().0,
4317 );
4318 assert!(
4319 error < 1e-12,
4320 "{}: reconstruction error {:.2e}",
4321 case.name,
4322 error
4323 );
4324 }
4325 tested += 1;
4326 }
4327 assert!(
4328 tested > 0,
4329 "no CI-subset matrices small enough for dense test"
4330 );
4331 }
4332 }
4333
4334 #[test]
4337 fn test_factor_inner_reconstruction_moderate() {
4338 let sizes = [64, 128, 256];
4341 let opts = AptpOptions::default();
4342
4343 for &n in &sizes {
4344 let a = symmetric_matrix(n, |i, j| {
4345 let seed = (i * 1000 + j * 7 + 13) as f64;
4346 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4347 if i == j { val * 10.0 } else { val }
4348 });
4349
4350 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4351
4352 assert_eq!(
4354 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
4355 n,
4356 "factor_inner {}x{}: statistics invariant",
4357 n,
4358 n
4359 );
4360
4361 if result.stats.num_delayed == 0 {
4362 let error = dense_reconstruction_error(
4363 &a,
4364 &result.l,
4365 &result.d,
4366 result.perm.as_ref().arrays().0,
4367 );
4368 assert!(
4369 error < 1e-12,
4370 "factor_inner {}x{}: reconstruction error {:.2e} >= 1e-12",
4371 n,
4372 n,
4373 error
4374 );
4375 }
4376 }
4377 }
4378
4379 #[test]
4380 fn test_factor_inner_partial_factorization() {
4381 let n = 64;
4383 let p = 48; let a = symmetric_matrix(n, |i, j| {
4385 let seed = (i * 1000 + j * 7 + 13) as f64;
4386 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4387 if i == j { val * 10.0 } else { val }
4388 });
4389
4390 let opts = AptpOptions::default();
4391 let mut a_copy = a.to_owned();
4392 let result = aptp_factor_in_place(a_copy.as_mut(), p, &opts).unwrap();
4393
4394 assert!(
4396 result.num_eliminated <= p,
4397 "eliminated {} > p={}",
4398 result.num_eliminated,
4399 p
4400 );
4401
4402 assert_eq!(
4404 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
4405 p,
4406 "statistics invariant for partial factorization"
4407 );
4408 }
4409
4410 #[test]
4413 fn test_two_level_dispatch_small_block_size() {
4414 let sizes = [33, 64, 100];
4417 let opts = AptpOptions {
4418 outer_block_size: 32,
4419 inner_block_size: 8,
4420 ..AptpOptions::default()
4421 };
4422
4423 for &n in &sizes {
4424 let a = symmetric_matrix(n, |i, j| {
4425 let seed = (i * 1000 + j * 7 + 13) as f64;
4426 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4427 if i == j { val * 10.0 } else { val }
4428 });
4429
4430 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4431
4432 assert_eq!(
4433 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
4434 n,
4435 "two-level {}x{}: statistics invariant",
4436 n,
4437 n
4438 );
4439
4440 if result.stats.num_delayed == 0 {
4441 let error = dense_reconstruction_error(
4442 &a,
4443 &result.l,
4444 &result.d,
4445 result.perm.as_ref().arrays().0,
4446 );
4447 assert!(
4448 error < 1e-12,
4449 "two-level {}x{}: reconstruction error {:.2e} >= 1e-12",
4450 n,
4451 n,
4452 error
4453 );
4454 }
4455 }
4456 }
4457
4458 #[test]
4459 fn test_two_level_single_outer_block() {
4460 let n = 32;
4462 let opts = AptpOptions {
4463 outer_block_size: 32,
4464 inner_block_size: 8,
4465 ..AptpOptions::default()
4466 };
4467
4468 let a = symmetric_matrix(n, |i, j| {
4469 let seed = (i * 1000 + j * 7 + 13) as f64;
4470 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4471 if i == j { val * 10.0 } else { val }
4472 });
4473
4474 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4475 if result.stats.num_delayed == 0 {
4476 let error = dense_reconstruction_error(
4477 &a,
4478 &result.l,
4479 &result.d,
4480 result.perm.as_ref().arrays().0,
4481 );
4482 assert!(error < 1e-12, "single block: error {:.2e}", error);
4483 }
4484 }
4485
4486 #[test]
4487 fn test_two_level_boundary_nb_plus_1() {
4488 let n = 33;
4490 let opts = AptpOptions {
4491 outer_block_size: 32,
4492 inner_block_size: 8,
4493 ..AptpOptions::default()
4494 };
4495
4496 let a = symmetric_matrix(n, |i, j| {
4497 let seed = (i * 1000 + j * 7 + 13) as f64;
4498 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4499 if i == j { val * 10.0 } else { val }
4500 });
4501
4502 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4503 assert_eq!(
4504 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
4505 n,
4506 );
4507 if result.stats.num_delayed == 0 {
4508 let error = dense_reconstruction_error(
4509 &a,
4510 &result.l,
4511 &result.d,
4512 result.perm.as_ref().arrays().0,
4513 );
4514 assert!(error < 1e-12, "nb+1 boundary: error {:.2e}", error);
4515 }
4516 }
4517
4518 #[test]
4519 fn test_two_level_partial_factorization() {
4520 let n = 80;
4523 let p = 50; let opts = AptpOptions {
4525 outer_block_size: 32,
4526 inner_block_size: 8,
4527 ..AptpOptions::default()
4528 };
4529
4530 let a = symmetric_matrix(n, |i, j| {
4531 let seed = (i * 1000 + j * 7 + 13) as f64;
4532 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4533 if i == j { val * 10.0 } else { val }
4534 });
4535
4536 let mut a_copy = a.to_owned();
4537 let result = aptp_factor_in_place(a_copy.as_mut(), p, &opts).unwrap();
4538
4539 assert!(result.num_eliminated <= p);
4540 assert_eq!(
4541 result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed,
4542 p,
4543 );
4544 }
4545
4546 #[test]
4547 fn test_two_level_vs_single_level_equivalence() {
4548 let n = 64;
4551 let a = symmetric_matrix(n, |i, j| {
4552 let seed = (i * 1000 + j * 7 + 13) as f64;
4553 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4554 if i == j { val * 10.0 } else { val }
4555 });
4556
4557 let opts_single = AptpOptions {
4559 outer_block_size: 256,
4560 inner_block_size: 32,
4561 ..AptpOptions::default()
4562 };
4563 let result_single = aptp_factor(a.as_ref(), &opts_single).unwrap();
4564
4565 let opts_two = AptpOptions {
4567 outer_block_size: 16,
4568 inner_block_size: 8,
4569 ..AptpOptions::default()
4570 };
4571 let result_two = aptp_factor(a.as_ref(), &opts_two).unwrap();
4572
4573 if result_single.stats.num_delayed == 0 {
4575 let err_s = dense_reconstruction_error(
4576 &a,
4577 &result_single.l,
4578 &result_single.d,
4579 result_single.perm.as_ref().arrays().0,
4580 );
4581 assert!(err_s < 1e-12, "single-level error {:.2e}", err_s);
4582 }
4583 if result_two.stats.num_delayed == 0 {
4584 let err_t = dense_reconstruction_error(
4585 &a,
4586 &result_two.l,
4587 &result_two.d,
4588 result_two.perm.as_ref().arrays().0,
4589 );
4590 assert!(err_t < 1e-12, "two-level error {:.2e}", err_t);
4591 }
4592 }
4593
4594 #[test]
4595 fn test_two_level_vs_unblocked_reconstruction() {
4596 let n = 512;
4605
4606 let a = symmetric_matrix(n, |i, j| {
4609 let seed = (i * 997 + j * 1013 + 42) as f64;
4610 let val = (seed * 0.618033988749).fract() * 2.0 - 1.0;
4611 if i == j {
4612 val * 0.5
4614 } else {
4615 val
4616 }
4617 });
4618
4619 let opts_unblocked = AptpOptions {
4621 outer_block_size: 100_000,
4622 inner_block_size: 32,
4623 ..AptpOptions::default()
4624 };
4625 let result_unblocked = aptp_factor(a.as_ref(), &opts_unblocked).unwrap();
4626 let err_unblocked = dense_reconstruction_error(
4627 &a,
4628 &result_unblocked.l,
4629 &result_unblocked.d,
4630 result_unblocked.perm.as_ref().arrays().0,
4631 );
4632 assert!(
4633 err_unblocked < 1e-12,
4634 "unblocked reconstruction error {:.2e} exceeds 1e-12",
4635 err_unblocked,
4636 );
4637
4638 let opts_two_level = AptpOptions {
4640 outer_block_size: 128,
4641 inner_block_size: 32,
4642 ..AptpOptions::default()
4643 };
4644 let result_two_level = aptp_factor(a.as_ref(), &opts_two_level).unwrap();
4645 let err_two_level = dense_reconstruction_error(
4646 &a,
4647 &result_two_level.l,
4648 &result_two_level.d,
4649 result_two_level.perm.as_ref().arrays().0,
4650 );
4651 assert!(
4652 err_two_level < 1e-12,
4653 "two-level reconstruction error {:.2e} exceeds 1e-12 \
4654 (unblocked was {:.2e})",
4655 err_two_level,
4656 err_unblocked,
4657 );
4658
4659 let ratio = if err_unblocked > 0.0 {
4661 err_two_level / err_unblocked
4662 } else {
4663 1.0
4664 };
4665 assert!(
4666 ratio < 10.0,
4667 "two-level error ({:.2e}) is {:.1}x worse than unblocked ({:.2e})",
4668 err_two_level,
4669 ratio,
4670 err_unblocked,
4671 );
4672 }
4673
4674 #[test]
4677 fn test_factor_block_diagonal_basic() {
4678 let mut a = Mat::identity(8, 8);
4681 let (d, perm, nelim, stats, _log) = factor_block_diagonal(a.as_mut(), 0, 4, 1e-20, 4);
4682
4683 assert_eq!(nelim, 4);
4684 assert_eq!(stats.num_1x1, 4);
4685 assert_eq!(stats.num_2x2, 0);
4686 assert_eq!(stats.num_delayed, 0);
4687 assert!(stats.max_l_entry < 1e-14);
4688
4689 for i in 0..4 {
4691 assert!((d.diagonal_1x1(i) - 1.0).abs() < 1e-14);
4692 }
4693
4694 assert_eq!(perm, vec![0, 1, 2, 3]);
4696
4697 for i in 4..8 {
4699 for j in 0..4 {
4700 assert!(
4701 a[(i, j)].abs() < 1e-14,
4702 "panel entry ({},{}) should be 0, got {}",
4703 i,
4704 j,
4705 a[(i, j)]
4706 );
4707 }
4708 }
4709 }
4710
4711 #[test]
4712 fn test_factor_block_diagonal_block_scoped_swap() {
4713 let mut a = symmetric_matrix(8, |i, j| {
4717 if i == j {
4718 [1.0, 5.0, 2.0, 3.0, 0.1, 0.1, 0.1, 0.1][i]
4719 } else if (i == 4 && j == 1) || (i == 1 && j == 4) {
4720 0.99
4722 } else {
4723 0.0
4724 }
4725 });
4726
4727 let panel_row_before: Vec<f64> = (0..4).map(|j| a[(4, j)]).collect();
4729
4730 let (_d, perm, nelim, _stats, _log) = factor_block_diagonal(a.as_mut(), 0, 4, 1e-20, 4);
4731
4732 assert!(nelim > 0, "should eliminate at least 1 column");
4733
4734 assert_eq!(
4736 perm[0], 1,
4737 "first pivot should be original column 1 (max diag = 5.0)"
4738 );
4739
4740 for j in 0..4 {
4742 assert!(
4743 (a[(4, j)] - panel_row_before[j]).abs() < 1e-14,
4744 "panel row (4,{}) should be unchanged: got {}, expected {}",
4745 j,
4746 a[(4, j)],
4747 panel_row_before[j]
4748 );
4749 }
4750 }
4751
4752 #[test]
4753 fn test_blas3_pipeline_reconstruction() {
4754 let a = symmetric_matrix(8, |i, j| {
4757 if i == j {
4758 10.0 + i as f64
4759 } else {
4760 1.0 / (1.0 + (i as f64 - j as f64).abs())
4761 }
4762 });
4763
4764 let opts = AptpOptions {
4765 inner_block_size: 4, ..AptpOptions::default()
4767 };
4768 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4769
4770 assert_eq!(result.stats.num_delayed, 0, "should have no delays");
4771
4772 let error =
4773 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
4774 assert!(
4775 error < 1e-12,
4776 "BLAS-3 pipeline reconstruction error {:.2e} >= 1e-12",
4777 error
4778 );
4779 }
4780
4781 #[test]
4782 fn test_blas3_threshold_failure_and_retry() {
4783 let a = symmetric_matrix(8, |i, j| {
4787 let vals = [
4788 [1e-3, 1.0, 0.5, 0.5, 0.1, 0.1, 0.1, 0.1],
4789 [1.0, 1e-3, 0.5, 0.5, 0.1, 0.1, 0.1, 0.1],
4790 [0.5, 0.5, 10.0, 1.0, 0.1, 0.1, 0.1, 0.1],
4791 [0.5, 0.5, 1.0, 10.0, 0.1, 0.1, 0.1, 0.1],
4792 [0.1, 0.1, 0.1, 0.1, 5.0, 0.5, 0.0, 0.0],
4793 [0.1, 0.1, 0.1, 0.1, 0.5, 6.0, 0.0, 0.0],
4794 [0.1, 0.1, 0.1, 0.1, 0.0, 0.0, 7.0, 0.5],
4795 [0.1, 0.1, 0.1, 0.1, 0.0, 0.0, 0.5, 8.0],
4796 ];
4797 vals[i][j]
4798 });
4799
4800 let opts = AptpOptions {
4801 inner_block_size: 4,
4802 threshold: 0.01, ..AptpOptions::default()
4804 };
4805 let result = aptp_factor(a.as_ref(), &opts).unwrap();
4806
4807 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
4809 assert_eq!(sum, 8, "statistics sum {} != 8", sum);
4810
4811 if result.stats.num_delayed == 0 {
4813 let error = dense_reconstruction_error(
4814 &a,
4815 &result.l,
4816 &result.d,
4817 result.perm.as_ref().arrays().0,
4818 );
4819 assert!(
4820 error < 1e-12,
4821 "threshold failure retry: reconstruction error {:.2e}",
4822 error
4823 );
4824 }
4825 }
4826
4827 #[test]
4828 fn test_adjust_for_2x2_boundary() {
4829 let mut d = MixedDiagonal::new(4);
4832 d.set_1x1(0, 1.0);
4833 d.set_2x2(Block2x2 {
4834 first_col: 1,
4835 a: 1.0,
4836 b: 0.5,
4837 c: 2.0,
4838 });
4839 d.set_1x1(3, 3.0);
4840
4841 assert_eq!(adjust_for_2x2_boundary(2, &d), 1);
4844
4845 assert_eq!(adjust_for_2x2_boundary(3, &d), 3);
4848
4849 assert_eq!(adjust_for_2x2_boundary(1, &d), 1);
4851
4852 assert_eq!(adjust_for_2x2_boundary(4, &d), 4);
4854
4855 assert_eq!(adjust_for_2x2_boundary(0, &d), 0);
4857 }
4858
4859 #[test]
4860 fn test_blas3_full_block_singular() {
4861 let mut a = Mat::zeros(8, 8);
4863 for i in 0..8 {
4864 a[(i, i)] = 1e-25;
4865 }
4866
4867 let opts = AptpOptions {
4868 inner_block_size: 4,
4869 failed_pivot_method: FailedPivotMethod::Pass,
4870 ..AptpOptions::default()
4871 };
4872 let mut a_copy = a.clone();
4873 let result = aptp_factor_in_place(a_copy.as_mut(), 8, &opts).unwrap();
4874
4875 assert_eq!(result.num_eliminated, 0);
4876 assert_eq!(result.stats.num_delayed, 8);
4877 assert_eq!(result.delayed_cols.len(), 8);
4878 }
4879
4880 fn deterministic_indefinite_matrix(n: usize, seed: u64, diag_scale: f64) -> Mat<f64> {
4887 let hash = |a: usize, b: usize| -> f64 {
4889 let mut s = seed
4890 .wrapping_add((a * 10007) as u64)
4891 .wrapping_add((b * 7) as u64);
4892 s = s
4893 .wrapping_mul(6364136223846793005)
4894 .wrapping_add(1442695040888963407);
4895 s = s
4896 .wrapping_mul(6364136223846793005)
4897 .wrapping_add(1442695040888963407);
4898 ((s >> 33) as f64) / (u32::MAX as f64 / 2.0) - 1.0
4899 };
4900
4901 symmetric_matrix(n, |i, j| {
4902 if i == j {
4903 hash(i, i) * diag_scale
4904 } else {
4905 hash(i.min(j), i.max(j) + n)
4907 }
4908 })
4909 }
4910
4911 fn cause_delays(a: &mut Mat<f64>, seed: u64, scale: f64) {
4915 let n = a.nrows();
4916 let n_scaled = (n / 8).max(1);
4917
4918 let mut state = seed;
4920 let mut next_idx = || -> usize {
4921 state = state
4922 .wrapping_mul(6364136223846793005)
4923 .wrapping_add(1442695040888963407);
4924 ((state >> 33) as usize) % n
4925 };
4926
4927 let mut scaled_rows = Vec::new();
4928 while scaled_rows.len() < n_scaled {
4929 let idx = next_idx();
4930 if !scaled_rows.contains(&idx) {
4931 scaled_rows.push(idx);
4932 }
4933 }
4934
4935 for &r in &scaled_rows {
4938 for j in 0..n {
4939 a[(r, j)] *= scale;
4940 a[(j, r)] *= scale;
4941 }
4942 }
4944 }
4945
4946 #[test]
4947 fn test_factor_inner_with_delays() {
4948 struct TestConfig {
4960 n: usize,
4961 ib: usize,
4962 seed: u64,
4963 scale: f64,
4964 }
4965
4966 let configs = [
4967 TestConfig {
4969 n: 8,
4970 ib: 2,
4971 seed: 42,
4972 scale: 1000.0,
4973 },
4974 TestConfig {
4976 n: 8,
4977 ib: 4,
4978 seed: 42,
4979 scale: 1000.0,
4980 },
4981 TestConfig {
4983 n: 16,
4984 ib: 4,
4985 seed: 42,
4986 scale: 1000.0,
4987 },
4988 TestConfig {
4990 n: 16,
4991 ib: 2,
4992 seed: 42,
4993 scale: 1000.0,
4994 },
4995 TestConfig {
4997 n: 32,
4998 ib: 4,
4999 seed: 42,
5000 scale: 1000.0,
5001 },
5002 TestConfig {
5004 n: 32,
5005 ib: 8,
5006 seed: 42,
5007 scale: 1000.0,
5008 },
5009 TestConfig {
5011 n: 16,
5012 ib: 4,
5013 seed: 137,
5014 scale: 1000.0,
5015 },
5016 TestConfig {
5018 n: 16,
5019 ib: 4,
5020 seed: 42,
5021 scale: 1e6,
5022 },
5023 TestConfig {
5025 n: 64,
5026 ib: 8,
5027 seed: 42,
5028 scale: 1000.0,
5029 },
5030 TestConfig {
5032 n: 64,
5033 ib: 16,
5034 seed: 42,
5035 scale: 1000.0,
5036 },
5037 ];
5038
5039 let mut _any_delays_found = false;
5040 let mut any_failures = false;
5041
5042 for (idx, config) in configs.iter().enumerate() {
5043 let mut a = deterministic_indefinite_matrix(config.n, config.seed, 5.0);
5044 cause_delays(&mut a, config.seed + 1000, config.scale);
5045
5046 let opts = AptpOptions {
5047 inner_block_size: config.ib,
5048 outer_block_size: config.n.max(config.ib),
5049 threshold: 0.01,
5050 ..AptpOptions::default()
5051 };
5052
5053 let result = aptp_factor(a.as_ref(), &opts).unwrap();
5054
5055 let n = config.n;
5056 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
5057 assert_eq!(
5058 sum, n,
5059 "config {}: statistics invariant broken: {} != {}",
5060 idx, sum, n
5061 );
5062
5063 if result.stats.num_delayed > 0 {
5064 _any_delays_found = true;
5065 }
5066
5067 if result.stats.num_delayed == 0 {
5069 let error = dense_reconstruction_error(
5070 &a,
5071 &result.l,
5072 &result.d,
5073 result.perm.as_ref().arrays().0,
5074 );
5075 if error >= 1e-12 {
5076 any_failures = true;
5077 }
5078 assert!(
5079 error < 1e-10,
5080 "config {} (n={}, ib={}): reconstruction error {:.2e} >= 1e-10 \
5081 (no delays but bad reconstruction indicates factor_inner bug)",
5082 idx,
5083 config.n,
5084 config.ib,
5085 error
5086 );
5087 }
5088 }
5089
5090 assert!(
5097 !any_failures,
5098 "Some configs had reconstruction error >= 1e-12 without delays. \
5099 This indicates a bug in factor_inner's backup/restore/update logic."
5100 );
5101 }
5102
5103 #[test]
5104 fn test_factor_inner_with_delays_targeted() {
5105 let n = 8;
5125
5126 for &ib in &[4, 2] {
5127 let a = symmetric_matrix(n, |i, j| {
5128 match (i, j) {
5129 (0, 0) => 10.0,
5135 (1, 1) => 10.0,
5136 (2, 2) => 0.005,
5137 (3, 3) => 0.005,
5138 (i, j) if i < 4 && j < 4 && i != j => 0.001,
5139
5140 (_, j) if j < 4 => 2.0,
5142
5143 (i, _) if i == j => 20.0,
5145 (_, _) => 0.5,
5146 }
5147 });
5148
5149 let opts_pass = AptpOptions {
5151 inner_block_size: ib,
5152 outer_block_size: 256,
5153 threshold: 0.01,
5154 failed_pivot_method: FailedPivotMethod::Pass,
5155 ..AptpOptions::default()
5156 };
5157
5158 let result = aptp_factor(a.as_ref(), &opts_pass).unwrap();
5159
5160 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
5161 assert_eq!(sum, n, "ib={}: statistics invariant: {} != {}", ib, sum, n);
5162
5163 if result.stats.num_delayed == 0 {
5165 let error = dense_reconstruction_error(
5166 &a,
5167 &result.l,
5168 &result.d,
5169 result.perm.as_ref().arrays().0,
5170 );
5171 assert!(
5172 error < 1e-12,
5173 "targeted (ib={}): reconstruction error {:.2e} >= 1e-12 (no delays)",
5174 ib,
5175 error
5176 );
5177 }
5178
5179 if ib == 4 || ib == 2 {
5182 assert!(
5186 result.stats.num_delayed > 0,
5187 "ib={}: expected some delays for small-pivot columns",
5188 ib
5189 );
5190 }
5191
5192 {
5194 let p = 6; let mut a_copy = a.clone();
5196 let opts_partial = AptpOptions {
5197 inner_block_size: ib,
5198 outer_block_size: 256,
5199 threshold: 0.01,
5200 failed_pivot_method: FailedPivotMethod::Pass,
5201 ..AptpOptions::default()
5202 };
5203 let result_p = aptp_factor_in_place(a_copy.as_mut(), p, &opts_partial).unwrap();
5204 let error_p = check_partial_factorization_in_place(&a, &a_copy, p, &result_p);
5205 assert!(
5206 error_p < 1e-10,
5207 "targeted partial (ib={}, p={}): error {:.2e} >= 1e-10",
5208 ib,
5209 p,
5210 error_p
5211 );
5212 }
5213 }
5214 }
5215
5216 #[test]
5217 fn test_factor_inner_with_delays_aggressive() {
5218 let test_cases: Vec<(usize, usize, u64)> = vec![
5228 (12, 4, 1),
5229 (12, 4, 2),
5230 (12, 4, 3),
5231 (16, 4, 1),
5232 (16, 4, 2),
5233 (16, 8, 1),
5234 (20, 4, 1),
5235 (24, 4, 1),
5236 (24, 8, 1),
5237 (32, 4, 1),
5238 (32, 8, 1),
5239 (32, 16, 1),
5240 (48, 8, 1),
5241 (64, 8, 1),
5242 (64, 16, 1),
5243 ];
5244
5245 let mut _total_delays = 0;
5246 let mut max_error = 0.0_f64;
5247 let mut _worst_config = String::new();
5248
5249 for &(n, ib, seed) in &test_cases {
5250 let mut a = deterministic_indefinite_matrix(n, seed * 31337, 5.0);
5252
5253 cause_delays(&mut a, seed * 31337 + 7919, 1000.0);
5255
5256 let opts = AptpOptions {
5257 inner_block_size: ib,
5258 outer_block_size: n.max(ib),
5259 threshold: 0.01,
5260 ..AptpOptions::default()
5261 };
5262
5263 let result = aptp_factor(a.as_ref(), &opts).unwrap();
5264
5265 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
5266 assert_eq!(
5267 sum, n,
5268 "(n={}, ib={}, seed={}): stats invariant {} != {}",
5269 n, ib, seed, sum, n
5270 );
5271
5272 _total_delays += result.stats.num_delayed;
5273
5274 if result.stats.num_delayed == 0 {
5275 let error = dense_reconstruction_error(
5276 &a,
5277 &result.l,
5278 &result.d,
5279 result.perm.as_ref().arrays().0,
5280 );
5281 if error > max_error {
5282 max_error = error;
5283 _worst_config =
5284 format!("n={}, ib={}, seed={}: error={:.2e}", n, ib, seed, error);
5285 }
5286 assert!(
5287 error < 1e-10,
5288 "(n={}, ib={}, seed={}): reconstruction error {:.2e} >= 1e-10",
5289 n,
5290 ib,
5291 seed,
5292 error
5293 );
5294 }
5295 }
5296 }
5297
5298 #[test]
5299 fn test_factor_inner_cause_delays_then_compare_single_vs_blocked() {
5300 let seeds = [42u64, 137, 271, 314, 997];
5308 let n = 16;
5309
5310 for &seed in &seeds {
5311 let mut a = deterministic_indefinite_matrix(n, seed, 5.0);
5312 cause_delays(&mut a, seed + 5000, 500.0);
5313
5314 let opts_single = AptpOptions {
5316 inner_block_size: n,
5317 outer_block_size: n,
5318 threshold: 0.01,
5319 ..AptpOptions::default()
5320 };
5321 let result_single = aptp_factor(a.as_ref(), &opts_single).unwrap();
5322
5323 let opts_blocked = AptpOptions {
5325 inner_block_size: 4,
5326 outer_block_size: n,
5327 threshold: 0.01,
5328 ..AptpOptions::default()
5329 };
5330 let result_blocked = aptp_factor(a.as_ref(), &opts_blocked).unwrap();
5331
5332 if result_single.stats.num_delayed == 0 {
5334 let error_s = dense_reconstruction_error(
5335 &a,
5336 &result_single.l,
5337 &result_single.d,
5338 result_single.perm.as_ref().arrays().0,
5339 );
5340 assert!(
5341 error_s < 1e-12,
5342 "seed={}: single-block error {:.2e}",
5343 seed,
5344 error_s
5345 );
5346 }
5347
5348 if result_blocked.stats.num_delayed == 0 {
5349 let error_b = dense_reconstruction_error(
5350 &a,
5351 &result_blocked.l,
5352 &result_blocked.d,
5353 result_blocked.perm.as_ref().arrays().0,
5354 );
5355 assert!(
5356 error_b < 1e-12,
5357 "seed={}: blocked error {:.2e} >= 1e-12",
5358 seed,
5359 error_b
5360 );
5361 }
5362 }
5363 }
5364
5365 fn check_partial_factorization_in_place(
5378 original: &Mat<f64>,
5379 factored: &Mat<f64>,
5380 num_fully_summed: usize,
5381 result: &AptpFactorResult,
5382 ) -> f64 {
5383 let m = original.nrows();
5384 let q = result.num_eliminated;
5385 let p = num_fully_summed;
5386 let perm = &result.perm;
5387
5388 let mut factored = factored.clone();
5393 let nfs = m - p;
5394 if nfs > 0 && q > 0 {
5395 let mut contrib_buffer = Mat::zeros(nfs, nfs);
5396 let mut ld_ws = Mat::new();
5397 compute_contribution_gemm(
5398 &factored,
5399 p,
5400 q,
5401 m,
5402 &result.d,
5403 &mut contrib_buffer,
5404 &mut ld_ws,
5405 Par::Seq,
5406 );
5407 for i in 0..nfs {
5409 for j in 0..=i {
5410 factored[(p + i, p + j)] = contrib_buffer[(i, j)];
5411 }
5412 }
5413 }
5414 let d = &result.d;
5415
5416 let mut papt = Mat::zeros(m, m);
5418 for i in 0..m {
5419 for j in 0..m {
5420 papt[(i, j)] = original[(perm[i], perm[j])];
5421 }
5422 }
5423
5424 let mut l_full = Mat::zeros(m, q);
5426 for i in 0..q {
5427 l_full[(i, i)] = 1.0;
5428 }
5429 let mut col = 0;
5430 while col < q {
5431 match d.pivot_type(col) {
5432 PivotType::OneByOne => {
5433 for i in (col + 1)..m {
5434 l_full[(i, col)] = factored[(i, col)];
5435 }
5436 col += 1;
5437 }
5438 PivotType::TwoByTwo { partner } if partner > col => {
5439 for i in (col + 2)..m {
5441 l_full[(i, col)] = factored[(i, col)];
5442 l_full[(i, col + 1)] = factored[(i, col + 1)];
5443 }
5444 col += 2;
5445 }
5446 _ => {
5447 col += 1;
5448 }
5449 }
5450 }
5451
5452 let mut d_mat = Mat::zeros(q, q);
5454 col = 0;
5455 while col < q {
5456 match d.pivot_type(col) {
5457 PivotType::OneByOne => {
5458 d_mat[(col, col)] = d.diagonal_1x1(col);
5459 col += 1;
5460 }
5461 PivotType::TwoByTwo { partner } if partner > col => {
5462 let block = d.diagonal_2x2(col);
5463 d_mat[(col, col)] = block.a;
5464 d_mat[(col, col + 1)] = block.b;
5465 d_mat[(col + 1, col)] = block.b;
5466 d_mat[(col + 1, col + 1)] = block.c;
5467 col += 2;
5468 }
5469 _ => {
5470 col += 1;
5471 }
5472 }
5473 }
5474
5475 let mut w = Mat::zeros(m, q);
5478 for i in 0..m {
5479 for j in 0..q {
5480 let mut sum = 0.0;
5481 for k in 0..q {
5482 sum += l_full[(i, k)] * d_mat[(k, j)];
5483 }
5484 w[(i, j)] = sum;
5485 }
5486 }
5487 let mut ldlt = Mat::zeros(m, m);
5489 for i in 0..m {
5490 for j in 0..m {
5491 let mut sum = 0.0;
5492 for k in 0..q {
5493 sum += w[(i, k)] * l_full[(j, k)];
5494 }
5495 ldlt[(i, j)] = sum;
5496 }
5497 }
5498
5499 let mut max_error = 0.0_f64;
5505 let mut norm_sq = 0.0_f64;
5506 let mut diff_sq = 0.0_f64;
5507
5508 for i in 0..m {
5513 for j in 0..=i {
5514 let weight = if i == j { 1.0 } else { 2.0 };
5516 norm_sq += weight * papt[(i, j)] * papt[(i, j)];
5517 let residual = papt[(i, j)] - ldlt[(i, j)];
5518 if i >= q && j >= q {
5519 let schur_entry = factored[(i, j)];
5521 let err = (residual - schur_entry).abs();
5522 if err > max_error {
5523 max_error = err;
5524 }
5525 diff_sq += weight * (residual - schur_entry) * (residual - schur_entry);
5526 } else {
5527 diff_sq += weight * residual * residual;
5529 if residual.abs() > max_error {
5530 max_error = residual.abs();
5531 }
5532 }
5533 }
5534 }
5535
5536 if norm_sq == 0.0 {
5537 diff_sq.sqrt()
5538 } else {
5539 (diff_sq / norm_sq).sqrt()
5540 }
5541 }
5542
5543 #[test]
5544 fn test_factor_inner_partial_with_delays_schur_check() {
5545 struct PartialConfig {
5558 m: usize, p: usize, ib: usize, seed: u64,
5562 scale: f64,
5563 }
5564
5565 let configs = [
5566 PartialConfig {
5568 m: 12,
5569 p: 8,
5570 ib: 4,
5571 seed: 42,
5572 scale: 1000.0,
5573 },
5574 PartialConfig {
5575 m: 12,
5576 p: 8,
5577 ib: 4,
5578 seed: 137,
5579 scale: 1000.0,
5580 },
5581 PartialConfig {
5582 m: 12,
5583 p: 8,
5584 ib: 2,
5585 seed: 42,
5586 scale: 1000.0,
5587 },
5588 PartialConfig {
5590 m: 16,
5591 p: 12,
5592 ib: 4,
5593 seed: 42,
5594 scale: 1000.0,
5595 },
5596 PartialConfig {
5597 m: 16,
5598 p: 12,
5599 ib: 4,
5600 seed: 271,
5601 scale: 1000.0,
5602 },
5603 PartialConfig {
5605 m: 20,
5606 p: 16,
5607 ib: 4,
5608 seed: 42,
5609 scale: 1000.0,
5610 },
5611 PartialConfig {
5612 m: 20,
5613 p: 16,
5614 ib: 8,
5615 seed: 42,
5616 scale: 1000.0,
5617 },
5618 PartialConfig {
5620 m: 32,
5621 p: 24,
5622 ib: 8,
5623 seed: 42,
5624 scale: 1000.0,
5625 },
5626 PartialConfig {
5627 m: 32,
5628 p: 24,
5629 ib: 4,
5630 seed: 42,
5631 scale: 1000.0,
5632 },
5633 PartialConfig {
5635 m: 48,
5636 p: 32,
5637 ib: 8,
5638 seed: 42,
5639 scale: 1000.0,
5640 },
5641 PartialConfig {
5643 m: 16,
5644 p: 12,
5645 ib: 4,
5646 seed: 42,
5647 scale: 1e6,
5648 },
5649 PartialConfig {
5651 m: 16,
5652 p: 12,
5653 ib: 4,
5654 seed: 314,
5655 scale: 1000.0,
5656 },
5657 PartialConfig {
5658 m: 16,
5659 p: 12,
5660 ib: 4,
5661 seed: 997,
5662 scale: 1000.0,
5663 },
5664 ];
5665
5666 let mut any_delays = false;
5667 let mut worst_error = 0.0_f64;
5668 let mut worst_config = String::new();
5669
5670 for (idx, config) in configs.iter().enumerate() {
5671 let mut a = deterministic_indefinite_matrix(config.m, config.seed, 5.0);
5672 cause_delays(&mut a, config.seed + 1000, config.scale);
5673
5674 let opts = AptpOptions {
5675 inner_block_size: config.ib,
5676 outer_block_size: config.m.max(config.ib),
5677 threshold: 0.01,
5678 failed_pivot_method: FailedPivotMethod::Pass,
5679 ..AptpOptions::default()
5680 };
5681
5682 let original = a.clone();
5683 let result = aptp_factor_in_place(a.as_mut(), config.p, &opts).unwrap();
5684
5685 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
5686 assert_eq!(
5687 sum, config.p,
5688 "config {} (m={}, p={}, ib={}): stats invariant {} != {}",
5689 idx, config.m, config.p, config.ib, sum, config.p
5690 );
5691
5692 if result.stats.num_delayed > 0 {
5693 any_delays = true;
5694 }
5695
5696 let error = check_partial_factorization_in_place(&original, &a, config.p, &result);
5698
5699 if error > worst_error {
5700 worst_error = error;
5701 worst_config = format!(
5702 "config {} (m={}, p={}, ib={}, seed={})",
5703 idx, config.m, config.p, config.ib, config.seed
5704 );
5705 }
5706 }
5707
5708 assert!(
5709 any_delays,
5710 "No configurations triggered threshold delays. \
5711 Adjust scale or matrix construction to create delays."
5712 );
5713
5714 assert!(
5715 worst_error < 1e-10,
5716 "Worst partial factorization error: {:.2e} at {}\n\
5717 This indicates corrupted Schur complement — bug in \
5718 backup/restore/update_cross_terms logic in factor_inner.",
5719 worst_error,
5720 worst_config
5721 );
5722 }
5723
5724 #[test]
5727 fn test_two_level_nfs_boundary_mid_block() {
5728 struct Config {
5739 m: usize,
5740 p: usize,
5741 nb: usize,
5742 ib: usize,
5743 seed: u64,
5744 }
5745
5746 let configs = [
5747 Config {
5750 m: 48,
5751 p: 20,
5752 nb: 16,
5753 ib: 8,
5754 seed: 42,
5755 },
5756 Config {
5758 m: 64,
5759 p: 40,
5760 nb: 32,
5761 ib: 8,
5762 seed: 42,
5763 },
5764 Config {
5767 m: 24,
5768 p: 10,
5769 nb: 8,
5770 ib: 4,
5771 seed: 137,
5772 },
5773 Config {
5775 m: 48,
5776 p: 25,
5777 nb: 16,
5778 ib: 8,
5779 seed: 271,
5780 },
5781 Config {
5784 m: 32,
5785 p: 13,
5786 nb: 8,
5787 ib: 4,
5788 seed: 42,
5789 },
5790 ];
5791
5792 let mut worst_error = 0.0_f64;
5793 let mut worst_label = String::new();
5794
5795 for (idx, c) in configs.iter().enumerate() {
5796 let a = deterministic_indefinite_matrix(c.m, c.seed, 5.0);
5797
5798 let opts = AptpOptions {
5799 outer_block_size: c.nb,
5800 inner_block_size: c.ib,
5801 threshold: 0.01,
5802 failed_pivot_method: FailedPivotMethod::Pass,
5803 ..AptpOptions::default()
5804 };
5805
5806 let original = a.clone();
5807 let mut a_copy = a;
5808 let result = aptp_factor_in_place(a_copy.as_mut(), c.p, &opts).unwrap();
5809
5810 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
5811 assert_eq!(
5812 sum, c.p,
5813 "config {}: statistics invariant {} != {}",
5814 idx, sum, c.p
5815 );
5816
5817 let error = check_partial_factorization_in_place(&original, &a_copy, c.p, &result);
5818
5819 if error > worst_error {
5820 worst_error = error;
5821 worst_label = format!(
5822 "config {} (m={}, p={}, nb={}, ib={}, seed={})",
5823 idx, c.m, c.p, c.nb, c.ib, c.seed
5824 );
5825 }
5826 }
5827
5828 assert!(
5829 worst_error < 1e-10,
5830 "Worst reconstruction error: {:.2e} at {}\n\
5831 This indicates the local nfs_boundary computation in \
5832 two_level_factor is producing incorrect Schur complements.",
5833 worst_error,
5834 worst_label
5835 );
5836 }
5837
5838 #[test]
5841 fn test_tpp_helpers() {
5842 let a = symmetric_matrix(4, |i, j| {
5848 let vals = [
5849 [4.0, 0.5, 0.1, 0.3],
5850 [0.5, -3.0, 0.2, -0.4],
5851 [0.1, 0.2, 5.0, 0.6],
5852 [0.3, -0.4, 0.6, 2.0],
5853 ];
5854 vals[i][j]
5855 });
5856
5857 assert!(tpp_is_col_small(a.as_ref(), 0, 0, 4, 5.0));
5859 assert!(!tpp_is_col_small(a.as_ref(), 0, 0, 4, 0.01));
5861
5862 let z = Mat::zeros(4, 4);
5864 assert!(tpp_is_col_small(z.as_ref(), 0, 0, 4, 1e-20));
5865
5866 let t = tpp_find_row_abs_max(a.as_ref(), 3, 0, 3);
5869 assert_eq!(t, 2, "expected col 2, got {}", t);
5870
5871 let max_exc = tpp_find_rc_abs_max_exclude(a.as_ref(), 1, 0, 4, 3);
5876 assert!(
5877 (max_exc - 0.5).abs() < 1e-15,
5878 "expected 0.5, got {}",
5879 max_exc
5880 );
5881
5882 let result = tpp_test_2x2(a.as_ref(), 0, 1, 0.6, 0.6, 0.01, 1e-20);
5886 assert!(result.is_some(), "2x2 pivot (0,1) should pass");
5887 let (d11, d12, d22) = result.unwrap();
5888 assert!((d11 - 4.0).abs() < 1e-15);
5889 assert!((d12 - 0.5).abs() < 1e-15);
5890 assert!((d22 - (-3.0)).abs() < 1e-15);
5891
5892 let tiny = symmetric_matrix(2, |_, _| 1e-25);
5894 assert!(tpp_test_2x2(tiny.as_ref(), 0, 1, 1e-25, 1e-25, 0.01, 1e-20).is_none());
5895 }
5896
5897 #[test]
5898 fn test_tpp_basic_1x1() {
5899 let a = symmetric_matrix(4, |i, j| {
5901 if i == j {
5902 [10.0, -8.0, 5.0, 7.0][i]
5903 } else {
5904 0.1
5905 }
5906 });
5907
5908 let opts = AptpOptions::default();
5909 let result = aptp_factor(a.as_ref(), &opts).unwrap();
5910
5911 assert_eq!(result.stats.num_delayed, 0);
5912 assert_eq!(result.stats.num_1x1 + 2 * result.stats.num_2x2, 4);
5913
5914 let error =
5915 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
5916 assert!(error < 1e-12, "reconstruction error {:.2e}", error);
5917 }
5918
5919 #[test]
5920 fn test_tpp_basic_2x2() {
5921 let a = symmetric_matrix(4, |i, j| {
5924 let vals = [
5925 [0.001, 5.0, 0.01, 0.01],
5926 [5.0, 0.001, 0.01, 0.01],
5927 [0.01, 0.01, 0.001, 5.0],
5928 [0.01, 0.01, 5.0, 0.001],
5929 ];
5930 vals[i][j]
5931 });
5932
5933 let opts = AptpOptions::default();
5934 let result = aptp_factor(a.as_ref(), &opts).unwrap();
5935
5936 assert_eq!(result.stats.num_delayed, 0);
5937
5938 let error =
5939 dense_reconstruction_error(&a, &result.l, &result.d, result.perm.as_ref().arrays().0);
5940 assert!(error < 1e-12, "reconstruction error {:.2e}", error);
5941 }
5942
5943 #[test]
5944 fn test_tpp_fallback_after_aptp() {
5945 let n = 16;
5955 let a = symmetric_matrix(n, |i, j| {
5956 match (i, j) {
5957 (i, j) if i < 8 && j < 8 => {
5959 if i == j {
5960 10.0
5961 } else {
5962 0.01
5963 }
5964 }
5965 (i, j) if (8..12).contains(&i) && (8..12).contains(&j) => {
5967 if i == j {
5968 0.005
5969 } else {
5970 2.0
5971 }
5972 }
5973 (i, j) if i >= 12 && j >= 12 => {
5975 if i == j {
5976 20.0
5977 } else {
5978 0.1
5979 }
5980 }
5981 (i, j) if (8..12).contains(&i) && j < 8 => 3.0,
5983 (i, j) if i < 8 && (8..12).contains(&j) => 3.0,
5984 _ => 0.01,
5986 }
5987 });
5988
5989 let opts_pass = AptpOptions {
5991 inner_block_size: 4,
5992 failed_pivot_method: FailedPivotMethod::Pass,
5993 ..AptpOptions::default()
5994 };
5995 let result_pass = aptp_factor(a.as_ref(), &opts_pass).unwrap();
5996
5997 let opts_tpp = AptpOptions {
5999 inner_block_size: 4,
6000 failed_pivot_method: FailedPivotMethod::Tpp,
6001 ..AptpOptions::default()
6002 };
6003 let result_tpp = aptp_factor(a.as_ref(), &opts_tpp).unwrap();
6004
6005 assert!(
6007 result_tpp.stats.num_delayed <= result_pass.stats.num_delayed,
6008 "TPP should not increase delays"
6009 );
6010
6011 if result_tpp.stats.num_delayed == 0 {
6013 let error = dense_reconstruction_error(
6014 &a,
6015 &result_tpp.l,
6016 &result_tpp.d,
6017 result_tpp.perm.as_ref().arrays().0,
6018 );
6019 assert!(
6020 error < 1e-12,
6021 "TPP reconstruction error {:.2e} >= 1e-12",
6022 error
6023 );
6024 }
6025 }
6026
6027 #[test]
6028 fn test_tpp_reconstruction_stress() {
6029 use rand::SeedableRng;
6031 use rand::rngs::StdRng;
6032 use rand_distr::{Distribution, Uniform};
6033
6034 let n = 256;
6035 let mut rng = StdRng::seed_from_u64(42);
6036 let dist = Uniform::new(-1.0f64, 1.0).unwrap();
6037
6038 let mut a = Mat::zeros(n, n);
6039 for i in 0..n {
6040 for j in 0..=i {
6041 let v: f64 = dist.sample(&mut rng);
6042 a[(i, j)] = v;
6043 a[(j, i)] = v;
6044 }
6045 let sign = if i % 3 == 0 { -1.0 } else { 1.0 };
6047 a[(i, i)] = sign * (5.0 + dist.sample(&mut rng).abs());
6048 }
6049
6050 let opts = AptpOptions {
6051 failed_pivot_method: FailedPivotMethod::Tpp,
6052 ..AptpOptions::default()
6053 };
6054 let result = aptp_factor(a.as_ref(), &opts).unwrap();
6055
6056 if result.stats.num_delayed == 0 {
6057 let error = dense_reconstruction_error(
6058 &a,
6059 &result.l,
6060 &result.d,
6061 result.perm.as_ref().arrays().0,
6062 );
6063 assert!(
6064 error < 1e-10,
6065 "stress reconstruction error {:.2e} >= 1e-10",
6066 error
6067 );
6068 }
6069
6070 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
6072 assert_eq!(sum, n, "statistics invariant: {} != {}", sum, n);
6073 }
6074
6075 #[test]
6076 fn test_failed_pivot_method_pass() {
6077 let n = 4;
6080 let a = symmetric_matrix(n, |i, j| {
6081 let vals = [
6082 [0.001, 5.0, 0.01, 0.01],
6083 [5.0, 0.001, 0.01, 0.01],
6084 [0.01, 0.01, 0.001, 5.0],
6085 [0.01, 0.01, 5.0, 0.001],
6086 ];
6087 vals[i][j]
6088 });
6089
6090 let opts_pass = AptpOptions {
6093 inner_block_size: 2,
6094 failed_pivot_method: FailedPivotMethod::Pass,
6095 ..AptpOptions::default()
6096 };
6097 let result_pass = aptp_factor(a.as_ref(), &opts_pass).unwrap();
6098
6099 let opts_tpp = AptpOptions {
6100 inner_block_size: 2,
6101 failed_pivot_method: FailedPivotMethod::Tpp,
6102 ..AptpOptions::default()
6103 };
6104 let result_tpp = aptp_factor(a.as_ref(), &opts_tpp).unwrap();
6105
6106 assert!(
6108 result_tpp.stats.num_delayed <= result_pass.stats.num_delayed,
6109 "TPP delays {} > Pass delays {}",
6110 result_tpp.stats.num_delayed,
6111 result_pass.stats.num_delayed
6112 );
6113
6114 let sum_pass = result_pass.stats.num_1x1
6116 + 2 * result_pass.stats.num_2x2
6117 + result_pass.stats.num_delayed;
6118 let sum_tpp =
6119 result_tpp.stats.num_1x1 + 2 * result_tpp.stats.num_2x2 + result_tpp.stats.num_delayed;
6120 assert_eq!(sum_pass, n);
6121 assert_eq!(sum_tpp, n);
6122 }
6123
6124 #[test]
6125 fn test_tpp_zero_pivot_handling() {
6126 let n = 4;
6128 let a = symmetric_matrix(n, |i, j| {
6129 if i == j {
6130 [5.0, 0.0, 0.0, 3.0][i]
6131 } else if (i == 0 && j == 3) || (i == 3 && j == 0) {
6132 0.1
6133 } else {
6134 0.0
6135 }
6136 });
6137
6138 let opts = AptpOptions {
6139 failed_pivot_method: FailedPivotMethod::Tpp,
6140 ..AptpOptions::default()
6141 };
6142 let result = aptp_factor(a.as_ref(), &opts).unwrap();
6143
6144 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
6146 assert_eq!(sum, n, "invariant: {} != {}", sum, n);
6147 }
6148
6149 #[test]
6152 fn test_tpp_helpers_rect() {
6153 let a = Mat::from_fn(5, 3, |i, j| {
6162 let vals: [[f64; 3]; 5] = [
6163 [4.0, 0.0, 0.0],
6164 [0.5, -3.0, 0.0],
6165 [0.1, 0.2, 5.0],
6166 [0.3, -0.4, 0.6],
6167 [0.7, 0.8, -0.9],
6168 ];
6169 vals[i][j]
6170 });
6171
6172 assert!((tpp_sym_entry(a.as_ref(), 3, 1) - (-0.4)).abs() < 1e-15);
6174 assert!((tpp_sym_entry(a.as_ref(), 0, 2) - 0.1).abs() < 1e-15);
6176 assert!((tpp_sym_entry(a.as_ref(), 1, 0) - 0.5).abs() < 1e-15);
6178
6179 assert!(tpp_is_col_small(a.as_ref(), 0, 0, 5, 5.0));
6181 assert!(!tpp_is_col_small(a.as_ref(), 0, 0, 5, 0.01));
6183 assert!(!tpp_is_col_small(a.as_ref(), 3, 0, 5, 0.5));
6186 assert!(tpp_is_col_small(a.as_ref(), 3, 0, 5, 1.0));
6188
6189 let t = tpp_find_row_abs_max(a.as_ref(), 4, 0, 3);
6192 assert_eq!(t, 2, "expected col 2, got {t}");
6193
6194 let max_exc = tpp_find_rc_abs_max_exclude(a.as_ref(), 1, 0, 5, 3);
6199 assert!((max_exc - 0.8).abs() < 1e-15, "expected 0.8, got {max_exc}");
6200
6201 let max_exc2 = tpp_find_rc_abs_max_exclude(a.as_ref(), 3, 0, 5, usize::MAX);
6206 assert!(
6207 (max_exc2 - 0.6).abs() < 1e-15,
6208 "expected 0.6, got {max_exc2}"
6209 );
6210 }
6211
6212 fn rect_reconstruction_error(
6222 original_rect: &Mat<f64>,
6223 factored_rect: &Mat<f64>,
6224 k: usize,
6225 result: &AptpFactorResult,
6226 ) -> f64 {
6227 let ne = result.num_eliminated;
6228 if ne == 0 {
6229 return 0.0;
6230 }
6231
6232 let mut l = Mat::zeros(ne, ne);
6234 let mut col = 0;
6235 while col < ne {
6236 l[(col, col)] = 1.0;
6237 match result.d.pivot_type(col) {
6238 PivotType::OneByOne => {
6239 for r in (col + 1)..ne {
6240 l[(r, col)] = factored_rect[(r, col)];
6241 }
6242 col += 1;
6243 }
6244 PivotType::TwoByTwo { partner } if partner > col => {
6245 l[(col + 1, col + 1)] = 1.0;
6246 for r in (col + 2)..ne {
6247 l[(r, col)] = factored_rect[(r, col)];
6248 l[(r, col + 1)] = factored_rect[(r, col + 1)];
6249 }
6250 col += 2;
6251 }
6252 _ => {
6253 col += 1;
6254 }
6255 }
6256 }
6257
6258 let mut d_mat = Mat::zeros(ne, ne);
6260 col = 0;
6261 while col < ne {
6262 match result.d.pivot_type(col) {
6263 PivotType::OneByOne => {
6264 d_mat[(col, col)] = result.d.diagonal_1x1(col);
6265 col += 1;
6266 }
6267 PivotType::TwoByTwo { partner } if partner > col => {
6268 let block = result.d.diagonal_2x2(col);
6269 d_mat[(col, col)] = block.a;
6270 d_mat[(col, col + 1)] = block.b;
6271 d_mat[(col + 1, col)] = block.b;
6272 d_mat[(col + 1, col + 1)] = block.c;
6273 col += 2;
6274 }
6275 _ => {
6276 col += 1;
6277 }
6278 }
6279 }
6280
6281 let mut w = Mat::zeros(ne, ne);
6283 for i in 0..ne {
6284 for j in 0..ne {
6285 let mut sum = 0.0;
6286 for kk in 0..ne {
6287 sum += l[(i, kk)] * d_mat[(kk, j)];
6288 }
6289 w[(i, j)] = sum;
6290 }
6291 }
6292
6293 let mut ldlt = Mat::zeros(ne, ne);
6295 for i in 0..ne {
6296 for j in 0..ne {
6297 let mut sum = 0.0;
6298 for kk in 0..ne {
6299 sum += w[(i, kk)] * l[(j, kk)];
6300 }
6301 ldlt[(i, j)] = sum;
6302 }
6303 }
6304
6305 let perm = &result.perm;
6309 let mut pap = Mat::zeros(ne, ne);
6310 for i in 0..ne {
6311 for j in 0..ne {
6312 let pi = perm[i];
6313 let pj = perm[j];
6314 let val = if pi >= pj && pj < k {
6316 original_rect[(pi, pj)]
6317 } else if pj > pi && pi < k {
6318 original_rect[(pj, pi)]
6319 } else {
6320 0.0
6321 };
6322 pap[(i, j)] = val;
6323 }
6324 }
6325
6326 let mut diff_sq = 0.0_f64;
6328 let mut orig_sq = 0.0_f64;
6329 for i in 0..ne {
6330 for j in 0..ne {
6331 let d = pap[(i, j)] - ldlt[(i, j)];
6332 diff_sq += d * d;
6333 orig_sq += pap[(i, j)] * pap[(i, j)];
6334 }
6335 }
6336 if orig_sq == 0.0 {
6337 return diff_sq.sqrt();
6338 }
6339 (diff_sq / orig_sq).sqrt()
6340 }
6341
6342 #[test]
6343 fn test_tpp_factor_rect_basic_1x1() {
6344 let m = 6;
6347 let k = 4;
6348 let mut a = Mat::zeros(m, k);
6349 let vals: [[f64; 4]; 6] = [
6351 [10.0, 0.0, 0.0, 0.0],
6352 [0.1, -8.0, 0.0, 0.0],
6353 [0.2, 0.1, 12.0, 0.0],
6354 [0.1, -0.2, 0.3, 7.0],
6355 [0.3, 0.1, -0.1, 0.2], [0.1, -0.3, 0.2, 0.1], ];
6358 for i in 0..m {
6359 for j in 0..k {
6360 a[(i, j)] = vals[i][j];
6361 }
6362 }
6363
6364 let original = a.clone();
6365 let opts = AptpOptions::default();
6366 let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
6367
6368 assert_eq!(
6369 result.num_eliminated, 4,
6370 "expected all 4 columns eliminated"
6371 );
6372 assert_eq!(result.stats.num_delayed, 0);
6373
6374 let error = rect_reconstruction_error(&original, &a, k, &result);
6375 assert!(
6376 error < 1e-12,
6377 "rect 1x1 reconstruction error {error:.2e} >= 1e-12"
6378 );
6379 }
6380
6381 #[test]
6382 fn test_tpp_factor_rect_basic_2x2() {
6383 let m = 6;
6386 let k = 4;
6387 let mut a = Mat::zeros(m, k);
6388 let vals: [[f64; 4]; 6] = [
6389 [0.001, 0.0, 0.0, 0.0],
6390 [5.0, 0.001, 0.0, 0.0],
6391 [0.01, 0.01, 0.001, 0.0],
6392 [0.01, 0.01, 5.0, 0.001],
6393 [0.1, 0.2, 0.1, 0.2], [0.3, 0.1, 0.3, 0.1], ];
6396 for i in 0..m {
6397 for j in 0..k {
6398 a[(i, j)] = vals[i][j];
6399 }
6400 }
6401
6402 let original = a.clone();
6403 let opts = AptpOptions::default();
6404 let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
6405
6406 assert_eq!(result.stats.num_delayed, 0, "expected no delays");
6407 assert!(result.stats.num_2x2 > 0, "expected at least one 2x2 pivot");
6408
6409 let error = rect_reconstruction_error(&original, &a, k, &result);
6410 assert!(
6411 error < 1e-12,
6412 "rect 2x2 reconstruction error {error:.2e} >= 1e-12"
6413 );
6414 }
6415
6416 #[test]
6417 fn test_tpp_factor_rect_with_delays() {
6418 let m = 8;
6421 let k = 4;
6422 let mut a = Mat::zeros(m, k);
6423 let vals: [[f64; 4]; 8] = [
6425 [1e-25, 0.0, 0.0, 0.0],
6426 [1e-25, 1e-25, 0.0, 0.0],
6427 [1e-25, 1e-25, 1e-25, 0.0],
6428 [1e-25, 1e-25, 1e-25, 1e-25],
6429 [0.1, 0.2, 0.3, 0.4],
6430 [0.5, 0.6, 0.7, 0.8],
6431 [0.2, 0.3, 0.4, 0.5],
6432 [0.1, 0.1, 0.1, 0.1],
6433 ];
6434 for i in 0..m {
6435 for j in 0..k {
6436 a[(i, j)] = vals[i][j];
6437 }
6438 }
6439
6440 let opts = AptpOptions::default();
6441 let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
6442
6443 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
6446 assert_eq!(sum, k, "invariant: {sum} != {k}");
6447 }
6448
6449 #[test]
6450 fn test_tpp_factor_rect_square_matches() {
6451 let n = 4;
6454 let a = symmetric_matrix(n, |i, j| {
6455 let vals = [
6456 [10.0, 0.5, 0.1, 0.3],
6457 [0.5, -8.0, 0.2, -0.4],
6458 [0.1, 0.2, 12.0, 0.6],
6459 [0.3, -0.4, 0.6, -5.0],
6460 ];
6461 vals[i][j]
6462 });
6463
6464 let opts = AptpOptions::default();
6465
6466 let mut a_rect = a.clone();
6468 let result_rect = tpp_factor_rect(a_rect.as_mut(), n, &opts).unwrap();
6469
6470 let mut a_sq = a.clone();
6472 let mut col_order: Vec<usize> = (0..n).collect();
6473 let mut d_sq = MixedDiagonal::new(n);
6474 let mut stats_sq = AptpStatistics::default();
6475 let mut pivot_log_sq = Vec::with_capacity(n);
6476 let ne_sq = tpp_factor(
6477 a_sq.as_mut(),
6478 0,
6479 n,
6480 &mut col_order,
6481 &mut d_sq,
6482 &mut stats_sq,
6483 &mut pivot_log_sq,
6484 &opts,
6485 );
6486 d_sq.truncate(ne_sq);
6487
6488 assert_eq!(
6490 result_rect.num_eliminated, ne_sq,
6491 "num_eliminated: rect={} sq={}",
6492 result_rect.num_eliminated, ne_sq
6493 );
6494
6495 assert_eq!(&result_rect.perm[..n], &col_order[..n], "perm mismatch");
6497
6498 let ne = result_rect.num_eliminated;
6500 let mut col = 0;
6501 while col < ne {
6502 match result_rect.d.pivot_type(col) {
6503 PivotType::OneByOne => {
6504 let d_r = result_rect.d.diagonal_1x1(col);
6505 let d_s = d_sq.diagonal_1x1(col);
6506 assert!(
6507 (d_r - d_s).abs() < 1e-15,
6508 "D mismatch at col {col}: rect={d_r} sq={d_s}"
6509 );
6510 col += 1;
6511 }
6512 PivotType::TwoByTwo { partner } if partner > col => {
6513 let br = result_rect.d.diagonal_2x2(col);
6514 let bs = d_sq.diagonal_2x2(col);
6515 assert!((br.a - bs.a).abs() < 1e-15, "D.a mismatch at col {col}");
6516 assert!((br.b - bs.b).abs() < 1e-15, "D.b mismatch at col {col}");
6517 assert!((br.c - bs.c).abs() < 1e-15, "D.c mismatch at col {col}");
6518 col += 2;
6519 }
6520 _ => {
6521 col += 1;
6522 }
6523 }
6524 }
6525
6526 for j in 0..n {
6528 for i in j..n {
6529 let vr = a_rect[(i, j)];
6530 let vs = a_sq[(i, j)];
6531 assert!(
6532 (vr - vs).abs() == 0.0,
6533 "matrix mismatch at ({i},{j}): rect={vr} sq={vs}"
6534 );
6535 }
6536 }
6537 }
6538
6539 #[test]
6540 fn test_tpp_factor_rect_reconstruction_stress() {
6541 use rand::SeedableRng;
6543 use rand::rngs::StdRng;
6544 use rand_distr::{Distribution, Uniform};
6545
6546 let m = 64;
6547 let k = 32;
6548 let mut rng = StdRng::seed_from_u64(99);
6549 let dist = Uniform::new(-1.0f64, 1.0).unwrap();
6550
6551 let mut a = Mat::zeros(m, k);
6552 for i in 0..k {
6554 for j in 0..=i {
6555 let v: f64 = dist.sample(&mut rng);
6556 a[(i, j)] = v;
6557 }
6558 let sign = if i % 3 == 0 { -1.0 } else { 1.0 };
6560 a[(i, i)] = sign * (5.0 + dist.sample(&mut rng).abs());
6561 }
6562 for i in k..m {
6564 for j in 0..k {
6565 a[(i, j)] = dist.sample(&mut rng);
6566 }
6567 }
6568
6569 let original = a.clone();
6570 let opts = AptpOptions::default();
6571 let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
6572
6573 let error = rect_reconstruction_error(&original, &a, k, &result);
6574 assert!(
6575 error < 1e-10,
6576 "rect stress reconstruction error {error:.2e} >= 1e-10"
6577 );
6578
6579 let sum = result.stats.num_1x1 + 2 * result.stats.num_2x2 + result.stats.num_delayed;
6581 assert_eq!(sum, k, "statistics invariant: {sum} != {k}");
6582 }
6583
6584 #[test]
6585 fn test_compute_contribution_gemm_rect_matches_square() {
6586 let m = 8;
6590 let k = 4;
6591 let nfs = m - k;
6592
6593 let a_full = symmetric_matrix(m, |i, j| {
6595 if i == j {
6596 [10.0, -8.0, 12.0, 7.0, 3.0, -2.0, 5.0, 4.0][i]
6597 } else {
6598 0.1 * (i as f64 + j as f64 + 1.0) * if (i + j) % 2 == 0 { 1.0 } else { -1.0 }
6599 }
6600 });
6601
6602 let mut a_rect = Mat::zeros(m, k);
6604 for i in 0..m {
6605 for j in 0..k {
6606 a_rect[(i, j)] = a_full[(i, j)];
6607 }
6608 }
6609 let opts = AptpOptions::default();
6610 let result = tpp_factor_rect(a_rect.as_mut(), k, &opts).unwrap();
6611 let ne = result.num_eliminated;
6612
6613 if ne == 0 {
6614 return; }
6616
6617 let mut a_sq_factor = a_full.clone();
6619 let result_sq = tpp_factor_rect(a_sq_factor.as_mut(), k, &opts).unwrap();
6620 let mut contrib_rect = Mat::zeros(nfs, nfs);
6625 let perm = &result.perm;
6627 for i in 0..nfs {
6628 for j in 0..=i {
6629 let pi = perm[k + i];
6630 let pj = perm[k + j];
6631 let val = if pi >= pj {
6632 a_full[(pi, pj)]
6633 } else {
6634 a_full[(pj, pi)]
6635 };
6636 contrib_rect[(i, j)] = val;
6637 }
6638 }
6639 let mut contrib_rect2 = contrib_rect.clone();
6640 let mut ld_ws = Mat::zeros(nfs, ne);
6641 compute_contribution_gemm_rect(
6642 &a_rect,
6643 k,
6644 ne,
6645 m,
6646 &result.d,
6647 &mut contrib_rect2,
6648 &mut ld_ws,
6649 Par::Seq,
6650 );
6651
6652 let mut contrib_sq = Mat::zeros(nfs, nfs);
6654 for i in 0..nfs {
6655 for j in 0..=i {
6656 let pi = perm[k + i];
6657 let pj = perm[k + j];
6658 let val = if pi >= pj {
6659 a_full[(pi, pj)]
6660 } else {
6661 a_full[(pj, pi)]
6662 };
6663 contrib_sq[(i, j)] = val;
6664 }
6665 }
6666 let mut frontal_sq = Mat::zeros(m, m);
6669 for j in 0..k {
6671 for i in 0..m {
6672 frontal_sq[(i, j)] = a_sq_factor[(i, j)];
6673 }
6674 }
6675 for i in 0..nfs {
6677 for j in 0..=i {
6678 frontal_sq[(k + i, k + j)] = contrib_sq[(i, j)];
6679 }
6680 }
6681 let mut ld_ws2 = Mat::zeros(nfs, ne);
6682 compute_contribution_gemm(
6683 &frontal_sq,
6684 k,
6685 ne,
6686 m,
6687 &result_sq.d,
6688 &mut contrib_sq,
6689 &mut ld_ws2,
6690 Par::Seq,
6691 );
6692
6693 for i in 0..nfs {
6695 for j in 0..=i {
6696 let vr = contrib_rect2[(i, j)];
6697 let vs = contrib_sq[(i, j)];
6698 assert!(
6699 (vr - vs).abs() < 1e-12,
6700 "contrib mismatch at ({i},{j}): rect={vr} sq={vs}"
6701 );
6702 }
6703 }
6704 }
6705
6706 #[test]
6707 fn test_extract_front_factors_rect_round_trip() {
6708 let m = 6;
6711 let k = 4;
6712 let mut a = Mat::zeros(m, k);
6713 let vals: [[f64; 4]; 6] = [
6714 [10.0, 0.0, 0.0, 0.0],
6715 [0.5, -8.0, 0.0, 0.0],
6716 [0.2, 0.3, 12.0, 0.0],
6717 [0.1, -0.1, 0.4, 7.0],
6718 [0.3, 0.1, -0.2, 0.5],
6719 [0.1, -0.3, 0.2, 0.1],
6720 ];
6721 for i in 0..m {
6722 for j in 0..k {
6723 a[(i, j)] = vals[i][j];
6724 }
6725 }
6726
6727 let opts = AptpOptions::default();
6728 let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
6729 let ne = result.num_eliminated;
6730 assert!(ne > 0, "need at least one eliminated column");
6731
6732 let row_indices: Vec<usize> = (0..m).collect();
6733 let ff = extract_front_factors_rect(&a, m, k, &row_indices, &result);
6734
6735 assert_eq!(ff.num_eliminated, ne);
6736 assert_eq!(ff.l11.nrows(), ne);
6737 assert_eq!(ff.l11.ncols(), ne);
6738
6739 for i in 0..ne {
6741 assert!(
6742 (ff.l11[(i, i)] - 1.0).abs() < 1e-15,
6743 "L11 diagonal at {i} is not 1.0: {}",
6744 ff.l11[(i, i)]
6745 );
6746 for j in (i + 1)..ne {
6747 assert!(
6748 ff.l11[(i, j)].abs() < 1e-15,
6749 "L11 upper triangle at ({i},{j}) is not 0: {}",
6750 ff.l11[(i, j)]
6751 );
6752 }
6753 }
6754
6755 assert_eq!(ff.l21.nrows(), m - ne);
6757 assert_eq!(ff.l21.ncols(), ne);
6758
6759 assert_eq!(ff.d11.dimension(), ne);
6761 }
6762
6763 #[test]
6764 fn test_extract_contribution_rect_with_delays() {
6765 let m = 6;
6768 let k = 4;
6769 let mut a = Mat::zeros(m, k);
6770 let vals: [[f64; 4]; 6] = [
6772 [10.0, 0.0, 0.0, 0.0],
6773 [0.5, -8.0, 0.0, 0.0],
6774 [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.3, 0.1, 0.0, 0.0],
6777 [0.1, -0.3, 0.0, 0.0],
6778 ];
6779 for i in 0..m {
6780 for j in 0..k {
6781 a[(i, j)] = vals[i][j];
6782 }
6783 }
6784
6785 let opts = AptpOptions::default();
6786 let result = tpp_factor_rect(a.as_mut(), k, &opts).unwrap();
6787 let ne = result.num_eliminated;
6788
6789 assert_eq!(ne, 4, "expected all 4 eliminated (2 real + 2 zero)");
6791
6792 let nfs = m - k;
6793 let mut contrib_buffer = Mat::zeros(nfs, nfs);
6794 let mut ld_ws = Mat::zeros(nfs, ne);
6795 compute_contribution_gemm_rect(
6796 &a,
6797 k,
6798 ne,
6799 m,
6800 &result.d,
6801 &mut contrib_buffer,
6802 &mut ld_ws,
6803 Par::Seq,
6804 );
6805
6806 let row_indices: Vec<usize> = (0..m).collect();
6807 let contrib = extract_contribution_rect(&a, m, k, &row_indices, &result, contrib_buffer);
6808
6809 assert_eq!(contrib.num_delayed, 0);
6811 assert_eq!(contrib.data.nrows(), nfs);
6813 assert_eq!(contrib.row_indices.len(), nfs);
6814 }
6815
6816 #[test]
6817 fn test_swap_rect_boundary() {
6818 let mut a = Mat::from_fn(5, 3, |i, j| (i * 10 + j) as f64);
6820
6821 swap_rect(a.as_mut(), 1, 4);
6824 assert!((a[(1, 0)] - 40.0).abs() < 1e-15, "a[(1,0)]={}", a[(1, 0)]);
6827 assert!((a[(4, 0)] - 10.0).abs() < 1e-15, "a[(4,0)]={}", a[(4, 0)]);
6828 assert!((a[(2, 1)] - 42.0).abs() < 1e-15, "a[(2,1)]={}", a[(2, 1)]);
6831 assert!((a[(4, 2)] - 21.0).abs() < 1e-15, "a[(4,2)]={}", a[(4, 2)]);
6832
6833 let mut b = Mat::from_fn(5, 3, |i, j| (i * 10 + j) as f64);
6835 swap_rect(b.as_mut(), 0, 2);
6837 assert!((b[(0, 0)] - 22.0).abs() < 1e-15, "b[(0,0)]={}", b[(0, 0)]);
6839 assert!((b[(2, 2)] - 0.0).abs() < 1e-15, "b[(2,2)]={}", b[(2, 2)]);
6840 assert!((b[(3, 0)] - 32.0).abs() < 1e-15, "b[(3,0)]={}", b[(3, 0)]);
6842 assert!((b[(3, 2)] - 30.0).abs() < 1e-15, "b[(3,2)]={}", b[(3, 2)]);
6843 }
6844
6845 use crate::testing::perturbations::TortureTestConfig;
6857 use rand::RngExt;
6858 use rand::SeedableRng;
6859 use rand::rngs::StdRng;
6860
6861 fn ldlt_app_torture_test(m: usize, n: usize, config: &TortureTestConfig) {
6863 use crate::testing::perturbations;
6864
6865 let options = AptpOptions {
6866 inner_block_size: 32.min(n),
6867 ..AptpOptions::default()
6868 };
6869 let num_fully_summed = n;
6870
6871 let mut rng = StdRng::seed_from_u64(config.seed);
6872 let mut failures = Vec::new();
6873
6874 for instance in 0..config.num_instances {
6875 let mut a = perturbations::generate_dense_symmetric_indefinite(m, &mut rng);
6876
6877 let roll: f64 = rng.random::<f64>();
6879 let is_singular;
6880 if roll < config.delay_probability {
6881 perturbations::cause_delays(&mut a, options.inner_block_size, &mut rng);
6882 is_singular = false;
6883 } else if roll < config.delay_probability + config.singular_probability {
6884 if m >= 2 {
6885 let col1 = RngExt::random_range(&mut rng, 0..m);
6886 let mut col2 = RngExt::random_range(&mut rng, 0..m);
6887 while col2 == col1 {
6888 col2 = RngExt::random_range(&mut rng, 0..m);
6889 }
6890 perturbations::make_singular(&mut a, col1, col2);
6891 }
6892 is_singular = true;
6893 } else {
6894 if m >= options.inner_block_size && options.inner_block_size >= 2 {
6895 let max_start = m - options.inner_block_size;
6896 let block_row = RngExt::random_range(&mut rng, 0..=max_start);
6897 perturbations::make_dblk_singular(&mut a, block_row, options.inner_block_size);
6898 }
6899 is_singular = true;
6900 }
6901
6902 let original = a.clone();
6904
6905 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
6906 aptp_factor_in_place(a.as_mut(), num_fully_summed, &options)
6907 }));
6908
6909 match result {
6910 Err(_) => {
6911 failures.push(format!("instance {}: PANIC", instance));
6912 }
6913 Ok(Err(e)) => {
6914 if !is_singular {
6915 failures.push(format!("instance {}: unexpected error: {}", instance, e));
6916 }
6917 }
6918 Ok(Ok(ref fr)) => {
6919 let total = fr.num_eliminated + fr.delayed_cols.len();
6920 if total != num_fully_summed {
6921 failures.push(format!(
6922 "instance {}: elim({}) + delayed({}) = {} != nfs({})",
6923 instance,
6924 fr.num_eliminated,
6925 fr.delayed_cols.len(),
6926 total,
6927 num_fully_summed
6928 ));
6929 continue;
6930 }
6931 if m == n && !is_singular && fr.num_eliminated == num_fully_summed {
6933 let l = extract_l(a.as_ref(), &fr.d, fr.num_eliminated);
6934 let err = dense_reconstruction_error(
6935 &original,
6936 &l,
6937 &fr.d,
6938 &fr.perm[..fr.num_eliminated],
6939 );
6940 if err > config.backward_error_threshold {
6941 failures.push(format!(
6942 "instance {}: recon error {:.2e} > {:.2e}",
6943 instance, err, config.backward_error_threshold
6944 ));
6945 }
6946 }
6947 }
6948 }
6949 }
6950
6951 assert!(
6952 failures.is_empty(),
6953 "APP torture (m={}, n={}) had {} failures:\n{}",
6954 m,
6955 n,
6956 failures.len(),
6957 failures.join("\n")
6958 );
6959 }
6960
6961 fn ldlt_tpp_torture_test(m: usize, n: usize, config: &TortureTestConfig) {
6963 use crate::testing::perturbations;
6964
6965 let options = AptpOptions {
6966 inner_block_size: n + 1, ..AptpOptions::default()
6968 };
6969 let num_fully_summed = n;
6970
6971 let mut rng = StdRng::seed_from_u64(config.seed);
6972 let mut failures = Vec::new();
6973
6974 for instance in 0..config.num_instances {
6975 let mut a = perturbations::generate_dense_symmetric_indefinite(m, &mut rng);
6976
6977 let roll: f64 = rng.random::<f64>();
6979 let is_singular;
6980 if roll < config.delay_probability {
6981 perturbations::cause_delays(&mut a, options.inner_block_size, &mut rng);
6982 is_singular = false;
6983 } else {
6984 if m >= 2 {
6985 let col1 = RngExt::random_range(&mut rng, 0..m);
6986 let mut col2 = RngExt::random_range(&mut rng, 0..m);
6987 while col2 == col1 {
6988 col2 = RngExt::random_range(&mut rng, 0..m);
6989 }
6990 perturbations::make_singular(&mut a, col1, col2);
6991 }
6992 is_singular = true;
6993 }
6994
6995 let original = a.clone();
6997
6998 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
6999 aptp_factor_in_place(a.as_mut(), num_fully_summed, &options)
7000 }));
7001
7002 match result {
7003 Err(_) => {
7004 failures.push(format!("instance {}: PANIC", instance));
7005 }
7006 Ok(Err(e)) => {
7007 if !is_singular {
7008 failures.push(format!("instance {}: unexpected error: {}", instance, e));
7009 }
7010 }
7011 Ok(Ok(ref fr)) => {
7012 let total = fr.num_eliminated + fr.delayed_cols.len();
7013 if total != num_fully_summed {
7014 failures.push(format!(
7015 "instance {}: elim({}) + delayed({}) = {} != nfs({})",
7016 instance,
7017 fr.num_eliminated,
7018 fr.delayed_cols.len(),
7019 total,
7020 num_fully_summed
7021 ));
7022 continue;
7023 }
7024 if m == n && !is_singular && fr.num_eliminated == num_fully_summed {
7026 let l = extract_l(a.as_ref(), &fr.d, fr.num_eliminated);
7027 let err = dense_reconstruction_error(
7028 &original,
7029 &l,
7030 &fr.d,
7031 &fr.perm[..fr.num_eliminated],
7032 );
7033 if err > config.backward_error_threshold {
7034 failures.push(format!(
7035 "instance {}: recon error {:.2e} > {:.2e}",
7036 instance, err, config.backward_error_threshold
7037 ));
7038 }
7039 let l_bound = 1.0 / options.threshold;
7041 let mut max_l = 0.0_f64;
7042 for i in 0..l.nrows() {
7043 for j in 0..fr.num_eliminated {
7044 if i != j {
7045 max_l = max_l.max(l[(i, j)].abs());
7046 }
7047 }
7048 }
7049 if max_l > l_bound * (1.0 + 1e-10) {
7050 failures.push(format!(
7051 "instance {}: L growth |L|={:.4} > 1/u={:.4}",
7052 instance, max_l, l_bound
7053 ));
7054 }
7055 }
7056 }
7057 }
7058 }
7059
7060 assert!(
7061 failures.is_empty(),
7062 "TPP torture (m={}, n={}) had {} failures:\n{}",
7063 m,
7064 n,
7065 failures.len(),
7066 failures.join("\n")
7067 );
7068 }
7069
7070 #[test]
7073 #[ignore] fn torture_app_32x32() {
7075 let config = TortureTestConfig {
7076 num_instances: 500,
7077 seed: 42_000,
7078 ..TortureTestConfig::default()
7079 };
7080 ldlt_app_torture_test(32, 32, &config);
7081 }
7082
7083 #[test]
7084 #[ignore]
7085 fn torture_app_64x64() {
7086 let config = TortureTestConfig {
7087 num_instances: 500,
7088 seed: 42_001,
7089 ..TortureTestConfig::default()
7090 };
7091 ldlt_app_torture_test(64, 64, &config);
7092 }
7093
7094 #[test]
7095 #[ignore]
7096 fn torture_app_128x128() {
7097 let config = TortureTestConfig {
7098 num_instances: 500,
7099 seed: 42_002,
7100 ..TortureTestConfig::default()
7101 };
7102 ldlt_app_torture_test(128, 128, &config);
7103 }
7104
7105 #[test]
7106 #[ignore]
7107 fn torture_app_128x48() {
7108 let config = TortureTestConfig {
7109 num_instances: 500,
7110 seed: 42_003,
7111 ..TortureTestConfig::default()
7112 };
7113 ldlt_app_torture_test(128, 48, &config);
7114 }
7115
7116 #[test]
7117 #[ignore]
7118 fn torture_app_256x256() {
7119 let config = TortureTestConfig {
7120 num_instances: 500,
7121 seed: 42_004,
7122 ..TortureTestConfig::default()
7123 };
7124 ldlt_app_torture_test(256, 256, &config);
7125 }
7126
7127 #[test]
7130 #[ignore]
7131 fn torture_tpp_8x4() {
7132 let config = TortureTestConfig {
7133 num_instances: 500,
7134 seed: 43_000,
7135 ..TortureTestConfig::default()
7136 };
7137 ldlt_tpp_torture_test(8, 4, &config);
7138 }
7139
7140 #[test]
7141 #[ignore]
7142 fn torture_tpp_33x21() {
7143 let config = TortureTestConfig {
7144 num_instances: 500,
7145 seed: 43_001,
7146 ..TortureTestConfig::default()
7147 };
7148 ldlt_tpp_torture_test(33, 21, &config);
7149 }
7150
7151 #[test]
7152 #[ignore]
7153 fn torture_tpp_100x100() {
7154 let config = TortureTestConfig {
7155 num_instances: 500,
7156 seed: 43_002,
7157 ..TortureTestConfig::default()
7158 };
7159 ldlt_tpp_torture_test(100, 100, &config);
7160 }
7161
7162 #[test]
7163 #[ignore]
7164 fn torture_tpp_100x50() {
7165 let config = TortureTestConfig {
7166 num_instances: 500,
7167 seed: 43_003,
7168 ..TortureTestConfig::default()
7169 };
7170 ldlt_tpp_torture_test(100, 50, &config);
7171 }
7172
7173 use crate::testing::strategies;
7178 use proptest::prelude::*;
7179
7180 proptest! {
7181 #![proptest_config(ProptestConfig::with_cases(256))]
7182
7183 #[test]
7184 fn property_pd_reconstruction(
7185 a in strategies::arb_symmetric_pd(5..=100)
7186 ) {
7187 let n = a.nrows();
7188 let options = AptpOptions::default();
7189 let result = aptp_factor(a.as_ref(), &options);
7190 if let Ok(ref f) = result {
7191 let perm_fwd = f.perm.as_ref().arrays().0;
7192 let err = dense_reconstruction_error(&a, &f.l, &f.d, perm_fwd);
7193 prop_assert!(
7194 err < 1e-12,
7195 "PD reconstruction error {:.2e} for n={}", err, n
7196 );
7197 }
7198 }
7199
7200 #[test]
7201 fn property_inertia_sum(
7202 a in strategies::arb_symmetric_indefinite(5..=100)
7203 ) {
7204 let n = a.nrows();
7205 let options = AptpOptions::default();
7206 let result = aptp_factor(a.as_ref(), &options);
7207 if let Ok(ref f) = result {
7208 let inertia = f.d.compute_inertia();
7209 let num_eliminated = n - f.delayed_cols.len();
7210 prop_assert_eq!(
7212 inertia.dimension(), num_eliminated,
7213 "inertia sum {} != num_eliminated={} (n={}, delayed={})",
7214 inertia.dimension(), num_eliminated, n, f.delayed_cols.len()
7215 );
7216 }
7217 }
7218
7219 #[test]
7220 fn property_permutation_valid(
7221 a in strategies::arb_symmetric_indefinite(5..=100)
7222 ) {
7223 let n = a.nrows();
7224 let options = AptpOptions::default();
7225 let result = aptp_factor(a.as_ref(), &options);
7226 if let Ok(ref f) = result {
7227 let (fwd, inv) = f.perm.as_ref().arrays();
7228 let mut seen = vec![false; n];
7230 for &idx in fwd {
7231 prop_assert!(idx < n, "perm fwd index {} out of bounds for n={}", idx, n);
7232 prop_assert!(!seen[idx], "duplicate in perm fwd: {}", idx);
7233 seen[idx] = true;
7234 }
7235 for i in 0..n {
7237 prop_assert_eq!(
7238 inv[fwd[i]], i,
7239 "perm fwd/inv not inverses at i={}", i
7240 );
7241 }
7242 }
7243 }
7244
7245 #[test]
7246 fn property_no_panics_perturbed(
7247 a in strategies::arb_symmetric_indefinite(5..=50),
7248 seed in any::<u64>()
7249 ) {
7250 use crate::testing::perturbations as perturb;
7251 let mut a_mut = a.clone();
7252 let mut rng = StdRng::seed_from_u64(seed);
7253 perturb::cause_delays(&mut a_mut, 32, &mut rng);
7254
7255 let options = AptpOptions::default();
7256 let _ = aptp_factor(a_mut.as_ref(), &options);
7258 }
7259 }
7260}