1use crate::{
38 ghost::{self, Array, Idx, MaybeIdx},
39 mem::{self, NONE},
40 windows2, FaerError, Index, SignedIndex, SymbolicSparseColMatRef,
41};
42use core::{cell::Cell, iter::zip};
43use dyn_stack::{PodStack, SizeOverflow, StackReq};
44use faer_core::{assert, ComplexField};
45use reborrow::*;
46
47#[inline]
48fn post_tree<'n, I: Index>(
49 root: Idx<'n, usize>,
50 k: usize,
51 child: &mut Array<'n, MaybeIdx<'n, I>>,
52 sibling: &Array<'n, MaybeIdx<'n, I>>,
53 order: &mut Array<'n, I::Signed>,
54 stack: &mut Array<'n, I::Signed>,
55) -> usize {
56 let N = child.len();
57 let I = I::Signed::truncate;
58 let mut top = 1usize;
59 stack[N.check(0)] = I(*root);
60
61 let mut k = k;
62
63 while top > 0 {
64 let i = N.check(stack[N.check(top - 1)].zx());
65 if let Some(child_) = child[i].idx() {
66 let mut f = child_.zx();
67 loop {
68 top += 1;
69 if let Some(new_f) = sibling[f].idx() {
70 f = new_f.zx();
71 } else {
72 break;
73 }
74 }
75
76 let mut t = top;
77 let mut f = child_.zx();
78 loop {
79 t -= 1;
80 stack[N.check(t)] = I(*f);
81 if let Some(new_f) = sibling[f].idx() {
82 f = new_f.zx();
83 } else {
84 break;
85 }
86 }
87 child[i] = MaybeIdx::none();
88 } else {
89 top -= 1;
90 order[i] = I(k);
91 k += 1;
92 }
93 }
94
95 k
96}
97
98#[inline]
99fn postorder<'n, 'out, I: Index>(
100 order: &'out mut Array<'n, I::Signed>,
101 etree: &Array<'n, MaybeIdx<'n, I>>,
102 nv: &Array<'n, I::Signed>,
103 f_size: &Array<'n, I::Signed>,
104 stack: PodStack<'_>,
105) {
106 let N = order.len();
107 let n = *N;
108 if n == 0 {
109 return;
110 }
111
112 let I = I::Signed::truncate;
113 let zero = I(0);
114 let none = I(NONE);
115
116 let (child, stack) = stack.make_raw::<I::Signed>(n);
117 let (sibling, stack) = stack.make_raw::<I::Signed>(n);
118 let (stack, _) = stack.make_raw::<I::Signed>(n);
119
120 let child = Array::from_mut(ghost::fill_none::<I>(child, N), N);
121 let sibling = Array::from_mut(ghost::fill_none::<I>(sibling, N), N);
122 let stack = Array::from_mut(stack, N);
123
124 for j in N.indices().rev() {
125 if nv[j] > zero {
126 if let Some(parent) = etree[j].idx() {
127 let parent = parent.zx();
128 sibling[j] = child[parent];
129 child[parent] = MaybeIdx::from_index(j.truncate());
130 }
131 }
132 }
133
134 for i in N.indices() {
135 if nv[i] > zero {
136 if let Some(child_) = child[i].idx() {
137 let child_ = child_.zx();
138
139 let mut fprev = MaybeIdx::<'n>::none();
140 let mut bigfprev = MaybeIdx::<'n>::none();
141 let mut bigf = MaybeIdx::<'n>::none();
142 let mut maxfrsize = none;
143
144 let mut f = MaybeIdx::from_index(child_);
145 while let Some(f_) = f.idx() {
146 let frsize = f_size[f_];
147 if frsize >= maxfrsize {
148 maxfrsize = frsize;
149 bigfprev = fprev;
150 bigf = f;
151 }
152 fprev = f;
153 f = sibling[f_].sx();
154 }
155
156 let bigf = bigf.idx().unwrap();
157 let fnext = sibling[bigf];
158 if let Some(fnext) = fnext.idx() {
159 if let Some(bigfprev) = bigfprev.idx() {
160 sibling[bigfprev] = MaybeIdx::from_index(fnext);
161 } else {
162 child[i] = MaybeIdx::from_index(fnext);
163 }
164
165 let fprev = fprev.idx().unwrap();
166 sibling[bigf] = MaybeIdx::none();
167 sibling[fprev] = MaybeIdx::from_index(bigf.truncate());
168 }
169 }
170 }
171 }
172
173 mem::fill_none(order.as_mut());
174
175 let mut k = 0usize;
176 for i in N.indices() {
177 if etree[i].idx().is_none() && nv[i] > zero {
178 k = post_tree(i, k, child, sibling, order, stack);
179 }
180 }
181}
182
183#[inline(always)]
184fn flip<I: SignedIndex>(i: I) -> I {
185 -I::truncate(2) - i
186}
187
188#[inline]
189fn clear_flag<I: SignedIndex>(wflg: I, wbig: I, w: &mut [I]) -> I {
190 let I = I::truncate;
191 let zero = I(0);
192 let one = I(1);
193
194 if wflg < I(2) || wflg >= wbig {
195 for x in w {
196 if *x != zero {
197 *x = one;
198 }
199 }
200 return I(2);
201 }
202 wflg
203}
204
205#[allow(clippy::comparison_chain)]
206fn amd_2<I: Index>(
207 pe: &mut [I::Signed], iw: &mut [I::Signed], len: &mut [I::Signed], pfree: usize,
211 next: &mut [I::Signed],
212 last: &mut [I::Signed],
213 control: Control,
214 stack: PodStack<'_>,
215) -> FlopCount {
216 let n = pe.len();
217 assert!(n < I::Signed::MAX.zx());
218
219 let mut pfree = pfree;
220 let iwlen = iw.len();
221
222 let I = I::Signed::truncate;
223 let none = I(NONE);
224 let zero = I(0);
225 let one = I(1);
226
227 let alpha = control.dense;
228 let aggressive = control.aggressive;
229
230 let mut mindeg = 0usize;
231 let mut ncmpa = 0usize;
232 let mut lemax = 0usize;
233
234 let mut ndiv = 0.0;
235 let mut nms_lu = 0.0;
236 let mut nms_ldl = 0.0;
237
238 let mut nel = 0usize;
239 let mut me = none;
240
241 let dense = if alpha < 0.0 {
242 n - 2
243 } else {
244 (alpha * (n as f64).faer_sqrt()) as usize
245 };
246
247 let dense = Ord::max(dense, 16);
248 let dense = Ord::min(dense, n);
249
250 let (w, stack) = stack.make_raw::<I::Signed>(n);
251 let (nv, stack) = stack.make_raw::<I::Signed>(n);
252 let (elen, mut stack) = stack.make_raw::<I::Signed>(n);
253
254 let nv = &mut *nv;
255 let elen = &mut *elen;
256 let w = &mut *w;
257
258 let wbig = I::Signed::MAX - I(n);
259 let mut wflg = clear_flag(zero, wbig, w);
260 let mut ndense = zero;
261
262 {
263 let (head, stack) = stack.rb_mut().make_raw::<I::Signed>(n);
264 let (degree, _) = stack.make_raw::<I::Signed>(n);
265
266 let head = &mut *head;
267 let degree = &mut *degree;
268
269 mem::fill_none(last);
270 mem::fill_none(head);
271 mem::fill_none(next);
272 nv.fill(one);
273 w.fill(one);
274 mem::fill_zero(elen);
275 degree.copy_from_slice(len);
276
277 for i in 0..n {
278 let deg = degree[i].zx();
279 if deg == 0 {
280 elen[i] = flip(one);
281 nel += 1;
282 pe[i] = none;
283 w[i] = zero;
284 } else if deg > dense {
285 ndense += one;
286 nv[i] = zero;
287 elen[i] = none;
288 pe[i] = none;
289 nel += 1;
290 } else {
291 let inext = head[deg];
292 if inext != none {
293 last[inext.zx()] = I(i);
294 }
295 next[i] = inext;
296 head[deg] = I(i);
297 }
298 }
299
300 while nel < n {
301 let mut deg = mindeg;
302 while deg < n {
303 me = head[deg];
304 if me != none {
305 break;
306 }
307 deg += 1;
308 }
309 mindeg = deg;
310
311 let me = me.zx();
312 let inext = next[me];
313 if inext != none {
314 last[inext.zx()] = none;
315 }
316 head[deg] = inext;
317
318 let elenme = elen[me];
319 let mut nvpiv = nv[me];
320 nel += nvpiv.zx();
321
322 nv[me] = -nvpiv;
323 let mut degme = 0usize;
324 let mut pme1;
325 let mut pme2;
326 if elenme == zero {
327 pme1 = pe[me];
328 pme2 = pme1 - one;
329
330 for p in pme1.zx()..(pme1 + len[me]).zx() {
331 let i = iw[p].zx();
332 let nvi = nv[i];
333 if nvi > zero {
334 degme += nvi.zx();
335 nv[i] = -nvi;
336 pme2 += one;
337 iw[pme2.zx()] = I(i);
338
339 let ilast = last[i];
340 let inext = next[i];
341
342 if inext != none {
343 last[inext.zx()] = ilast;
344 }
345 if ilast != none {
346 next[ilast.zx()] = inext;
347 } else {
348 head[degree[i].zx()] = inext;
349 }
350 }
351 }
352 } else {
353 let mut p = pe[me].zx();
354 pme1 = I(pfree);
355 let slenme = (len[me] - elenme).zx();
356
357 for knt1 in 1..elenme.zx() + 2 {
358 let e;
359 let mut pj;
360 let ln;
361 if I(knt1) > elenme {
362 e = me;
363 pj = I(p);
364 ln = slenme;
365 } else {
366 e = iw[p].zx();
367 p += 1;
368 pj = pe[e];
369 ln = len[e].zx();
370 }
371
372 for knt2 in 1..ln + 1 {
373 let i = iw[pj.zx()].zx();
374 pj += one;
375 let nvi = nv[i];
376
377 if nvi > zero {
378 if pfree >= iwlen {
379 pe[me] = I(p);
380 len[me] -= I(knt1);
381 if len[me] == zero {
382 pe[me] = none;
383 }
384
385 pe[e] = pj;
386 len[e] = I(ln - knt2);
387 if len[e] == zero {
388 pe[e] = none;
389 }
390
391 ncmpa += 1;
392 for (j, pe) in pe.iter_mut().enumerate() {
393 let pn = *pe;
394 if pn >= zero {
395 let pn = pn.zx();
396 *pe = iw[pn];
397 iw[pn] = flip(I(j));
398 }
399 }
400
401 let mut psrc = 0usize;
402 let mut pdst = 0usize;
403 let pend = pme1.zx();
404
405 while psrc < pend {
406 let j = flip(iw[psrc]);
407 psrc += 1;
408 if j >= zero {
409 let j = j.zx();
410 iw[pdst] = pe[j];
411 pe[j] = I(pdst);
412 pdst += 1;
413 let lenj = len[j].zx();
414
415 if lenj > 0 {
416 iw.copy_within(psrc..psrc + lenj - 1, pdst);
417 psrc += lenj - 1;
418 pdst += lenj - 1;
419 }
420 }
421 }
422
423 let p1 = pdst;
424 iw.copy_within(pme1.zx()..pfree, pdst);
425 pdst += pfree - pme1.zx();
426
427 pme1 = I(p1);
428 pfree = pdst;
429 pj = pe[e];
430 p = pe[me].zx();
431 }
432
433 degme += nvi.zx();
434 nv[i] = -nvi;
435 iw[pfree] = I(i);
436 pfree += 1;
437
438 let ilast = last[i];
439 let inext = next[i];
440
441 if inext != none {
442 last[inext.zx()] = ilast;
443 }
444 if ilast != none {
445 next[ilast.zx()] = inext;
446 } else {
447 head[degree[i].zx()] = inext;
448 }
449 }
450 }
451 if e != me {
452 pe[e] = flip(I(me));
453 w[e] = zero;
454 }
455 }
456 pme2 = I(pfree - 1);
457 }
458
459 degree[me] = I(degme);
460 pe[me] = pme1;
461 len[me] = pme2 - pme1 + one;
462 elen[me] = flip(nvpiv + I(degme));
463
464 wflg = clear_flag(wflg, wbig, w);
465 assert!(pme1 >= zero);
466 assert!(pme2 >= zero);
467 for pme in pme1.zx()..pme2.zx() + 1 {
468 let i = iw[pme].zx();
469 let eln = elen[i];
470 if eln > zero {
471 let nvi = -nv[i];
472 let wnvi = wflg - nvi;
473 for iw in iw[pe[i].zx()..][..eln.zx()].iter() {
474 let e = iw.zx();
475 let mut we = w[e];
476 if we >= wflg {
477 we -= nvi;
478 } else if we != zero {
479 we = degree[e] + wnvi;
480 }
481 w[e] = we;
482 }
483 }
484 }
485
486 for pme in pme1.zx()..pme2.zx() + 1 {
487 let i = iw[pme].zx();
488 let p1 = pe[i].zx();
489 let p2 = p1 + elen[i].zx();
490 let mut pn = p1;
491
492 let mut hash = 0usize;
493 deg = 0usize;
494
495 if aggressive {
496 for p in p1..p2 {
497 let e = iw[p].zx();
498 let we = w[e];
499 if we != zero {
500 let dext = we - wflg;
501 if dext > zero {
502 deg += dext.zx();
503 iw[pn] = I(e);
504 pn += 1;
505 hash = hash.wrapping_add(e);
506 } else {
507 pe[e] = flip(I(me));
508 w[e] = zero;
509 }
510 }
511 }
512 } else {
513 for p in p1..p2 {
514 let e = iw[p].zx();
515 let we = w[e];
516 if we != zero {
517 let dext = (we - wflg).zx();
518 deg += dext;
519 iw[pn] = I(e);
520 pn += 1;
521 hash = hash.wrapping_add(e);
522 }
523 }
524 }
525
526 elen[i] = I(pn - p1 + 1);
527 let p3 = pn;
528 let p4 = p1 + len[i].zx();
529 for p in p2..p4 {
530 let j = iw[p].zx();
531 let nvj = nv[j];
532 if nvj > zero {
533 deg += nvj.zx();
534 iw[pn] = I(j);
535 pn += 1;
536 hash = hash.wrapping_add(j);
537 }
538 }
539
540 if elen[i] == one && p3 == pn {
541 pe[i] = flip(I(me));
542 let nvi = -nv[i];
543 assert!(nvi >= zero);
544 degme -= nvi.zx();
545 nvpiv += nvi;
546 nel += nvi.zx();
547 nv[i] = zero;
548 elen[i] = none;
549 } else {
550 degree[i] = Ord::min(degree[i], I(deg));
551 iw[pn] = iw[p3];
552 iw[p3] = iw[p1];
553 iw[p1] = I(me);
554 len[i] = I(pn - p1 + 1);
555 hash %= n;
556
557 let j = head[hash];
558 if j <= none {
559 next[i] = flip(j);
560 head[hash] = flip(I(i));
561 } else {
562 next[i] = last[j.zx()];
563 last[j.zx()] = I(i);
564 }
565 last[i] = I(hash);
566 }
567 }
568
569 degree[me] = I(degme);
570 lemax = Ord::max(lemax, degme);
571 wflg += I(lemax);
572 wflg = clear_flag(wflg, wbig, w);
573
574 for pme in pme1.zx()..pme2.zx() + 1 {
575 let mut i = iw[pme].zx();
576 if nv[i] < zero {
577 let hash = last[i].zx();
578 let j = head[hash];
579
580 if j == none {
581 i = NONE;
582 } else if j < none {
583 i = flip(j).zx();
584 head[hash] = none;
585 } else {
586 i = last[j.zx()].sx();
587 last[j.zx()] = none;
588 }
589
590 while i != NONE && next[i] != none {
591 let ln = len[i];
592 let eln = elen[i];
593
594 for p in (pe[i] + one).zx()..(pe[i] + ln).zx() {
595 w[iw[p].zx()] = wflg;
596 }
597 let mut jlast = i;
598 let mut j = next[i].sx();
599 while j != NONE {
600 let mut ok = len[j] == ln && elen[j] == eln;
601 for p in (pe[j] + one).zx()..(pe[j] + ln).zx() {
602 if w[iw[p].zx()] != wflg {
603 ok = false;
604 }
605 }
606
607 if ok {
608 pe[j] = flip(I(i));
609 nv[i] += nv[j];
610 nv[j] = zero;
611 elen[j] = none;
612 j = next[j].sx();
613 next[jlast] = I(j);
614 } else {
615 jlast = j;
616 j = next[j].sx();
617 }
618 }
619
620 wflg += one;
621 i = next[i].sx();
622 }
623 }
624 }
625
626 let mut p = pme1.zx();
627 let nleft = n - nel;
628 for pme in pme1.zx()..pme2.zx() + 1 {
629 let i = iw[pme].zx();
630 let nvi = -nv[i];
631 if nvi > zero {
632 nv[i] = nvi;
633 deg = degree[i].zx() + degme - nvi.zx();
634 deg = Ord::min(deg, nleft - nvi.zx());
635
636 let inext = head[deg];
637 if inext != none {
638 last[inext.zx()] = I(i);
639 }
640 next[i] = inext;
641 last[i] = none;
642 head[deg] = I(i);
643
644 mindeg = Ord::min(mindeg, deg);
645 degree[i] = I(deg);
646 iw[p] = I(i);
647 p += 1;
648 }
649 }
650 nv[me] = nvpiv;
651 len[me] = I(p) - pme1;
652 if len[me] == zero {
653 pe[me] = none;
654 w[me] = zero;
655 }
656 if elenme != zero {
657 pfree = p;
658 }
659
660 {
661 let f = nvpiv.sx() as isize as f64;
662 let r = degme as f64 + ndense.sx() as isize as f64;
663 let lnzme = f * r + (f - 1.0) * f / 2.0;
664 ndiv += lnzme;
665 let s = f * r * r + r * (f - 1.0) * f + (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
666 nms_lu += s;
667 nms_ldl += (s + lnzme) / 2.0;
668 }
669 }
670
671 {
672 let f = ndense.sx() as isize as f64;
673 let lnzme = (f - 1.0) * f / 2.0;
674 ndiv += lnzme;
675 let s = (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
676 nms_lu += s;
677 nms_ldl += (s + lnzme) / 2.0;
678 }
679
680 for pe in pe.iter_mut() {
681 *pe = flip(*pe);
682 }
683 for elen in elen.iter_mut() {
684 *elen = flip(*elen);
685 }
686
687 for i in 0..n {
688 if nv[i] == zero {
689 let mut j = pe[i].sx();
690 if j == NONE {
691 continue;
692 }
693
694 while nv[j] == zero {
695 j = pe[j].zx();
696 }
697 let e = I(j);
698 let mut j = i;
699 while nv[j] == zero {
700 let jnext = pe[j];
701 pe[j] = e;
702 j = jnext.zx();
703 }
704 }
705 }
706 }
707
708 ghost::with_size(n, |N| {
709 postorder(
710 Array::from_mut(w, N),
711 Array::from_ref(MaybeIdx::<'_, I>::from_slice_ref_checked(pe, N), N),
712 Array::from_ref(nv, N),
713 Array::from_ref(elen, N),
714 stack.rb_mut(),
715 );
716 });
717
718 let (head, _) = stack.make_raw::<I::Signed>(n);
719
720 mem::fill_none(head);
721 mem::fill_none(next);
722 for (e, &k) in w.iter().enumerate() {
723 if k != none {
724 head[k.zx()] = I(e);
725 }
726 }
727 nel = 0;
728 for &e in head.iter() {
729 if e == none {
730 break;
731 }
732 let e = e.zx();
733 next[e] = I(nel);
734 nel += nv[e].zx();
735 }
736 assert!(nel == n - ndense.zx());
737
738 for i in 0..n {
739 if nv[i] == zero {
740 let e = pe[i];
741 if e != none {
742 let e = e.zx();
743 next[i] = next[e];
744 next[e] += one;
745 } else {
746 next[i] = I(nel);
747 nel += 1;
748 }
749 }
750 }
751 assert!(nel == n);
752 for i in 0..n {
753 last[next[i].zx()] = I(i);
754 }
755
756 let _ = ncmpa;
757 FlopCount {
758 n_div: ndiv,
759 n_mult_subs_ldl: nms_ldl,
760 n_mult_subs_lu: nms_lu,
761 }
762}
763
764#[allow(clippy::comparison_chain)]
765fn amd_1<I: Index>(
766 perm: &mut [I::Signed],
767 perm_inv: &mut [I::Signed],
768 A: SymbolicSparseColMatRef<'_, I>,
769 len: &mut [I::Signed],
770 iwlen: usize,
771 control: Control,
772 stack: PodStack<'_>,
773) -> FlopCount {
774 let n = perm.len();
775 let I = I::Signed::truncate;
776
777 let zero = I(0);
778 let one = I(1);
779
780 let (p_e, stack) = stack.make_raw::<I::Signed>(n);
781 let (s_p, stack) = stack.make_raw::<I::Signed>(n);
782 let (i_w, mut stack) = stack.make_raw::<I::Signed>(iwlen);
783
784 let mut pfree = zero;
787 for j in 0..n {
788 p_e[j] = pfree;
789 s_p[j] = pfree;
790 pfree += len[j];
791 }
792 let pfree = pfree.zx();
793
794 assert!(iwlen >= pfree + n);
799
800 ghost::with_size(n, |N| {
801 let (t_p, _) = stack.rb_mut().make_raw::<I::Signed>(n);
802
803 let A = ghost::SymbolicSparseColMatRef::new(A, N, N);
804 let s_p = Array::from_mut(s_p, N);
805 let t_p = Array::from_mut(t_p, N);
806
807 for k in N.indices() {
808 let mut seen = zero;
810 for j in A.row_indices_of_col(k) {
811 if j < k {
812 i_w[s_p[j].zx()] = I(*k);
814 s_p[j] += one;
815
816 i_w[s_p[k].zx()] = I(*j);
817 s_p[k] += one;
818
819 seen += one;
820 } else if j == k {
821 seen += one;
823 break;
824 } else {
825 break;
828 }
829
830 let mut seen_j = zero;
833 for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
834 let i = i.zx();
835 if i < k {
836 i_w[s_p[i].zx()] = I(*j);
839 s_p[i] += one;
840
841 i_w[s_p[j].zx()] = I(*i);
842 s_p[j] += one;
843
844 seen_j += one;
845 } else if i == k {
846 seen_j += one;
848 break;
849 } else {
850 break;
853 }
854 }
855 t_p[j] += seen_j;
856 }
857 t_p[k] = seen;
858 }
859
860 for j in N.indices() {
862 for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
863 let i = i.zx();
864 i_w[s_p[i].zx()] = I(*j);
865 s_p[i] += one;
866
867 i_w[s_p[j].zx()] = I(*i);
868 s_p[j] += one;
869 }
870 }
871 });
872
873 debug_assert!(s_p[n - 1] == I(pfree));
874
875 amd_2::<I>(p_e, i_w, len, pfree, perm_inv, perm, control, stack)
876}
877
878fn preprocess<'out, I: Index>(
879 new_col_ptrs: &'out mut [I],
880 new_row_indices: &'out mut [I],
881 A: SymbolicSparseColMatRef<'_, I>,
882 stack: PodStack<'_>,
883) -> SymbolicSparseColMatRef<'out, I> {
884 let n = A.nrows();
885
886 ghost::with_size(n, |N| {
887 let I = I::Signed::truncate;
888 let zero = I(0);
889 let one = I(1);
890
891 let (w, stack) = stack.make_raw::<I::Signed>(n);
892 let (flag, _) = stack.make_raw::<I::Signed>(n);
893
894 let w = Array::from_mut(w, N);
895 let flag = Array::from_mut(flag, N);
896 let A = ghost::SymbolicSparseColMatRef::new(A, N, N);
897
898 mem::fill_zero(w.as_mut());
899 mem::fill_none(flag.as_mut());
900
901 for j in N.indices() {
902 let j_ = I(*j);
903 for i in A.row_indices_of_col(j) {
904 if flag[i] != j_ {
905 w[i] += one;
906 flag[i] = j_;
907 }
908 }
909 }
910
911 new_col_ptrs[0] = I::from_signed(zero);
912 for (i, [r, r_next]) in zip(
913 N.indices(),
914 windows2(Cell::as_slice_of_cells(Cell::from_mut(new_col_ptrs))),
915 ) {
916 r_next.set(r.get() + I::from_signed(w[i]));
917 }
918
919 w.as_mut()
920 .copy_from_slice(bytemuck::cast_slice(&new_col_ptrs[..n]));
921 mem::fill_none(flag.as_mut());
922
923 for j in N.indices() {
924 let j_ = I(*j);
925 for i in A.row_indices_of_col(j) {
926 if flag[i] != j_ {
927 new_row_indices[w[i].zx()] = I::from_signed(j_);
928 w[i] += one;
929 flag[i] = j_;
930 }
931 }
932 }
933
934 unsafe {
935 SymbolicSparseColMatRef::new_unchecked(
936 n,
937 n,
938 &*new_col_ptrs,
939 None,
940 &new_row_indices[..new_col_ptrs[n].zx()],
941 )
942 }
943 })
944}
945
946#[allow(clippy::comparison_chain)]
947fn aat<I: Index>(
948 len: &mut [I::Signed],
949 A: SymbolicSparseColMatRef<'_, I>,
950 stack: PodStack<'_>,
951) -> Result<usize, FaerError> {
952 ghost::with_size(A.nrows(), |N| {
953 let I = I::Signed::truncate;
954 let zero = I(0);
955 let one = I(1);
956 let A = ghost::SymbolicSparseColMatRef::new(A, N, N);
957
958 let n = *N;
959
960 let t_p = &mut *stack.make_raw::<I::Signed>(n).0; let len = Array::from_mut(len, N);
963 let t_p = Array::from_mut(t_p, N);
964
965 mem::fill_zero(len.as_mut());
966
967 for k in N.indices() {
968 let mut seen = zero;
969
970 for j in A.row_indices_of_col(k) {
971 if j < k {
972 seen += one;
973 len[j] += one;
974 len[k] += one;
975 } else if j == k {
976 seen += one;
977 break;
978 } else {
979 break;
980 }
981
982 let mut seen_j = zero;
983 for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
984 let i = i.zx();
985 if i < k {
986 len[i] += one;
987 len[j] += one;
988 seen_j += one;
989 } else if i == k {
990 seen_j += one;
991 break;
992 } else {
993 break;
994 }
995 }
996 t_p[j] += seen_j;
997 }
998 t_p[k] = seen;
999 }
1000
1001 for j in N.indices() {
1002 for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
1003 len[i.zx()] += one;
1004 len[j] += one;
1005 }
1006 }
1007 });
1008 let nzaat = I::Signed::sum_nonnegative(len).map(I::from_signed);
1009 nzaat.ok_or(FaerError::IndexOverflow).map(I::zx)
1010}
1011
1012pub fn order_sorted_req<I: Index>(n: usize, nnz_upper: usize) -> Result<StackReq, SizeOverflow> {
1013 let n_req = StackReq::try_new::<I>(n)?;
1014 let nzaat = nnz_upper.checked_mul(2).ok_or(SizeOverflow)?;
1015 StackReq::try_all_of([
1016 n_req,
1018 StackReq::try_new::<I>(nzaat.checked_add(nzaat / 5).ok_or(SizeOverflow)?)?,
1020 n_req,
1021 n_req,
1023 n_req,
1025 n_req,
1027 n_req,
1029 n_req,
1031 n_req,
1033 n_req,
1035 n_req,
1037 n_req,
1039 ])
1040}
1041
1042pub fn order_maybe_unsorted_req<I: Index>(
1043 n: usize,
1044 nnz_upper: usize,
1045) -> Result<StackReq, SizeOverflow> {
1046 StackReq::try_all_of([
1047 order_sorted_req::<I>(n, nnz_upper)?,
1048 StackReq::try_new::<I>(n + 1)?,
1049 StackReq::try_new::<I>(nnz_upper)?,
1050 ])
1051}
1052
1053pub fn order_sorted<I: Index>(
1054 perm: &mut [I],
1055 perm_inv: &mut [I],
1056 A: SymbolicSparseColMatRef<'_, I>,
1057 control: Control,
1058 stack: PodStack<'_>,
1059) -> Result<FlopCount, FaerError> {
1060 let n = perm.len();
1061
1062 if n == 0 {
1063 return Ok(FlopCount {
1064 n_div: 0.0,
1065 n_mult_subs_ldl: 0.0,
1066 n_mult_subs_lu: 0.0,
1067 });
1068 }
1069
1070 let (len, mut stack) = stack.make_raw::<I::Signed>(n);
1071 let nzaat = aat(len, A, stack.rb_mut())?;
1072 let iwlen = nzaat
1073 .checked_add(nzaat / 5)
1074 .and_then(|x| x.checked_add(n))
1075 .ok_or(FaerError::IndexOverflow)?;
1076 Ok(amd_1::<I>(
1077 bytemuck::cast_slice_mut(perm),
1078 bytemuck::cast_slice_mut(perm_inv),
1079 A,
1080 len,
1081 iwlen,
1082 control,
1083 stack,
1084 ))
1085}
1086
1087pub fn order_maybe_unsorted<I: Index>(
1090 perm: &mut [I],
1091 perm_inv: &mut [I],
1092 A: SymbolicSparseColMatRef<'_, I>,
1093 control: Control,
1094 stack: PodStack<'_>,
1095) -> Result<FlopCount, FaerError> {
1096 let n = perm.len();
1097
1098 if n == 0 {
1099 return Ok(FlopCount {
1100 n_div: 0.0,
1101 n_mult_subs_ldl: 0.0,
1102 n_mult_subs_lu: 0.0,
1103 });
1104 }
1105 let nnz = A.compute_nnz();
1106 let (new_col_ptrs, stack) = stack.make_raw::<I>(n + 1);
1107 let (new_row_indices, mut stack) = stack.make_raw::<I>(nnz);
1108 let A = preprocess(new_col_ptrs, new_row_indices, A, stack.rb_mut());
1109 order_sorted(perm, perm_inv, A, control, stack)
1110}
1111
1112#[derive(Debug, Copy, Clone, PartialEq)]
1113pub struct Control {
1114 pub dense: f64,
1116 pub aggressive: bool,
1118}
1119
1120impl Default for Control {
1121 #[inline]
1122 fn default() -> Self {
1123 Self {
1124 dense: 10.0,
1125 aggressive: true,
1126 }
1127 }
1128}
1129
1130#[derive(Default, Debug, Copy, Clone, PartialEq)]
1131pub struct FlopCount {
1132 pub n_div: f64,
1133 pub n_mult_subs_ldl: f64,
1134 pub n_mult_subs_lu: f64,
1135}