faer_sparse/
amd.rs

1//! Approximate minimum degree ordering.
2
3// AMD, Copyright (c), 1996-2022, Timothy A. Davis,
4// Patrick R. Amestoy, and Iain S. Duff.  All Rights Reserved.
5//
6// Availability:
7//
8//     http://www.suitesparse.com
9//
10// -------------------------------------------------------------------------------
11// AMD License: BSD 3-clause:
12// -------------------------------------------------------------------------------
13//
14//     Redistribution and use in source and binary forms, with or without
15//     modification, are permitted provided that the following conditions are met:
16//         * Redistributions of source code must retain the above copyright notice, this list of
17//           conditions and the following disclaimer.
18//         * Redistributions in binary form must reproduce the above copyright notice, this list of
19//           conditions and the following disclaimer in the documentation and/or other materials
20//           provided with the distribution.
21//         * Neither the name of the organizations to which the authors are affiliated, nor the
22//           names of its contributors may be used to endorse or promote products derived from this
23//           software without specific prior written permission.
24//
25//     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26//     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27//     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28//     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
29//     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30//     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31//     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32//     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33//     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34//     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
35//     DAMAGE.
36
37use crate::{
38    ghost::{self, Array, Idx, MaybeIdx},
39    mem::{self, NONE},
40    windows2, FaerError, Index, SignedIndex, SymbolicSparseColMatRef,
41};
42use core::{cell::Cell, iter::zip};
43use dyn_stack::{PodStack, SizeOverflow, StackReq};
44use faer_core::{assert, ComplexField};
45use reborrow::*;
46
47#[inline]
48fn post_tree<'n, I: Index>(
49    root: Idx<'n, usize>,
50    k: usize,
51    child: &mut Array<'n, MaybeIdx<'n, I>>,
52    sibling: &Array<'n, MaybeIdx<'n, I>>,
53    order: &mut Array<'n, I::Signed>,
54    stack: &mut Array<'n, I::Signed>,
55) -> usize {
56    let N = child.len();
57    let I = I::Signed::truncate;
58    let mut top = 1usize;
59    stack[N.check(0)] = I(*root);
60
61    let mut k = k;
62
63    while top > 0 {
64        let i = N.check(stack[N.check(top - 1)].zx());
65        if let Some(child_) = child[i].idx() {
66            let mut f = child_.zx();
67            loop {
68                top += 1;
69                if let Some(new_f) = sibling[f].idx() {
70                    f = new_f.zx();
71                } else {
72                    break;
73                }
74            }
75
76            let mut t = top;
77            let mut f = child_.zx();
78            loop {
79                t -= 1;
80                stack[N.check(t)] = I(*f);
81                if let Some(new_f) = sibling[f].idx() {
82                    f = new_f.zx();
83                } else {
84                    break;
85                }
86            }
87            child[i] = MaybeIdx::none();
88        } else {
89            top -= 1;
90            order[i] = I(k);
91            k += 1;
92        }
93    }
94
95    k
96}
97
98#[inline]
99fn postorder<'n, 'out, I: Index>(
100    order: &'out mut Array<'n, I::Signed>,
101    etree: &Array<'n, MaybeIdx<'n, I>>,
102    nv: &Array<'n, I::Signed>,
103    f_size: &Array<'n, I::Signed>,
104    stack: PodStack<'_>,
105) {
106    let N = order.len();
107    let n = *N;
108    if n == 0 {
109        return;
110    }
111
112    let I = I::Signed::truncate;
113    let zero = I(0);
114    let none = I(NONE);
115
116    let (child, stack) = stack.make_raw::<I::Signed>(n);
117    let (sibling, stack) = stack.make_raw::<I::Signed>(n);
118    let (stack, _) = stack.make_raw::<I::Signed>(n);
119
120    let child = Array::from_mut(ghost::fill_none::<I>(child, N), N);
121    let sibling = Array::from_mut(ghost::fill_none::<I>(sibling, N), N);
122    let stack = Array::from_mut(stack, N);
123
124    for j in N.indices().rev() {
125        if nv[j] > zero {
126            if let Some(parent) = etree[j].idx() {
127                let parent = parent.zx();
128                sibling[j] = child[parent];
129                child[parent] = MaybeIdx::from_index(j.truncate());
130            }
131        }
132    }
133
134    for i in N.indices() {
135        if nv[i] > zero {
136            if let Some(child_) = child[i].idx() {
137                let child_ = child_.zx();
138
139                let mut fprev = MaybeIdx::<'n>::none();
140                let mut bigfprev = MaybeIdx::<'n>::none();
141                let mut bigf = MaybeIdx::<'n>::none();
142                let mut maxfrsize = none;
143
144                let mut f = MaybeIdx::from_index(child_);
145                while let Some(f_) = f.idx() {
146                    let frsize = f_size[f_];
147                    if frsize >= maxfrsize {
148                        maxfrsize = frsize;
149                        bigfprev = fprev;
150                        bigf = f;
151                    }
152                    fprev = f;
153                    f = sibling[f_].sx();
154                }
155
156                let bigf = bigf.idx().unwrap();
157                let fnext = sibling[bigf];
158                if let Some(fnext) = fnext.idx() {
159                    if let Some(bigfprev) = bigfprev.idx() {
160                        sibling[bigfprev] = MaybeIdx::from_index(fnext);
161                    } else {
162                        child[i] = MaybeIdx::from_index(fnext);
163                    }
164
165                    let fprev = fprev.idx().unwrap();
166                    sibling[bigf] = MaybeIdx::none();
167                    sibling[fprev] = MaybeIdx::from_index(bigf.truncate());
168                }
169            }
170        }
171    }
172
173    mem::fill_none(order.as_mut());
174
175    let mut k = 0usize;
176    for i in N.indices() {
177        if etree[i].idx().is_none() && nv[i] > zero {
178            k = post_tree(i, k, child, sibling, order, stack);
179        }
180    }
181}
182
183#[inline(always)]
184fn flip<I: SignedIndex>(i: I) -> I {
185    -I::truncate(2) - i
186}
187
188#[inline]
189fn clear_flag<I: SignedIndex>(wflg: I, wbig: I, w: &mut [I]) -> I {
190    let I = I::truncate;
191    let zero = I(0);
192    let one = I(1);
193
194    if wflg < I(2) || wflg >= wbig {
195        for x in w {
196            if *x != zero {
197                *x = one;
198            }
199        }
200        return I(2);
201    }
202    wflg
203}
204
205#[allow(clippy::comparison_chain)]
206fn amd_2<I: Index>(
207    pe: &mut [I::Signed],  // input/output
208    iw: &mut [I::Signed],  // input/modified (undefined on output)
209    len: &mut [I::Signed], // input/modified (undefined on output)
210    pfree: usize,
211    next: &mut [I::Signed],
212    last: &mut [I::Signed],
213    control: Control,
214    stack: PodStack<'_>,
215) -> FlopCount {
216    let n = pe.len();
217    assert!(n < I::Signed::MAX.zx());
218
219    let mut pfree = pfree;
220    let iwlen = iw.len();
221
222    let I = I::Signed::truncate;
223    let none = I(NONE);
224    let zero = I(0);
225    let one = I(1);
226
227    let alpha = control.dense;
228    let aggressive = control.aggressive;
229
230    let mut mindeg = 0usize;
231    let mut ncmpa = 0usize;
232    let mut lemax = 0usize;
233
234    let mut ndiv = 0.0;
235    let mut nms_lu = 0.0;
236    let mut nms_ldl = 0.0;
237
238    let mut nel = 0usize;
239    let mut me = none;
240
241    let dense = if alpha < 0.0 {
242        n - 2
243    } else {
244        (alpha * (n as f64).faer_sqrt()) as usize
245    };
246
247    let dense = Ord::max(dense, 16);
248    let dense = Ord::min(dense, n);
249
250    let (w, stack) = stack.make_raw::<I::Signed>(n);
251    let (nv, stack) = stack.make_raw::<I::Signed>(n);
252    let (elen, mut stack) = stack.make_raw::<I::Signed>(n);
253
254    let nv = &mut *nv;
255    let elen = &mut *elen;
256    let w = &mut *w;
257
258    let wbig = I::Signed::MAX - I(n);
259    let mut wflg = clear_flag(zero, wbig, w);
260    let mut ndense = zero;
261
262    {
263        let (head, stack) = stack.rb_mut().make_raw::<I::Signed>(n);
264        let (degree, _) = stack.make_raw::<I::Signed>(n);
265
266        let head = &mut *head;
267        let degree = &mut *degree;
268
269        mem::fill_none(last);
270        mem::fill_none(head);
271        mem::fill_none(next);
272        nv.fill(one);
273        w.fill(one);
274        mem::fill_zero(elen);
275        degree.copy_from_slice(len);
276
277        for i in 0..n {
278            let deg = degree[i].zx();
279            if deg == 0 {
280                elen[i] = flip(one);
281                nel += 1;
282                pe[i] = none;
283                w[i] = zero;
284            } else if deg > dense {
285                ndense += one;
286                nv[i] = zero;
287                elen[i] = none;
288                pe[i] = none;
289                nel += 1;
290            } else {
291                let inext = head[deg];
292                if inext != none {
293                    last[inext.zx()] = I(i);
294                }
295                next[i] = inext;
296                head[deg] = I(i);
297            }
298        }
299
300        while nel < n {
301            let mut deg = mindeg;
302            while deg < n {
303                me = head[deg];
304                if me != none {
305                    break;
306                }
307                deg += 1;
308            }
309            mindeg = deg;
310
311            let me = me.zx();
312            let inext = next[me];
313            if inext != none {
314                last[inext.zx()] = none;
315            }
316            head[deg] = inext;
317
318            let elenme = elen[me];
319            let mut nvpiv = nv[me];
320            nel += nvpiv.zx();
321
322            nv[me] = -nvpiv;
323            let mut degme = 0usize;
324            let mut pme1;
325            let mut pme2;
326            if elenme == zero {
327                pme1 = pe[me];
328                pme2 = pme1 - one;
329
330                for p in pme1.zx()..(pme1 + len[me]).zx() {
331                    let i = iw[p].zx();
332                    let nvi = nv[i];
333                    if nvi > zero {
334                        degme += nvi.zx();
335                        nv[i] = -nvi;
336                        pme2 += one;
337                        iw[pme2.zx()] = I(i);
338
339                        let ilast = last[i];
340                        let inext = next[i];
341
342                        if inext != none {
343                            last[inext.zx()] = ilast;
344                        }
345                        if ilast != none {
346                            next[ilast.zx()] = inext;
347                        } else {
348                            head[degree[i].zx()] = inext;
349                        }
350                    }
351                }
352            } else {
353                let mut p = pe[me].zx();
354                pme1 = I(pfree);
355                let slenme = (len[me] - elenme).zx();
356
357                for knt1 in 1..elenme.zx() + 2 {
358                    let e;
359                    let mut pj;
360                    let ln;
361                    if I(knt1) > elenme {
362                        e = me;
363                        pj = I(p);
364                        ln = slenme;
365                    } else {
366                        e = iw[p].zx();
367                        p += 1;
368                        pj = pe[e];
369                        ln = len[e].zx();
370                    }
371
372                    for knt2 in 1..ln + 1 {
373                        let i = iw[pj.zx()].zx();
374                        pj += one;
375                        let nvi = nv[i];
376
377                        if nvi > zero {
378                            if pfree >= iwlen {
379                                pe[me] = I(p);
380                                len[me] -= I(knt1);
381                                if len[me] == zero {
382                                    pe[me] = none;
383                                }
384
385                                pe[e] = pj;
386                                len[e] = I(ln - knt2);
387                                if len[e] == zero {
388                                    pe[e] = none;
389                                }
390
391                                ncmpa += 1;
392                                for (j, pe) in pe.iter_mut().enumerate() {
393                                    let pn = *pe;
394                                    if pn >= zero {
395                                        let pn = pn.zx();
396                                        *pe = iw[pn];
397                                        iw[pn] = flip(I(j));
398                                    }
399                                }
400
401                                let mut psrc = 0usize;
402                                let mut pdst = 0usize;
403                                let pend = pme1.zx();
404
405                                while psrc < pend {
406                                    let j = flip(iw[psrc]);
407                                    psrc += 1;
408                                    if j >= zero {
409                                        let j = j.zx();
410                                        iw[pdst] = pe[j];
411                                        pe[j] = I(pdst);
412                                        pdst += 1;
413                                        let lenj = len[j].zx();
414
415                                        if lenj > 0 {
416                                            iw.copy_within(psrc..psrc + lenj - 1, pdst);
417                                            psrc += lenj - 1;
418                                            pdst += lenj - 1;
419                                        }
420                                    }
421                                }
422
423                                let p1 = pdst;
424                                iw.copy_within(pme1.zx()..pfree, pdst);
425                                pdst += pfree - pme1.zx();
426
427                                pme1 = I(p1);
428                                pfree = pdst;
429                                pj = pe[e];
430                                p = pe[me].zx();
431                            }
432
433                            degme += nvi.zx();
434                            nv[i] = -nvi;
435                            iw[pfree] = I(i);
436                            pfree += 1;
437
438                            let ilast = last[i];
439                            let inext = next[i];
440
441                            if inext != none {
442                                last[inext.zx()] = ilast;
443                            }
444                            if ilast != none {
445                                next[ilast.zx()] = inext;
446                            } else {
447                                head[degree[i].zx()] = inext;
448                            }
449                        }
450                    }
451                    if e != me {
452                        pe[e] = flip(I(me));
453                        w[e] = zero;
454                    }
455                }
456                pme2 = I(pfree - 1);
457            }
458
459            degree[me] = I(degme);
460            pe[me] = pme1;
461            len[me] = pme2 - pme1 + one;
462            elen[me] = flip(nvpiv + I(degme));
463
464            wflg = clear_flag(wflg, wbig, w);
465            assert!(pme1 >= zero);
466            assert!(pme2 >= zero);
467            for pme in pme1.zx()..pme2.zx() + 1 {
468                let i = iw[pme].zx();
469                let eln = elen[i];
470                if eln > zero {
471                    let nvi = -nv[i];
472                    let wnvi = wflg - nvi;
473                    for iw in iw[pe[i].zx()..][..eln.zx()].iter() {
474                        let e = iw.zx();
475                        let mut we = w[e];
476                        if we >= wflg {
477                            we -= nvi;
478                        } else if we != zero {
479                            we = degree[e] + wnvi;
480                        }
481                        w[e] = we;
482                    }
483                }
484            }
485
486            for pme in pme1.zx()..pme2.zx() + 1 {
487                let i = iw[pme].zx();
488                let p1 = pe[i].zx();
489                let p2 = p1 + elen[i].zx();
490                let mut pn = p1;
491
492                let mut hash = 0usize;
493                deg = 0usize;
494
495                if aggressive {
496                    for p in p1..p2 {
497                        let e = iw[p].zx();
498                        let we = w[e];
499                        if we != zero {
500                            let dext = we - wflg;
501                            if dext > zero {
502                                deg += dext.zx();
503                                iw[pn] = I(e);
504                                pn += 1;
505                                hash = hash.wrapping_add(e);
506                            } else {
507                                pe[e] = flip(I(me));
508                                w[e] = zero;
509                            }
510                        }
511                    }
512                } else {
513                    for p in p1..p2 {
514                        let e = iw[p].zx();
515                        let we = w[e];
516                        if we != zero {
517                            let dext = (we - wflg).zx();
518                            deg += dext;
519                            iw[pn] = I(e);
520                            pn += 1;
521                            hash = hash.wrapping_add(e);
522                        }
523                    }
524                }
525
526                elen[i] = I(pn - p1 + 1);
527                let p3 = pn;
528                let p4 = p1 + len[i].zx();
529                for p in p2..p4 {
530                    let j = iw[p].zx();
531                    let nvj = nv[j];
532                    if nvj > zero {
533                        deg += nvj.zx();
534                        iw[pn] = I(j);
535                        pn += 1;
536                        hash = hash.wrapping_add(j);
537                    }
538                }
539
540                if elen[i] == one && p3 == pn {
541                    pe[i] = flip(I(me));
542                    let nvi = -nv[i];
543                    assert!(nvi >= zero);
544                    degme -= nvi.zx();
545                    nvpiv += nvi;
546                    nel += nvi.zx();
547                    nv[i] = zero;
548                    elen[i] = none;
549                } else {
550                    degree[i] = Ord::min(degree[i], I(deg));
551                    iw[pn] = iw[p3];
552                    iw[p3] = iw[p1];
553                    iw[p1] = I(me);
554                    len[i] = I(pn - p1 + 1);
555                    hash %= n;
556
557                    let j = head[hash];
558                    if j <= none {
559                        next[i] = flip(j);
560                        head[hash] = flip(I(i));
561                    } else {
562                        next[i] = last[j.zx()];
563                        last[j.zx()] = I(i);
564                    }
565                    last[i] = I(hash);
566                }
567            }
568
569            degree[me] = I(degme);
570            lemax = Ord::max(lemax, degme);
571            wflg += I(lemax);
572            wflg = clear_flag(wflg, wbig, w);
573
574            for pme in pme1.zx()..pme2.zx() + 1 {
575                let mut i = iw[pme].zx();
576                if nv[i] < zero {
577                    let hash = last[i].zx();
578                    let j = head[hash];
579
580                    if j == none {
581                        i = NONE;
582                    } else if j < none {
583                        i = flip(j).zx();
584                        head[hash] = none;
585                    } else {
586                        i = last[j.zx()].sx();
587                        last[j.zx()] = none;
588                    }
589
590                    while i != NONE && next[i] != none {
591                        let ln = len[i];
592                        let eln = elen[i];
593
594                        for p in (pe[i] + one).zx()..(pe[i] + ln).zx() {
595                            w[iw[p].zx()] = wflg;
596                        }
597                        let mut jlast = i;
598                        let mut j = next[i].sx();
599                        while j != NONE {
600                            let mut ok = len[j] == ln && elen[j] == eln;
601                            for p in (pe[j] + one).zx()..(pe[j] + ln).zx() {
602                                if w[iw[p].zx()] != wflg {
603                                    ok = false;
604                                }
605                            }
606
607                            if ok {
608                                pe[j] = flip(I(i));
609                                nv[i] += nv[j];
610                                nv[j] = zero;
611                                elen[j] = none;
612                                j = next[j].sx();
613                                next[jlast] = I(j);
614                            } else {
615                                jlast = j;
616                                j = next[j].sx();
617                            }
618                        }
619
620                        wflg += one;
621                        i = next[i].sx();
622                    }
623                }
624            }
625
626            let mut p = pme1.zx();
627            let nleft = n - nel;
628            for pme in pme1.zx()..pme2.zx() + 1 {
629                let i = iw[pme].zx();
630                let nvi = -nv[i];
631                if nvi > zero {
632                    nv[i] = nvi;
633                    deg = degree[i].zx() + degme - nvi.zx();
634                    deg = Ord::min(deg, nleft - nvi.zx());
635
636                    let inext = head[deg];
637                    if inext != none {
638                        last[inext.zx()] = I(i);
639                    }
640                    next[i] = inext;
641                    last[i] = none;
642                    head[deg] = I(i);
643
644                    mindeg = Ord::min(mindeg, deg);
645                    degree[i] = I(deg);
646                    iw[p] = I(i);
647                    p += 1;
648                }
649            }
650            nv[me] = nvpiv;
651            len[me] = I(p) - pme1;
652            if len[me] == zero {
653                pe[me] = none;
654                w[me] = zero;
655            }
656            if elenme != zero {
657                pfree = p;
658            }
659
660            {
661                let f = nvpiv.sx() as isize as f64;
662                let r = degme as f64 + ndense.sx() as isize as f64;
663                let lnzme = f * r + (f - 1.0) * f / 2.0;
664                ndiv += lnzme;
665                let s = f * r * r + r * (f - 1.0) * f + (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
666                nms_lu += s;
667                nms_ldl += (s + lnzme) / 2.0;
668            }
669        }
670
671        {
672            let f = ndense.sx() as isize as f64;
673            let lnzme = (f - 1.0) * f / 2.0;
674            ndiv += lnzme;
675            let s = (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
676            nms_lu += s;
677            nms_ldl += (s + lnzme) / 2.0;
678        }
679
680        for pe in pe.iter_mut() {
681            *pe = flip(*pe);
682        }
683        for elen in elen.iter_mut() {
684            *elen = flip(*elen);
685        }
686
687        for i in 0..n {
688            if nv[i] == zero {
689                let mut j = pe[i].sx();
690                if j == NONE {
691                    continue;
692                }
693
694                while nv[j] == zero {
695                    j = pe[j].zx();
696                }
697                let e = I(j);
698                let mut j = i;
699                while nv[j] == zero {
700                    let jnext = pe[j];
701                    pe[j] = e;
702                    j = jnext.zx();
703                }
704            }
705        }
706    }
707
708    ghost::with_size(n, |N| {
709        postorder(
710            Array::from_mut(w, N),
711            Array::from_ref(MaybeIdx::<'_, I>::from_slice_ref_checked(pe, N), N),
712            Array::from_ref(nv, N),
713            Array::from_ref(elen, N),
714            stack.rb_mut(),
715        );
716    });
717
718    let (head, _) = stack.make_raw::<I::Signed>(n);
719
720    mem::fill_none(head);
721    mem::fill_none(next);
722    for (e, &k) in w.iter().enumerate() {
723        if k != none {
724            head[k.zx()] = I(e);
725        }
726    }
727    nel = 0;
728    for &e in head.iter() {
729        if e == none {
730            break;
731        }
732        let e = e.zx();
733        next[e] = I(nel);
734        nel += nv[e].zx();
735    }
736    assert!(nel == n - ndense.zx());
737
738    for i in 0..n {
739        if nv[i] == zero {
740            let e = pe[i];
741            if e != none {
742                let e = e.zx();
743                next[i] = next[e];
744                next[e] += one;
745            } else {
746                next[i] = I(nel);
747                nel += 1;
748            }
749        }
750    }
751    assert!(nel == n);
752    for i in 0..n {
753        last[next[i].zx()] = I(i);
754    }
755
756    let _ = ncmpa;
757    FlopCount {
758        n_div: ndiv,
759        n_mult_subs_ldl: nms_ldl,
760        n_mult_subs_lu: nms_lu,
761    }
762}
763
764#[allow(clippy::comparison_chain)]
765fn amd_1<I: Index>(
766    perm: &mut [I::Signed],
767    perm_inv: &mut [I::Signed],
768    A: SymbolicSparseColMatRef<'_, I>,
769    len: &mut [I::Signed],
770    iwlen: usize,
771    control: Control,
772    stack: PodStack<'_>,
773) -> FlopCount {
774    let n = perm.len();
775    let I = I::Signed::truncate;
776
777    let zero = I(0);
778    let one = I(1);
779
780    let (p_e, stack) = stack.make_raw::<I::Signed>(n);
781    let (s_p, stack) = stack.make_raw::<I::Signed>(n);
782    let (i_w, mut stack) = stack.make_raw::<I::Signed>(iwlen);
783
784    // Construct the pointers for A+A'.
785
786    let mut pfree = zero;
787    for j in 0..n {
788        p_e[j] = pfree;
789        s_p[j] = pfree;
790        pfree += len[j];
791    }
792    let pfree = pfree.zx();
793
794    // Note that this restriction on iwlen is slightly more restrictive than
795    // what is strictly required in amd_2. amd_2 can operate with no elbow
796    // room at all, but it will be very slow. For better performance, at
797    // least size-n elbow room is enforced.
798    assert!(iwlen >= pfree + n);
799
800    ghost::with_size(n, |N| {
801        let (t_p, _) = stack.rb_mut().make_raw::<I::Signed>(n);
802
803        let A = ghost::SymbolicSparseColMatRef::new(A, N, N);
804        let s_p = Array::from_mut(s_p, N);
805        let t_p = Array::from_mut(t_p, N);
806
807        for k in N.indices() {
808            // Construct A+A'.
809            let mut seen = zero;
810            for j in A.row_indices_of_col(k) {
811                if j < k {
812                    // Entry A(j,k) in the strictly upper triangular part.
813                    i_w[s_p[j].zx()] = I(*k);
814                    s_p[j] += one;
815
816                    i_w[s_p[k].zx()] = I(*j);
817                    s_p[k] += one;
818
819                    seen += one;
820                } else if j == k {
821                    // Skip the diagonal.
822                    seen += one;
823                    break;
824                } else {
825                    // j > k
826                    // First entry below the diagonal.
827                    break;
828                }
829
830                // Scan lower triangular part of A, in column j until reaching
831                // row k. Start where last scan left off.
832                let mut seen_j = zero;
833                for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
834                    let i = i.zx();
835                    if i < k {
836                        // A (i,j) is only in the lower part, not in upper.
837
838                        i_w[s_p[i].zx()] = I(*j);
839                        s_p[i] += one;
840
841                        i_w[s_p[j].zx()] = I(*i);
842                        s_p[j] += one;
843
844                        seen_j += one;
845                    } else if i == k {
846                        // Entry A(k,j) in lower part and A(j,k) in upper.
847                        seen_j += one;
848                        break;
849                    } else {
850                        // i > k
851                        // Consider this entry later, when k advances to i.
852                        break;
853                    }
854                }
855                t_p[j] += seen_j;
856            }
857            t_p[k] = seen;
858        }
859
860        // Clean up, for remaining mismatched entries.
861        for j in N.indices() {
862            for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
863                let i = i.zx();
864                i_w[s_p[i].zx()] = I(*j);
865                s_p[i] += one;
866
867                i_w[s_p[j].zx()] = I(*i);
868                s_p[j] += one;
869            }
870        }
871    });
872
873    debug_assert!(s_p[n - 1] == I(pfree));
874
875    amd_2::<I>(p_e, i_w, len, pfree, perm_inv, perm, control, stack)
876}
877
878fn preprocess<'out, I: Index>(
879    new_col_ptrs: &'out mut [I],
880    new_row_indices: &'out mut [I],
881    A: SymbolicSparseColMatRef<'_, I>,
882    stack: PodStack<'_>,
883) -> SymbolicSparseColMatRef<'out, I> {
884    let n = A.nrows();
885
886    ghost::with_size(n, |N| {
887        let I = I::Signed::truncate;
888        let zero = I(0);
889        let one = I(1);
890
891        let (w, stack) = stack.make_raw::<I::Signed>(n);
892        let (flag, _) = stack.make_raw::<I::Signed>(n);
893
894        let w = Array::from_mut(w, N);
895        let flag = Array::from_mut(flag, N);
896        let A = ghost::SymbolicSparseColMatRef::new(A, N, N);
897
898        mem::fill_zero(w.as_mut());
899        mem::fill_none(flag.as_mut());
900
901        for j in N.indices() {
902            let j_ = I(*j);
903            for i in A.row_indices_of_col(j) {
904                if flag[i] != j_ {
905                    w[i] += one;
906                    flag[i] = j_;
907                }
908            }
909        }
910
911        new_col_ptrs[0] = I::from_signed(zero);
912        for (i, [r, r_next]) in zip(
913            N.indices(),
914            windows2(Cell::as_slice_of_cells(Cell::from_mut(new_col_ptrs))),
915        ) {
916            r_next.set(r.get() + I::from_signed(w[i]));
917        }
918
919        w.as_mut()
920            .copy_from_slice(bytemuck::cast_slice(&new_col_ptrs[..n]));
921        mem::fill_none(flag.as_mut());
922
923        for j in N.indices() {
924            let j_ = I(*j);
925            for i in A.row_indices_of_col(j) {
926                if flag[i] != j_ {
927                    new_row_indices[w[i].zx()] = I::from_signed(j_);
928                    w[i] += one;
929                    flag[i] = j_;
930                }
931            }
932        }
933
934        unsafe {
935            SymbolicSparseColMatRef::new_unchecked(
936                n,
937                n,
938                &*new_col_ptrs,
939                None,
940                &new_row_indices[..new_col_ptrs[n].zx()],
941            )
942        }
943    })
944}
945
946#[allow(clippy::comparison_chain)]
947fn aat<I: Index>(
948    len: &mut [I::Signed],
949    A: SymbolicSparseColMatRef<'_, I>,
950    stack: PodStack<'_>,
951) -> Result<usize, FaerError> {
952    ghost::with_size(A.nrows(), |N| {
953        let I = I::Signed::truncate;
954        let zero = I(0);
955        let one = I(1);
956        let A = ghost::SymbolicSparseColMatRef::new(A, N, N);
957
958        let n = *N;
959
960        let t_p = &mut *stack.make_raw::<I::Signed>(n).0; // local workspace
961
962        let len = Array::from_mut(len, N);
963        let t_p = Array::from_mut(t_p, N);
964
965        mem::fill_zero(len.as_mut());
966
967        for k in N.indices() {
968            let mut seen = zero;
969
970            for j in A.row_indices_of_col(k) {
971                if j < k {
972                    seen += one;
973                    len[j] += one;
974                    len[k] += one;
975                } else if j == k {
976                    seen += one;
977                    break;
978                } else {
979                    break;
980                }
981
982                let mut seen_j = zero;
983                for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
984                    let i = i.zx();
985                    if i < k {
986                        len[i] += one;
987                        len[j] += one;
988                        seen_j += one;
989                    } else if i == k {
990                        seen_j += one;
991                        break;
992                    } else {
993                        break;
994                    }
995                }
996                t_p[j] += seen_j;
997            }
998            t_p[k] = seen;
999        }
1000
1001        for j in N.indices() {
1002            for i in &A.row_indices_of_col_raw(j)[t_p[j].zx()..] {
1003                len[i.zx()] += one;
1004                len[j] += one;
1005            }
1006        }
1007    });
1008    let nzaat = I::Signed::sum_nonnegative(len).map(I::from_signed);
1009    nzaat.ok_or(FaerError::IndexOverflow).map(I::zx)
1010}
1011
1012pub fn order_sorted_req<I: Index>(n: usize, nnz_upper: usize) -> Result<StackReq, SizeOverflow> {
1013    let n_req = StackReq::try_new::<I>(n)?;
1014    let nzaat = nnz_upper.checked_mul(2).ok_or(SizeOverflow)?;
1015    StackReq::try_all_of([
1016        // len
1017        n_req,
1018        // A+A.T plus elbow room
1019        StackReq::try_new::<I>(nzaat.checked_add(nzaat / 5).ok_or(SizeOverflow)?)?,
1020        n_req,
1021        // p_e
1022        n_req,
1023        // s_p
1024        n_req,
1025        // i_w
1026        n_req,
1027        // w
1028        n_req,
1029        // nv
1030        n_req,
1031        // elen
1032        n_req,
1033        // child
1034        n_req,
1035        // sibling
1036        n_req,
1037        // stack
1038        n_req,
1039    ])
1040}
1041
1042pub fn order_maybe_unsorted_req<I: Index>(
1043    n: usize,
1044    nnz_upper: usize,
1045) -> Result<StackReq, SizeOverflow> {
1046    StackReq::try_all_of([
1047        order_sorted_req::<I>(n, nnz_upper)?,
1048        StackReq::try_new::<I>(n + 1)?,
1049        StackReq::try_new::<I>(nnz_upper)?,
1050    ])
1051}
1052
1053pub fn order_sorted<I: Index>(
1054    perm: &mut [I],
1055    perm_inv: &mut [I],
1056    A: SymbolicSparseColMatRef<'_, I>,
1057    control: Control,
1058    stack: PodStack<'_>,
1059) -> Result<FlopCount, FaerError> {
1060    let n = perm.len();
1061
1062    if n == 0 {
1063        return Ok(FlopCount {
1064            n_div: 0.0,
1065            n_mult_subs_ldl: 0.0,
1066            n_mult_subs_lu: 0.0,
1067        });
1068    }
1069
1070    let (len, mut stack) = stack.make_raw::<I::Signed>(n);
1071    let nzaat = aat(len, A, stack.rb_mut())?;
1072    let iwlen = nzaat
1073        .checked_add(nzaat / 5)
1074        .and_then(|x| x.checked_add(n))
1075        .ok_or(FaerError::IndexOverflow)?;
1076    Ok(amd_1::<I>(
1077        bytemuck::cast_slice_mut(perm),
1078        bytemuck::cast_slice_mut(perm_inv),
1079        A,
1080        len,
1081        iwlen,
1082        control,
1083        stack,
1084    ))
1085}
1086
1087/// # Note
1088/// Allows unsorted matrices.
1089pub fn order_maybe_unsorted<I: Index>(
1090    perm: &mut [I],
1091    perm_inv: &mut [I],
1092    A: SymbolicSparseColMatRef<'_, I>,
1093    control: Control,
1094    stack: PodStack<'_>,
1095) -> Result<FlopCount, FaerError> {
1096    let n = perm.len();
1097
1098    if n == 0 {
1099        return Ok(FlopCount {
1100            n_div: 0.0,
1101            n_mult_subs_ldl: 0.0,
1102            n_mult_subs_lu: 0.0,
1103        });
1104    }
1105    let nnz = A.compute_nnz();
1106    let (new_col_ptrs, stack) = stack.make_raw::<I>(n + 1);
1107    let (new_row_indices, mut stack) = stack.make_raw::<I>(nnz);
1108    let A = preprocess(new_col_ptrs, new_row_indices, A, stack.rb_mut());
1109    order_sorted(perm, perm_inv, A, control, stack)
1110}
1111
1112#[derive(Debug, Copy, Clone, PartialEq)]
1113pub struct Control {
1114    /// "dense" if degree > dense * sqrt(n)
1115    pub dense: f64,
1116    /// Do aggressive absorption.
1117    pub aggressive: bool,
1118}
1119
1120impl Default for Control {
1121    #[inline]
1122    fn default() -> Self {
1123        Self {
1124            dense: 10.0,
1125            aggressive: true,
1126        }
1127    }
1128}
1129
1130#[derive(Default, Debug, Copy, Clone, PartialEq)]
1131pub struct FlopCount {
1132    pub n_div: f64,
1133    pub n_mult_subs_ldl: f64,
1134    pub n_mult_subs_lu: f64,
1135}