#![allow(dead_code)]
use super::workspace::{clear_flag, flip, Workspace, NONE};
use crate::OrderingError;
#[derive(Debug, Clone, Copy, Default)]
pub struct StepFlops {
pub ndiv: f64,
pub nms_lu: f64,
pub nms_ldl: f64,
}
impl StepFlops {
fn accumulate(&mut self, other: StepFlops) {
self.ndiv += other.ndiv;
self.nms_lu += other.nms_lu;
self.nms_ldl += other.nms_ldl;
}
}
pub fn select_pivot(ws: &mut Workspace) -> Option<usize> {
let n = ws.n;
let mut deg = ws.mindeg;
let mut me_signed: i32 = NONE;
while deg < n {
let h = ws.head[deg];
if h != NONE {
me_signed = h;
break;
}
deg += 1;
}
if me_signed == NONE {
return None;
}
ws.mindeg = deg;
let me = me_signed as usize;
let inext = ws.next[me];
if inext != NONE {
ws.last[inext as usize] = NONE;
}
ws.head[deg] = inext;
Some(me)
}
pub fn create_element(
ws: &mut Workspace,
me: usize,
) -> Result<(usize, usize, i32, usize), OrderingError> {
let elenme = ws.elen[me];
let nvpiv = ws.nv[me];
ws.nel += nvpiv as usize;
ws.nv[me] = -nvpiv;
let mut degme: usize = 0;
let pme1: usize;
let pme2: i32;
if elenme == 0 {
let pme1_s = ws.pe[me];
pme1 = pme1_s as usize;
let list_start = pme1;
let list_end = list_start + ws.len[me] as usize;
let mut pme2_s = pme1_s - 1;
for p in list_start..list_end {
let i = ws.iw[p] as usize;
let nvi = ws.nv[i];
if nvi > 0 {
degme += nvi as usize;
ws.nv[i] = -nvi;
pme2_s += 1;
ws.iw[pme2_s as usize] = i as i32;
let ilast = ws.last[i];
let inext = ws.next[i];
if inext != NONE {
ws.last[inext as usize] = ilast;
}
if ilast != NONE {
ws.next[ilast as usize] = inext;
} else {
ws.head[ws.degree[i] as usize] = inext;
}
}
}
pme2 = pme2_s;
} else {
let mut p = ws.pe[me] as usize;
let mut pme1_rw: usize = ws.pfree;
let slenme = (ws.len[me] - elenme) as usize;
let elenme_u = elenme as usize;
for knt1 in 1..=elenme_u + 1 {
let e: usize;
let mut pj: usize;
let ln: usize;
if knt1 > elenme_u {
e = me;
pj = p;
ln = slenme;
} else {
e = ws.iw[p] as usize;
p += 1;
pj = ws.pe[e] as usize;
ln = ws.len[e] as usize;
}
for knt2 in 1..=ln {
let i = ws.iw[pj] as usize;
pj += 1;
let nvi = ws.nv[i];
if nvi > 0 {
if ws.pfree >= ws.iwlen {
ws.pe[me] = p as i32;
ws.len[me] -= knt1 as i32;
if ws.len[me] == 0 {
ws.pe[me] = NONE;
}
ws.pe[e] = pj as i32;
ws.len[e] = (ln - knt2) as i32;
if ws.len[e] == 0 {
ws.pe[e] = NONE;
}
ws.ncmpa += 1;
for j in 0..ws.n {
let pn = ws.pe[j];
if pn >= 0 {
let pn_u = pn as usize;
ws.pe[j] = ws.iw[pn_u];
ws.iw[pn_u] = flip(j as i32);
}
}
let mut psrc = 0usize;
let mut pdst = 0usize;
let pend = pme1_rw;
while psrc < pend {
let j_marker = flip(ws.iw[psrc]);
psrc += 1;
if j_marker >= 0 {
let j = j_marker as usize;
ws.iw[pdst] = ws.pe[j];
ws.pe[j] = pdst as i32;
pdst += 1;
let lenj = ws.len[j] as usize;
if lenj > 0 {
ws.iw.copy_within(psrc..psrc + lenj - 1, pdst);
psrc += lenj - 1;
pdst += lenj - 1;
}
}
}
let p1 = pdst;
ws.iw.copy_within(pme1_rw..ws.pfree, pdst);
pdst += ws.pfree - pme1_rw;
pme1_rw = p1;
ws.pfree = pdst;
pj = ws.pe[e] as usize;
p = ws.pe[me] as usize;
}
degme += nvi as usize;
ws.nv[i] = -nvi;
ws.iw[ws.pfree] = i as i32;
ws.pfree += 1;
let ilast = ws.last[i];
let inext = ws.next[i];
if inext != NONE {
ws.last[inext as usize] = ilast;
}
if ilast != NONE {
ws.next[ilast as usize] = inext;
} else {
ws.head[ws.degree[i] as usize] = inext;
}
}
}
if e != me {
ws.pe[e] = flip(me as i32);
ws.w[e] = 0;
}
}
pme1 = pme1_rw;
pme2 = (ws.pfree - 1) as i32;
}
ws.degree[me] = degme as i32;
ws.pe[me] = pme1 as i32;
ws.len[me] = pme2 - pme1 as i32 + 1;
ws.elen[me] = flip(nvpiv + degme as i32);
ws.wflg = clear_flag(ws.wflg, ws.wbig, &mut ws.w);
let pme2_excl: usize = if pme2 < pme1 as i32 {
pme1
} else {
(pme2 + 1) as usize
};
Ok((pme1, pme2_excl, nvpiv, degme))
}
#[allow(clippy::too_many_arguments)]
pub fn finalize_step(
ws: &mut Workspace,
me: usize,
pme1: usize,
pme2_excl: usize,
nvpiv: i32,
degme: usize,
elenme: i32,
aggressive: bool,
) -> StepFlops {
let mut degme = degme;
let mut nvpiv = nvpiv;
for pme in pme1..pme2_excl {
let i = ws.iw[pme] as usize;
let eln = ws.elen[i];
if eln > 0 {
let nvi = -ws.nv[i];
let wnvi = ws.wflg - nvi;
let pi = ws.pe[i] as usize;
for k in 0..eln as usize {
let e = ws.iw[pi + k] as usize;
let mut we = ws.w[e];
if we >= ws.wflg {
we -= nvi;
} else if we != 0 {
we = ws.degree[e] + wnvi;
}
ws.w[e] = we;
}
}
}
for pme in pme1..pme2_excl {
let i = ws.iw[pme] as usize;
let p1 = ws.pe[i] as usize;
let p2 = p1 + ws.elen[i] as usize;
let mut pn = p1;
let mut deg: usize = 0;
let mut hash: usize = 0;
if aggressive {
for p in p1..p2 {
let e = ws.iw[p] as usize;
let we = ws.w[e];
if we != 0 {
let dext = we - ws.wflg;
if dext > 0 {
deg += dext as usize;
ws.iw[pn] = e as i32;
pn += 1;
hash = hash.wrapping_add(e);
} else {
ws.pe[e] = flip(me as i32);
ws.w[e] = 0;
}
}
}
} else {
for p in p1..p2 {
let e = ws.iw[p] as usize;
let we = ws.w[e];
if we != 0 {
debug_assert!(
we >= ws.wflg,
"stale mark: w[e]={we} < wflg={} would wrap as usize",
ws.wflg
);
let dext = (we - ws.wflg) as usize;
deg += dext;
ws.iw[pn] = e as i32;
pn += 1;
hash = hash.wrapping_add(e);
}
}
}
ws.elen[i] = (pn - p1 + 1) as i32;
let p3 = pn;
let p4 = p1 + ws.len[i] as usize;
for p in p2..p4 {
let j = ws.iw[p] as usize;
let nvj = ws.nv[j];
if nvj > 0 {
deg += nvj as usize;
ws.iw[pn] = j as i32;
pn += 1;
hash = hash.wrapping_add(j);
}
}
if ws.elen[i] == 1 && p3 == pn {
ws.pe[i] = flip(me as i32);
let nvi = -ws.nv[i];
debug_assert!(nvi >= 0);
degme -= nvi as usize;
nvpiv += nvi;
ws.nel += nvi as usize;
ws.nv[i] = 0;
ws.elen[i] = NONE;
ws.n_mass_elim += 1;
} else {
ws.degree[i] = ws.degree[i].min(deg as i32);
if p1 != pn {
ws.iw[pn] = ws.iw[p3];
}
if p3 != p1 {
ws.iw[p3] = ws.iw[p1];
}
ws.iw[p1] = me as i32;
ws.len[i] = (pn - p1 + 1) as i32;
let h = hash % ws.n;
let j = ws.head[h];
if j <= NONE {
ws.next[i] = flip(j);
ws.head[h] = flip(i as i32);
} else {
ws.next[i] = ws.last[j as usize];
ws.last[j as usize] = i as i32;
}
ws.last[i] = h as i32;
}
}
let degme_i32 = degme as i32;
ws.degree[me] = degme_i32;
if degme_i32 > ws.lemax {
ws.lemax = degme_i32;
}
ws.wflg += ws.lemax;
ws.wflg = clear_flag(ws.wflg, ws.wbig, &mut ws.w);
for pme in pme1..pme2_excl {
let i_anchor = ws.iw[pme] as usize;
if ws.nv[i_anchor] >= 0 {
continue; }
let h = ws.last[i_anchor] as usize;
let j_head = ws.head[h];
let mut i: i32 = if j_head == NONE {
NONE
} else if j_head < NONE {
ws.head[h] = NONE;
flip(j_head)
} else {
let chain_start = ws.last[j_head as usize];
ws.last[j_head as usize] = NONE;
chain_start
};
while i != NONE && ws.next[i as usize] != NONE {
let i_u = i as usize;
let ln = ws.len[i_u];
let eln = ws.elen[i_u];
let pi = ws.pe[i_u];
for p in (pi + 1) as usize..(pi + ln) as usize {
ws.w[ws.iw[p] as usize] = ws.wflg;
}
let mut jlast = i_u;
let mut jp = ws.next[i_u];
while jp != NONE {
let jj = jp as usize;
let mut ok = ws.len[jj] == ln && ws.elen[jj] == eln;
if ok {
let pj = ws.pe[jj];
for p in (pj + 1) as usize..(pj + ln) as usize {
if ws.w[ws.iw[p] as usize] != ws.wflg {
ok = false;
break;
}
}
}
if ok {
ws.pe[jj] = flip(i);
ws.nv[i_u] += ws.nv[jj];
ws.nv[jj] = 0;
ws.elen[jj] = NONE;
jp = ws.next[jj];
ws.next[jlast] = jp;
ws.n_supervar_merge += 1;
} else {
jlast = jj;
jp = ws.next[jj];
}
}
ws.wflg += 1;
i = ws.next[i_u];
}
}
let mut p_write = pme1;
let nleft = ws.n - ws.nel;
for pme in pme1..pme2_excl {
let i = ws.iw[pme] as usize;
let nvi = -ws.nv[i];
if nvi > 0 {
ws.nv[i] = nvi;
let mut d = ws.degree[i] as usize + degme_i32 as usize - nvi as usize;
let cap = nleft - nvi as usize;
if d > cap {
d = cap;
}
let inext = ws.head[d];
if inext != NONE {
ws.last[inext as usize] = i as i32;
}
ws.next[i] = inext;
ws.last[i] = NONE;
ws.head[d] = i as i32;
if d < ws.mindeg {
ws.mindeg = d;
}
ws.degree[i] = d as i32;
ws.iw[p_write] = i as i32;
p_write += 1;
}
}
ws.nv[me] = nvpiv;
ws.len[me] = (p_write as i32) - pme1 as i32;
if ws.len[me] == 0 {
ws.pe[me] = NONE;
ws.w[me] = 0;
}
if elenme != 0 {
ws.pfree = p_write;
}
let f = nvpiv as f64;
let r = degme_i32 as f64 + ws.ndense as f64;
let lnzme = f * r + (f - 1.0) * f / 2.0;
let s = f * r * r + r * (f - 1.0) * f + (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
StepFlops {
ndiv: lnzme,
nms_lu: s,
nms_ldl: (s + lnzme) / 2.0,
}
}
#[inline(always)]
fn amf_bucket_of(score: i64, n: usize) -> usize {
if score <= 0 {
return 0;
}
let s = score as usize;
if s <= n {
return s;
}
let pas = (n / 8).max(1);
let nbbuck = 2 * n;
((s - n) / pas + n).min(nbbuck)
}
#[inline(always)]
fn amf_wf_surface(dext: i64, degree: i64) -> i64 {
dext * (2 * degree - dext - 1)
}
#[inline(always)]
fn amf_wf_combine(wf4: i64, nvi: i64, wf3: i64) -> i64 {
wf4 + 2 * nvi * wf3
}
const AMF_DUMMY_I32: i32 = i32::MAX - 1;
pub fn select_pivot_amf(ws: &mut Workspace) -> Option<usize> {
let n = ws.n;
let nbuck = ws.head.len();
let mut deg = ws.mindeg;
while deg < nbuck && ws.head[deg] == NONE {
deg += 1;
}
if deg >= nbuck {
return None;
}
ws.mindeg = deg;
let head_me = ws.head[deg] as usize;
let me;
if deg > n {
let mut best = head_me;
let mut best_score = ws.wf[best];
let mut j = ws.next[best];
while j != NONE {
let ju = j as usize;
if ws.wf[ju] < best_score {
best_score = ws.wf[ju];
best = ju;
}
j = ws.next[ju];
}
me = best;
let ilast = ws.last[me];
let inext = ws.next[me];
if inext != NONE {
ws.last[inext as usize] = ilast;
}
if ilast != NONE {
ws.next[ilast as usize] = inext;
} else {
ws.head[deg] = inext;
}
} else {
me = head_me;
let inext = ws.next[me];
if inext != NONE {
ws.last[inext as usize] = NONE;
}
ws.head[deg] = inext;
}
Some(me)
}
pub fn create_element_amf(
ws: &mut Workspace,
me: usize,
) -> Result<(usize, usize, i32, usize), OrderingError> {
let n = ws.n;
let elenme = ws.elen[me];
let nvpiv = ws.nv[me];
ws.nel += nvpiv as usize;
ws.nv[me] = -nvpiv;
let mut degme: usize = 0;
let pme1: usize;
let pme2: i32;
if elenme == 0 {
let pme1_s = ws.pe[me];
pme1 = pme1_s as usize;
let list_start = pme1;
let list_end = list_start + ws.len[me] as usize;
let mut pme2_s = pme1_s - 1;
for p in list_start..list_end {
let i = ws.iw[p] as usize;
let nvi = ws.nv[i];
if nvi > 0 {
degme += nvi as usize;
ws.nv[i] = -nvi;
pme2_s += 1;
ws.iw[pme2_s as usize] = i as i32;
let ilast = ws.last[i];
let inext = ws.next[i];
if inext != NONE {
ws.last[inext as usize] = ilast;
}
if ilast != NONE {
ws.next[ilast as usize] = inext;
} else {
let h_idx = amf_bucket_of(ws.wf[i], n);
ws.head[h_idx] = inext;
}
}
}
pme2 = pme2_s;
} else {
let mut p = ws.pe[me] as usize;
let mut pme1_rw: usize = ws.pfree;
let slenme = (ws.len[me] - elenme) as usize;
let elenme_u = elenme as usize;
for knt1 in 1..=elenme_u + 1 {
let e: usize;
let mut pj: usize;
let ln: usize;
if knt1 > elenme_u {
e = me;
pj = p;
ln = slenme;
} else {
e = ws.iw[p] as usize;
p += 1;
pj = ws.pe[e] as usize;
ln = ws.len[e] as usize;
}
for knt2 in 1..=ln {
let i = ws.iw[pj] as usize;
pj += 1;
let nvi = ws.nv[i];
if nvi > 0 {
if ws.pfree >= ws.iwlen {
ws.pe[me] = p as i32;
ws.len[me] -= knt1 as i32;
if ws.len[me] == 0 {
ws.pe[me] = NONE;
}
ws.pe[e] = pj as i32;
ws.len[e] = (ln - knt2) as i32;
if ws.len[e] == 0 {
ws.pe[e] = NONE;
}
ws.ncmpa += 1;
for j in 0..ws.n {
let pn = ws.pe[j];
if pn >= 0 {
let pn_u = pn as usize;
ws.pe[j] = ws.iw[pn_u];
ws.iw[pn_u] = flip(j as i32);
}
}
let mut psrc = 0usize;
let mut pdst = 0usize;
let pend = pme1_rw;
while psrc < pend {
let j_marker = flip(ws.iw[psrc]);
psrc += 1;
if j_marker >= 0 {
let j = j_marker as usize;
ws.iw[pdst] = ws.pe[j];
ws.pe[j] = pdst as i32;
pdst += 1;
let lenj = ws.len[j] as usize;
if lenj > 0 {
ws.iw.copy_within(psrc..psrc + lenj - 1, pdst);
psrc += lenj - 1;
pdst += lenj - 1;
}
}
}
let p1 = pdst;
ws.iw.copy_within(pme1_rw..ws.pfree, pdst);
pdst += ws.pfree - pme1_rw;
pme1_rw = p1;
ws.pfree = pdst;
pj = ws.pe[e] as usize;
p = ws.pe[me] as usize;
}
degme += nvi as usize;
ws.nv[i] = -nvi;
ws.iw[ws.pfree] = i as i32;
ws.pfree += 1;
let ilast = ws.last[i];
let inext = ws.next[i];
if inext != NONE {
ws.last[inext as usize] = ilast;
}
if ilast != NONE {
ws.next[ilast as usize] = inext;
} else {
let h_idx = amf_bucket_of(ws.wf[i], n);
ws.head[h_idx] = inext;
}
}
}
if e != me {
ws.pe[e] = flip(me as i32);
ws.w[e] = 0;
}
}
pme1 = pme1_rw;
pme2 = (ws.pfree - 1) as i32;
}
ws.degree[me] = degme as i32;
ws.pe[me] = pme1 as i32;
ws.len[me] = pme2 - pme1 as i32 + 1;
ws.elen[me] = flip(nvpiv + degme as i32);
ws.wflg = clear_flag(ws.wflg, ws.wbig, &mut ws.w);
let pme2_excl: usize = if pme2 < pme1 as i32 {
pme1
} else {
(pme2 + 1) as usize
};
Ok((pme1, pme2_excl, nvpiv, degme))
}
#[allow(clippy::too_many_arguments)]
pub fn finalize_step_amf(
ws: &mut Workspace,
me: usize,
pme1: usize,
pme2_excl: usize,
nvpiv: i32,
degme: usize,
elenme: i32,
aggressive: bool,
) -> StepFlops {
let n = ws.n;
let mut degme = degme;
let mut nvpiv = nvpiv;
for pme in pme1..pme2_excl {
let i = ws.iw[pme] as usize;
let eln = ws.elen[i];
if eln > 0 {
let nvi = -ws.nv[i];
let wnvi = ws.wflg - nvi;
let pi = ws.pe[i] as usize;
for k in 0..eln as usize {
let e = ws.iw[pi + k] as usize;
let mut we = ws.w[e];
if we >= ws.wflg {
we -= nvi;
} else if we != 0 {
we = ws.degree[e] + wnvi;
ws.wf[e] = 0;
}
ws.w[e] = we;
}
}
}
for pme in pme1..pme2_excl {
let i = ws.iw[pme] as usize;
let p1 = ws.pe[i] as usize;
let p2 = p1 + ws.elen[i] as usize;
let mut pn = p1;
let mut deg: usize = 0;
let mut hash: usize = 0;
let mut wf3: i64 = 0;
let mut wf4: i64 = 0;
let nvi = -ws.nv[i];
if aggressive {
for p in p1..p2 {
let e = ws.iw[p] as usize;
let we = ws.w[e];
if we != 0 {
let dext = we - ws.wflg;
if dext > 0 {
if ws.wf[e] == 0 {
ws.wf[e] = amf_wf_surface(dext as i64, ws.degree[e] as i64);
}
wf4 += ws.wf[e];
deg += dext as usize;
ws.iw[pn] = e as i32;
pn += 1;
hash = hash.wrapping_add(e);
} else {
ws.pe[e] = flip(me as i32);
ws.w[e] = 0;
}
}
}
} else {
for p in p1..p2 {
let e = ws.iw[p] as usize;
let we = ws.w[e];
if we != 0 {
let dext = we - ws.wflg;
debug_assert!(
dext >= 0,
"stale mark: w[e]={we} < wflg={} would wrap as usize",
ws.wflg
);
if ws.wf[e] == 0 {
ws.wf[e] = amf_wf_surface(dext as i64, ws.degree[e] as i64);
}
wf4 += ws.wf[e];
deg += dext as usize;
ws.iw[pn] = e as i32;
pn += 1;
hash = hash.wrapping_add(e);
}
}
}
ws.elen[i] = (pn - p1 + 1) as i32;
let p3 = pn;
let p4 = p1 + ws.len[i] as usize;
for p in p2..p4 {
let j = ws.iw[p] as usize;
let nvj = ws.nv[j];
if nvj > 0 {
deg += nvj as usize;
wf3 += nvj as i64;
ws.iw[pn] = j as i32;
pn += 1;
hash = hash.wrapping_add(j);
}
}
if ws.elen[i] == 1 && p3 == pn {
ws.pe[i] = flip(me as i32);
let nvi_sv = -ws.nv[i];
debug_assert!(nvi_sv >= 0);
degme -= nvi_sv as usize;
nvpiv += nvi_sv;
ws.nel += nvi_sv as usize;
ws.nv[i] = 0;
ws.elen[i] = NONE;
ws.n_mass_elim += 1;
} else {
if ws.degree[i] < deg as i32 {
wf3 = 0;
wf4 = 0;
} else {
ws.degree[i] = deg as i32;
}
ws.wf[i] = amf_wf_combine(wf4, nvi as i64, wf3);
if p1 != pn {
ws.iw[pn] = ws.iw[p3];
}
if p3 != p1 {
ws.iw[p3] = ws.iw[p1];
}
ws.iw[p1] = me as i32;
ws.len[i] = (pn - p1 + 1) as i32;
let h = hash % n;
let j = ws.head[h];
if j <= NONE {
ws.next[i] = flip(j);
ws.head[h] = flip(i as i32);
} else {
ws.next[i] = ws.last[j as usize];
ws.last[j as usize] = i as i32;
}
ws.last[i] = h as i32;
}
}
let degme_i32 = degme as i32;
ws.degree[me] = degme_i32;
if degme_i32 > ws.lemax {
ws.lemax = degme_i32;
}
ws.wflg += ws.lemax;
ws.wflg = clear_flag(ws.wflg, ws.wbig, &mut ws.w);
for pme in pme1..pme2_excl {
let i_anchor = ws.iw[pme] as usize;
if ws.nv[i_anchor] >= 0 {
continue;
}
let h = ws.last[i_anchor] as usize;
let j_head = ws.head[h];
let mut i: i32 = if j_head == NONE {
NONE
} else if j_head < NONE {
ws.head[h] = NONE;
flip(j_head)
} else {
let chain_start = ws.last[j_head as usize];
ws.last[j_head as usize] = NONE;
chain_start
};
while i != NONE && ws.next[i as usize] != NONE {
let i_u = i as usize;
let ln = ws.len[i_u];
let eln = ws.elen[i_u];
let pi = ws.pe[i_u];
for p in (pi + 1) as usize..(pi + ln) as usize {
ws.w[ws.iw[p] as usize] = ws.wflg;
}
let mut jlast = i_u;
let mut jp = ws.next[i_u];
while jp != NONE {
let jj = jp as usize;
let mut ok = ws.len[jj] == ln && ws.elen[jj] == eln;
if ok {
let pj = ws.pe[jj];
for p in (pj + 1) as usize..(pj + ln) as usize {
if ws.w[ws.iw[p] as usize] != ws.wflg {
ok = false;
break;
}
}
}
if ok {
ws.pe[jj] = flip(i);
let wf_j = ws.wf[jj];
if wf_j > ws.wf[i_u] {
ws.wf[i_u] = wf_j;
}
ws.nv[i_u] += ws.nv[jj];
ws.nv[jj] = 0;
ws.elen[jj] = NONE;
jp = ws.next[jj];
ws.next[jlast] = jp;
ws.n_supervar_merge += 1;
} else {
jlast = jj;
jp = ws.next[jj];
}
}
ws.wflg += 1;
i = ws.next[i_u];
}
}
let dummy_f = AMF_DUMMY_I32 as f64;
let n_f = if n == 0 { 1.0 } else { n as f64 };
let mut p_write = pme1;
let nleft = ws.n - ws.nel;
for pme in pme1..pme2_excl {
let i = ws.iw[pme] as usize;
let nvi = -ws.nv[i];
if nvi > 0 {
ws.nv[i] = nvi;
let degme_i = degme_i32;
let nvi_i = nvi;
let rmf: f64;
let deg_i = ws.degree[i];
if (deg_i as usize) + (degme_i as usize) > nleft {
let deg_f = deg_i as f64;
let rmf1 = deg_f * (deg_f - 1.0 + 2.0 * degme_i as f64) - ws.wf[i] as f64;
let new_deg = (nleft as i32) - nvi_i;
ws.degree[i] = new_deg;
let nd = new_deg as f64;
let rmf_new =
nd * (nd - 1.0) - (degme_i - nvi_i) as f64 * (degme_i - nvi_i - 1) as f64;
rmf = rmf_new.min(rmf1);
} else {
let deg_f = deg_i as f64;
ws.degree[i] = deg_i + degme_i - nvi_i;
rmf = deg_f * (deg_f - 1.0 + 2.0 * degme_i as f64) - ws.wf[i] as f64;
}
let rmf = rmf / (nvi_i as f64 + 1.0);
let qscore: i32 = if rmf < dummy_f {
rmf.round() as i32
} else if rmf / n_f < dummy_f {
(rmf / n_f).round() as i32
} else {
AMF_DUMMY_I32
};
ws.wf[i] = qscore.max(1) as i64;
let d = amf_bucket_of(ws.wf[i], n);
let inext = ws.head[d];
if inext != NONE {
ws.last[inext as usize] = i as i32;
}
ws.next[i] = inext;
ws.last[i] = NONE;
ws.head[d] = i as i32;
if d < ws.mindeg {
ws.mindeg = d;
}
ws.iw[p_write] = i as i32;
p_write += 1;
}
}
ws.nv[me] = nvpiv;
ws.len[me] = (p_write as i32) - pme1 as i32;
if ws.len[me] == 0 {
ws.pe[me] = NONE;
ws.w[me] = 0;
}
if elenme != 0 {
ws.pfree = p_write;
}
let f = nvpiv as f64;
let r = degme_i32 as f64 + ws.ndense as f64;
let lnzme = f * r + (f - 1.0) * f / 2.0;
let s = f * r * r + r * (f - 1.0) * f + (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
StepFlops {
ndiv: lnzme,
nms_lu: s,
nms_ldl: (s + lnzme) / 2.0,
}
}
pub fn run_elimination_amf(
ws: &mut Workspace,
aggressive: bool,
) -> Result<StepFlops, OrderingError> {
let mut flops = StepFlops::default();
while ws.nel < ws.n {
let me = match select_pivot_amf(ws) {
Some(m) => m,
None => break,
};
let elenme = ws.elen[me];
let (pme1, pme2, nvpiv, degme) = create_element_amf(ws, me)?;
flops.accumulate(finalize_step_amf(
ws, me, pme1, pme2, nvpiv, degme, elenme, aggressive,
));
}
let f = ws.ndense as f64;
let lnzme = (f - 1.0) * f / 2.0;
let s = (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
flops.ndiv += lnzme;
flops.nms_lu += s;
flops.nms_ldl += (s + lnzme) / 2.0;
Ok(flops)
}
pub fn run_elimination(ws: &mut Workspace, aggressive: bool) -> Result<StepFlops, OrderingError> {
let mut flops = StepFlops::default();
while ws.nel < ws.n {
let me = match select_pivot(ws) {
Some(m) => m,
None => break, };
let elenme = ws.elen[me];
let (pme1, pme2, nvpiv, degme) = create_element(ws, me)?;
flops.accumulate(finalize_step(
ws, me, pme1, pme2, nvpiv, degme, elenme, aggressive,
));
}
let f = ws.ndense as f64;
let lnzme = (f - 1.0) * f / 2.0;
let s = (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
flops.ndiv += lnzme;
flops.nms_lu += s;
flops.nms_ldl += (s + lnzme) / 2.0;
Ok(flops)
}
pub fn finalize_permutation(ws: &mut Workspace) -> Vec<i32> {
let n = ws.n;
if n == 0 {
return Vec::new();
}
for x in ws.pe.iter_mut() {
*x = flip(*x);
}
for x in ws.elen.iter_mut() {
*x = flip(*x);
}
for i in 0..n {
if ws.nv[i] == 0 {
let head_i = ws.pe[i];
if head_i == NONE {
continue;
}
let mut j = head_i as usize;
while ws.nv[j] == 0 {
j = ws.pe[j] as usize;
}
let e = j as i32;
let mut j = i;
while ws.nv[j] == 0 {
let jnext = ws.pe[j];
ws.pe[j] = e;
j = jnext as usize;
}
}
}
assembly_tree_postorder(ws);
for x in ws.head.iter_mut() {
*x = NONE;
}
for e in 0..n {
let k = ws.w[e];
if k != NONE {
ws.head[k as usize] = e as i32;
}
}
for x in ws.next.iter_mut() {
*x = NONE;
}
let mut nel: i32 = 0;
for &e in ws.head.iter() {
if e == NONE {
break;
}
let eu = e as usize;
ws.next[eu] = nel;
nel += ws.nv[eu];
}
for i in 0..n {
if ws.nv[i] == 0 {
let e = ws.pe[i];
if e != NONE {
let eu = e as usize;
ws.next[i] = ws.next[eu];
ws.next[eu] += 1;
} else {
ws.next[i] = nel;
nel += 1;
}
}
}
let mut perm = vec![0i32; n];
for i in 0..n {
perm[ws.next[i] as usize] = i as i32;
}
perm
}
fn assembly_tree_postorder(ws: &mut Workspace) {
let n = ws.n;
for x in ws.head.iter_mut() {
*x = NONE;
}
for x in ws.next.iter_mut() {
*x = NONE;
}
for j in (0..n).rev() {
if ws.nv[j] > 0 {
let parent = ws.pe[j];
if parent >= 0 && (parent as usize) < n {
let pu = parent as usize;
ws.next[j] = ws.head[pu];
ws.head[pu] = j as i32;
}
}
}
for i in 0..n {
if ws.nv[i] > 0 && ws.head[i] != NONE {
let child0 = ws.head[i];
let mut fprev: i32 = NONE;
let mut bigfprev: i32 = NONE;
let mut bigf: i32 = NONE;
let mut maxfrsize: i32 = NONE;
let mut f = child0;
while f != NONE {
let fu = f as usize;
let frsize = ws.elen[fu];
if frsize >= maxfrsize {
maxfrsize = frsize;
bigfprev = fprev;
bigf = f;
}
fprev = f;
f = ws.next[fu];
}
let bigfu = bigf as usize;
let fnext = ws.next[bigfu];
if fnext != NONE {
if bigfprev != NONE {
ws.next[bigfprev as usize] = fnext;
} else {
ws.head[i] = fnext;
}
ws.next[bigfu] = NONE;
ws.next[fprev as usize] = bigf;
}
}
}
for x in ws.w.iter_mut() {
*x = NONE;
}
let mut k: usize = 0;
for i in 0..n {
if ws.pe[i] == NONE && ws.nv[i] > 0 {
k = post_tree_dfs(ws, i, k);
}
}
}
fn post_tree_dfs(ws: &mut Workspace, root: usize, k_start: usize) -> usize {
let mut k = k_start;
let mut top: usize = 1;
ws.last[0] = root as i32;
while top > 0 {
let i = ws.last[top - 1] as usize;
let child0 = ws.head[i];
if child0 != NONE {
let mut count = 0usize;
let mut f = child0;
while f != NONE {
count += 1;
f = ws.next[f as usize];
}
let new_top = top + count;
let mut t = new_top;
let mut f = child0;
loop {
t -= 1;
ws.last[t] = f;
let nf = ws.next[f as usize];
if nf == NONE {
break;
}
f = nf;
}
top = new_top;
ws.head[i] = NONE; } else {
top -= 1;
ws.w[i] = k as i32;
k += 1;
}
}
k
}
#[cfg(test)]
mod tests {
use super::*;
use crate::quotient_graph::WorkspaceOptions;
use crate::CscPattern;
fn ws_for<'a>(n: usize, cp: &'a [i32], ri: &'a [i32]) -> Workspace {
let p = CscPattern::new(n, cp, ri).unwrap();
Workspace::new(&p, &WorkspaceOptions::default()).unwrap()
}
#[test]
fn amf_wf_kernels_do_not_overflow_i32() {
assert!(2_147_534_622_i64 > i32::MAX as i64);
assert_eq!(amf_wf_surface(46342, 46342), 2_147_534_622_i64);
assert_eq!(amf_wf_surface(3, 4), 12);
assert!(4_295_161_928_i64 > u32::MAX as i64);
assert_eq!(amf_wf_combine(0, 46342, 46342), 4_295_161_928_i64);
assert_eq!(amf_wf_combine(5, 3, 4), 29);
}
#[test]
fn select_pivot_empty() {
let cp = [0, 1, 2, 3, 4];
let ri = [0, 1, 2, 3];
let mut ws = ws_for(4, &cp, &ri);
assert_eq!(select_pivot(&mut ws), None);
}
#[test]
fn select_pivot_lifo_on_tridiag() {
let cp = [0, 2, 5, 8, 11, 13];
let ri = [0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4];
let mut ws = ws_for(5, &cp, &ri);
assert_eq!(select_pivot(&mut ws), Some(4));
assert_eq!(ws.mindeg, 1);
assert_eq!(ws.head[1], 0);
assert_eq!(ws.last[0], NONE, "new head has no predecessor");
assert_eq!(select_pivot(&mut ws), Some(0));
assert_eq!(ws.head[1], NONE, "deg-1 bucket drained");
assert_eq!(select_pivot(&mut ws), Some(3));
assert_eq!(ws.mindeg, 2);
}
#[test]
fn create_element_elenme_zero_on_arrow_5_hub() {
let cp = [0, 5, 7, 9, 11, 13];
let ri = [0, 1, 2, 3, 4, 0, 1, 0, 2, 0, 3, 0, 4];
let mut ws = ws_for(5, &cp, &ri);
let me = select_pivot(&mut ws).unwrap();
assert_eq!(me, 4, "first pivot is the LIFO head of deg-1");
assert_eq!(ws.elen[4], 0);
let (pme1, pme2_excl, nvpiv, degme) = create_element(&mut ws, me).unwrap();
assert_eq!(nvpiv, 1, "singleton supervariable");
assert_eq!(degme, 1, "only neighbor is the hub (nv=1)");
assert_eq!(pme2_excl - pme1, 1);
assert_eq!(ws.iw[pme1], 0);
assert_eq!(ws.pe[4], pme1 as i32);
assert_eq!(ws.len[4], 1);
assert_eq!(ws.elen[4], flip(1 + 1), "flip(nvpiv + degme)");
assert_eq!(ws.nv[4], -1, "pivot marker");
assert_eq!(ws.nv[0], -1, "hub marked");
assert_eq!(ws.head[4], NONE);
assert_eq!(ws.nel, 1);
}
#[test]
fn create_element_elenme_zero_unlinks_from_degree_list() {
let cp = [0, 2, 5, 8, 11, 13];
let ri = [0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4];
let mut ws = ws_for(5, &cp, &ri);
let me = select_pivot(&mut ws).unwrap();
assert_eq!(me, 4);
let (_, _, _, _) = create_element(&mut ws, me).unwrap();
assert_eq!(ws.head[2], 2);
assert_eq!(ws.last[2], NONE);
assert_eq!(ws.nv[3], -1);
}
#[test]
fn create_element_skips_absorbed_neighbors() {
let cp = [0, 2, 5, 8, 11, 13];
let ri = [0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4];
let mut ws = ws_for(5, &cp, &ri);
ws.nv[0] = 0;
let me = select_pivot(&mut ws).unwrap();
assert_eq!(me, 4);
let (_, _, nvpiv, degme) = create_element(&mut ws, me).unwrap();
assert_eq!(nvpiv, 1);
assert_eq!(degme, 1);
}
#[test]
fn run_elimination_diag_4() {
let cp = [0, 1, 2, 3, 4];
let ri = [0, 1, 2, 3];
let mut ws = ws_for(4, &cp, &ri);
let flops = run_elimination(&mut ws, true).unwrap();
assert_eq!(ws.nel, 4);
assert_eq!(flops.ndiv, 0.0);
}
#[test]
fn run_elimination_arrow_5() {
let cp = [0, 5, 7, 9, 11, 13];
let ri = [0, 1, 2, 3, 4, 0, 1, 0, 2, 0, 3, 0, 4];
let mut ws = ws_for(5, &cp, &ri);
run_elimination(&mut ws, true).unwrap();
assert_eq!(ws.nel, 5);
for i in 0..5 {
assert!(ws.nv[i] >= 0, "nv[{}] = {}", i, ws.nv[i]);
}
}
#[test]
fn run_elimination_tridiag_10() {
let n = 10usize;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
for j in 0..n {
if j > 0 {
ri.push((j - 1) as i32);
}
ri.push(j as i32);
if j + 1 < n {
ri.push((j + 1) as i32);
}
cp.push(ri.len() as i32);
}
let p = CscPattern::new(n, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
assert_eq!(ws.nel, n);
}
#[test]
fn run_elimination_grid_7x7() {
let m = 7usize;
let n = 7usize;
let total = m * n;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
use std::collections::BTreeSet;
let idx = |r: usize, c: usize| r * n + c;
for c in 0..total {
let r0 = c / n;
let c0 = c % n;
let mut neigh: BTreeSet<usize> = BTreeSet::new();
neigh.insert(c);
if r0 > 0 {
neigh.insert(idx(r0 - 1, c0));
}
if r0 + 1 < m {
neigh.insert(idx(r0 + 1, c0));
}
if c0 > 0 {
neigh.insert(idx(r0, c0 - 1));
}
if c0 + 1 < n {
neigh.insert(idx(r0, c0 + 1));
}
for &r in &neigh {
ri.push(r as i32);
}
cp.push(ri.len() as i32);
}
let p = CscPattern::new(total, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
assert_eq!(ws.nel, total);
}
#[test]
fn run_elimination_band_20_3_triggers_gc() {
let n = 20usize;
let b = 3usize;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
for j in 0..n {
let lo = j.saturating_sub(b);
let hi = (j + b + 1).min(n);
for r in lo..hi {
ri.push(r as i32);
}
cp.push(ri.len() as i32);
}
let p = CscPattern::new(n, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
assert_eq!(ws.nel, n);
assert!(
ws.ncmpa >= 1,
"expected at least one compaction on band(20,3), got ncmpa={}",
ws.ncmpa
);
}
#[test]
fn run_elimination_arrow_200() {
let n = 200usize;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
ri.push(0);
for r in 1..n {
ri.push(r as i32);
}
cp.push(ri.len() as i32);
for j in 1..n {
ri.push(0);
ri.push(j as i32);
cp.push(ri.len() as i32);
}
let p = CscPattern::new(n, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
assert_eq!(ws.ndense, 1);
assert_eq!(ws.nel, n);
}
#[test]
fn arrow_200_first_pivot_smoke() {
let n = 200usize;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
ri.push(0);
for r in 1..n {
ri.push(r as i32);
}
cp.push(ri.len() as i32);
for j in 1..n {
ri.push(0);
ri.push(j as i32);
cp.push(ri.len() as i32);
}
let p = CscPattern::new(n, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
let me = select_pivot(&mut ws).unwrap();
let (_, _, nvpiv, degme) = create_element(&mut ws, me).unwrap();
assert_eq!(nvpiv, 1);
assert_eq!(degme, 0, "spoke's only neighbor (hub) is deferred");
assert_eq!(ws.nel, 2, "1 deferred hub + 1 pivot");
}
fn is_permutation(perm: &[i32]) -> bool {
let n = perm.len();
let mut seen = vec![false; n];
for &p in perm {
if p < 0 {
return false;
}
let pu = p as usize;
if pu >= n || seen[pu] {
return false;
}
seen[pu] = true;
}
true
}
#[test]
fn permutation_diag_4() {
let cp = [0, 1, 2, 3, 4];
let ri = [0, 1, 2, 3];
let mut ws = ws_for(4, &cp, &ri);
run_elimination(&mut ws, true).unwrap();
let perm = finalize_permutation(&mut ws);
assert_eq!(perm.len(), 4);
assert!(is_permutation(&perm));
assert_eq!(perm, vec![0, 1, 2, 3]);
}
#[test]
fn permutation_arrow_5_valid() {
let cp = [0, 5, 7, 9, 11, 13];
let ri = [0, 1, 2, 3, 4, 0, 1, 0, 2, 0, 3, 0, 4];
let mut ws = ws_for(5, &cp, &ri);
run_elimination(&mut ws, true).unwrap();
let perm = finalize_permutation(&mut ws);
assert_eq!(perm.len(), 5);
assert!(is_permutation(&perm));
}
#[test]
fn permutation_tridiag_10() {
let n = 10usize;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
for j in 0..n {
if j > 0 {
ri.push((j - 1) as i32);
}
ri.push(j as i32);
if j + 1 < n {
ri.push((j + 1) as i32);
}
cp.push(ri.len() as i32);
}
let p = CscPattern::new(n, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
let perm = finalize_permutation(&mut ws);
assert_eq!(perm.len(), n);
assert!(is_permutation(&perm));
}
#[test]
fn permutation_arrow_200_hub_deferred() {
let n = 200usize;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
ri.push(0);
for r in 1..n {
ri.push(r as i32);
}
cp.push(ri.len() as i32);
for j in 1..n {
ri.push(0);
ri.push(j as i32);
cp.push(ri.len() as i32);
}
let p = CscPattern::new(n, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
let perm = finalize_permutation(&mut ws);
assert_eq!(perm.len(), n);
assert!(is_permutation(&perm));
assert_eq!(
perm[n - 1],
0,
"dense-deferred hub lands at the tail of the permutation"
);
}
#[test]
fn permutation_grid_7x7() {
let m = 7usize;
let n = 7usize;
let total = m * n;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
use std::collections::BTreeSet;
let idx = |r: usize, c: usize| r * n + c;
for c in 0..total {
let r0 = c / n;
let c0 = c % n;
let mut neigh: BTreeSet<usize> = BTreeSet::new();
neigh.insert(c);
if r0 > 0 {
neigh.insert(idx(r0 - 1, c0));
}
if r0 + 1 < m {
neigh.insert(idx(r0 + 1, c0));
}
if c0 > 0 {
neigh.insert(idx(r0, c0 - 1));
}
if c0 + 1 < n {
neigh.insert(idx(r0, c0 + 1));
}
for &r in &neigh {
ri.push(r as i32);
}
cp.push(ri.len() as i32);
}
let p = CscPattern::new(total, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
let perm = finalize_permutation(&mut ws);
assert_eq!(perm.len(), total);
assert!(is_permutation(&perm));
}
#[test]
fn permutation_band_20_3() {
let n = 20usize;
let b = 3usize;
let mut cp: Vec<i32> = vec![0];
let mut ri: Vec<i32> = Vec::new();
for j in 0..n {
let lo = j.saturating_sub(b);
let hi = (j + b + 1).min(n);
for r in lo..hi {
ri.push(r as i32);
}
cp.push(ri.len() as i32);
}
let p = CscPattern::new(n, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
let perm = finalize_permutation(&mut ws);
assert_eq!(perm.len(), n);
assert!(is_permutation(&perm));
}
#[test]
fn permutation_empty() {
let cp = [0i32];
let ri: [i32; 0] = [];
let p = CscPattern::new(0, &cp, &ri).unwrap();
let mut ws = Workspace::new(&p, &WorkspaceOptions::default()).unwrap();
run_elimination(&mut ws, true).unwrap();
let perm = finalize_permutation(&mut ws);
assert!(perm.is_empty());
}
}