use integral_core::os_eri::{self, ShellRef};
use integral_core::{os, rys};
use crate::shell::{Basis, Shell};
use crate::spherical::{shell_transform, transform_block, transform_block4_into};
pub(crate) fn to_func_1e(block: Vec<f64>, sa: &Shell, sb: &Shell) -> Vec<f64> {
let mats = [shell_transform(sa), shell_transform(sb)];
transform_block(
block,
&[sa.n_cart(), sb.n_cart()],
&[mats[0].as_deref(), mats[1].as_deref()],
)
}
pub(crate) fn to_func_eri(
block: Vec<f64>,
sa: &Shell,
sb: &Shell,
sc: &Shell,
sd: &Shell,
) -> Vec<f64> {
let mats = [
shell_transform(sa),
shell_transform(sb),
shell_transform(sc),
shell_transform(sd),
];
to_func_eri_cached(
block,
[sa, sb, sc, sd],
[
mats[0].as_deref(),
mats[1].as_deref(),
mats[2].as_deref(),
mats[3].as_deref(),
],
)
}
pub(crate) fn to_func_eri_cached(
block: Vec<f64>,
s: [&Shell; 4],
mats: [Option<&[f64]>; 4],
) -> Vec<f64> {
transform_block(
block,
&[s[0].n_cart(), s[1].n_cart(), s[2].n_cart(), s[3].n_cart()],
&mats,
)
}
#[derive(Default)]
pub(crate) struct QuartetScratch {
pub(crate) block: Vec<f64>,
tmp: Vec<f64>,
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn quartet_into_scratch(
scratch: &mut QuartetScratch,
engine: Engine,
s: [&Shell; 4],
eff: [&[f64]; 4],
mats: [Option<&[f64]>; 4],
) -> usize {
let dims = [s[0].n_cart(), s[1].n_cart(), s[2].n_cart(), s[3].n_cart()];
let n_cart: usize = dims.iter().product();
if scratch.block.len() < n_cart {
scratch.block.resize(n_cart, 0.0);
}
scratch.block[..n_cart].fill(0.0);
contract_quartet_cached_into(
engine,
s[0],
eff[0],
s[1],
eff[1],
s[2],
eff[2],
s[3],
eff[3],
&mut scratch.block[..n_cart],
);
transform_block4_into(&mut scratch.block, dims, &mats, &mut scratch.tmp)
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn quartet_into_scratch_pairs(
scratch: &mut QuartetScratch,
engine: Engine,
s: [&Shell; 4],
eff: [&[f64]; 4],
mats: [Option<&[f64]>; 4],
bra_pairs: &os_eri::ShellPairData,
ket_pairs: &os_eri::ShellPairData,
) -> usize {
let dims = [s[0].n_cart(), s[1].n_cart(), s[2].n_cart(), s[3].n_cart()];
let n_cart: usize = dims.iter().product();
if scratch.block.len() < n_cart {
scratch.block.resize(n_cart, 0.0);
}
scratch.block[..n_cart].fill(0.0);
let resolved = match engine {
Engine::Auto => select_engine(
s[0].l() + s[1].l() + s[2].l() + s[3].l(),
s[0].n_prim() * s[1].n_prim() * s[2].n_prim() * s[3].n_prim(),
),
forced => forced,
};
match resolved {
Engine::OsHgp => os_eri::coulomb_shell_pairs_into(
shell_ref(s[0], eff[0]),
shell_ref(s[1], eff[1]),
shell_ref(s[2], eff[2]),
shell_ref(s[3], eff[3]),
bra_pairs,
ket_pairs,
&mut scratch.block[..n_cart],
),
_ => contract_quartet_rys(
s[0],
eff[0],
s[1],
eff[1],
s[2],
eff[2],
s[3],
eff[3],
&mut scratch.block[..n_cart],
),
}
transform_block4_into(&mut scratch.block, dims, &mats, &mut scratch.tmp)
}
#[derive(Default)]
struct EriBatchQueue {
buckets: std::collections::BTreeMap<(usize, usize, usize, usize), Vec<[usize; 4]>>,
blocks: [Vec<f64>; 4],
core: os_eri::EriBatch4Scratch,
}
fn shell_ref<'a>(s: &'a Shell, eff: &'a [f64]) -> ShellRef<'a> {
ShellRef {
center: s.center(),
l: s.l(),
exps: s.exponents(),
coeffs: eff,
}
}
#[allow(clippy::too_many_arguments)]
fn flush_batch4(
queue: &mut EriBatchQueue,
scratch: &mut QuartetScratch,
group: [[usize; 4]; 4],
shells: &[Shell],
eff: &[Vec<f64>],
pair_data: &[os_eri::ShellPairData],
c2s: &[Option<Vec<f64>>],
out: &mut [f64],
nao: usize,
offs: &[usize],
) {
let quartets: [[ShellRef<'_>; 4]; 4] = group.map(|sidx| {
[
shell_ref(&shells[sidx[0]], &eff[sidx[0]]),
shell_ref(&shells[sidx[1]], &eff[sidx[1]]),
shell_ref(&shells[sidx[2]], &eff[sidx[2]]),
shell_ref(&shells[sidx[3]], &eff[sidx[3]]),
]
});
let dims: [[usize; 4]; 4] = group.map(|sidx| [0, 1, 2, 3].map(|q| shells[sidx[q]].n_cart()));
for (lane, d) in dims.iter().enumerate() {
let n: usize = d.iter().product();
if queue.blocks[lane].len() < n {
queue.blocks[lane].resize(n, 0.0);
}
queue.blocks[lane][..n].fill(0.0);
}
{
let [bl0, bl1, bl2, bl3] = &mut queue.blocks;
let mut outs: [&mut [f64]; 4] = [
&mut bl0[..dims[0].iter().product()],
&mut bl1[..dims[1].iter().product()],
&mut bl2[..dims[2].iter().product()],
&mut bl3[..dims[3].iter().product()],
];
os_eri::coulomb_shell_batch4_pairs_into_scratch(
&mut queue.core,
&quartets,
group.map(|sidx| &pair_data[tri_idx(sidx[0], sidx[1])]),
group.map(|sidx| &pair_data[tri_idx(sidx[2], sidx[3])]),
&mut outs,
);
}
for (lane, sidx) in group.iter().enumerate() {
let mats = [
c2s[sidx[0]].as_deref(),
c2s[sidx[1]].as_deref(),
c2s[sidx[2]].as_deref(),
c2s[sidx[3]].as_deref(),
];
let len =
transform_block4_into(&mut queue.blocks[lane], dims[lane], &mats, &mut scratch.tmp);
scatter_eri_block_s8(
out,
nao,
*sidx,
offs,
[0, 1, 2, 3].map(|q| shells[sidx[q]].n_func()),
&queue.blocks[lane][..len],
);
}
}
fn shell_pair_datas(shells: &[Shell], eff: &[Vec<f64>]) -> Vec<os_eri::ShellPairData> {
let nsh = shells.len();
let mut data = Vec::with_capacity(nsh * (nsh + 1) / 2);
for i in 0..nsh {
for j in 0..=i {
data.push(os_eri::shell_pair_data(
shell_ref(&shells[i], &eff[i]),
shell_ref(&shells[j], &eff[j]),
));
}
}
data
}
#[inline]
fn tri_idx(i: usize, j: usize) -> usize {
debug_assert!(i >= j);
i * (i + 1) / 2 + j
}
fn batch_key(
engine: Engine,
s: [&Shell; 4],
pair_counts: [usize; 2],
) -> Option<(usize, usize, usize, usize)> {
let resolved = match engine {
Engine::Auto => select_engine(
s[0].l() + s[1].l() + s[2].l() + s[3].l(),
s[0].n_prim() * s[1].n_prim() * s[2].n_prim() * s[3].n_prim(),
),
forced => forced,
};
if resolved != Engine::OsHgp {
return None;
}
let ne = s[0].l() + s[1].l();
let nf = s[2].l() + s[3].l();
if ne <= 3 && nf <= 3 {
Some((ne, nf, pair_counts[0], pair_counts[1]))
} else {
None
}
}
#[allow(clippy::too_many_arguments)]
fn drain_batch_queue(
queue: &mut EriBatchQueue,
scratch: &mut QuartetScratch,
engine: Engine,
shells: &[Shell],
eff: &[Vec<f64>],
pair_data: &[os_eri::ShellPairData],
c2s: &[Option<Vec<f64>>],
out: &mut [f64],
nao: usize,
offs: &[usize],
) {
let buckets = std::mem::take(&mut queue.buckets);
for sidx in buckets.into_values().flatten() {
let [si, sj, sk, sl] = sidx;
let len = quartet_into_scratch_pairs(
scratch,
engine,
[&shells[si], &shells[sj], &shells[sk], &shells[sl]],
[&eff[si], &eff[sj], &eff[sk], &eff[sl]],
[
c2s[si].as_deref(),
c2s[sj].as_deref(),
c2s[sk].as_deref(),
c2s[sl].as_deref(),
],
&pair_data[tri_idx(si, sj)],
&pair_data[tri_idx(sk, sl)],
);
scatter_eri_block_s8(
out,
nao,
sidx,
offs,
[0, 1, 2, 3].map(|q| shells[sidx[q]].n_func()),
&scratch.block[..len],
);
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Engine {
#[default]
Auto,
OsHgp,
Rys,
}
#[must_use]
pub fn select_engine(l_total: usize, contraction_degree: usize) -> Engine {
let threshold = match l_total {
0..=5 => 1, 6..=16 => 16, _ => return Engine::Rys, };
if contraction_degree >= threshold {
Engine::OsHgp
} else {
Engine::Rys
}
}
pub(crate) fn place_block(
mat: &mut [f64],
n: usize,
row_off: usize,
col_off: usize,
block: &[f64],
nb: usize,
) {
let na = block.len() / nb;
for i in 0..na {
for j in 0..nb {
mat[(row_off + i) * n + col_off + j] = block[i * nb + j];
}
}
}
fn contract_pair<F>(sa: &Shell, sb: &Shell, mut prim_op: F) -> Vec<f64>
where
F: FnMut(os::Prim, os::Prim, f64, &mut [f64]),
{
let mut block = vec![0.0; sa.n_cart() * sb.n_cart()];
for pi in 0..sa.n_prim() {
for pj in 0..sb.n_prim() {
let scale = sa.primitive_coeff(pi) * sb.primitive_coeff(pj);
prim_op(sa.prim(pi), sb.prim(pj), scale, &mut block);
}
}
block
}
impl Basis {
#[must_use]
pub fn overlap(&self) -> Vec<f64> {
let n = self.nao();
let offs = self.offsets();
let mut mat = vec![0.0; n * n];
for (si, sa) in self.shells().iter().enumerate() {
for (sj, sb) in self.shells().iter().enumerate() {
let block = to_func_1e(contract_pair(sa, sb, os::overlap_into), sa, sb);
place_block(&mut mat, n, offs[si], offs[sj], &block, sb.n_func());
}
}
mat
}
#[must_use]
pub fn kinetic(&self) -> Vec<f64> {
let n = self.nao();
let offs = self.offsets();
let mut mat = vec![0.0; n * n];
for (si, sa) in self.shells().iter().enumerate() {
for (sj, sb) in self.shells().iter().enumerate() {
let block = to_func_1e(contract_pair(sa, sb, os::kinetic_into), sa, sb);
place_block(&mut mat, n, offs[si], offs[sj], &block, sb.n_func());
}
}
mat
}
#[must_use]
pub fn nuclear(&self, charges: &[([f64; 3], f64)]) -> Vec<f64> {
let n = self.nao();
let offs = self.offsets();
let mut mat = vec![0.0; n * n];
for (si, sa) in self.shells().iter().enumerate() {
for (sj, sb) in self.shells().iter().enumerate() {
let block = to_func_1e(
contract_pair(sa, sb, |a, b, scale, out| {
os::nuclear_into(a, b, charges, scale, out);
}),
sa,
sb,
);
place_block(&mut mat, n, offs[si], offs[sj], &block, sb.n_func());
}
}
mat
}
#[must_use]
pub fn dipole(&self, o: [f64; 3]) -> [Vec<f64>; 3] {
let n = self.nao();
let offs = self.offsets();
let mut dx = vec![0.0; n * n];
let mut dy = vec![0.0; n * n];
let mut dz = vec![0.0; n * n];
for (si, sa) in self.shells().iter().enumerate() {
for (sj, sb) in self.shells().iter().enumerate() {
let (na, nb) = (sa.n_cart(), sb.n_cart());
let (mut bx, mut by, mut bz) =
(vec![0.0; na * nb], vec![0.0; na * nb], vec![0.0; na * nb]);
for pi in 0..sa.n_prim() {
for pj in 0..sb.n_prim() {
let scale = sa.primitive_coeff(pi) * sb.primitive_coeff(pj);
os::dipole_into(
sa.prim(pi),
sb.prim(pj),
o,
scale,
&mut bx,
&mut by,
&mut bz,
);
}
}
let bx = to_func_1e(bx, sa, sb);
let by = to_func_1e(by, sa, sb);
let bz = to_func_1e(bz, sa, sb);
let nbf = sb.n_func();
place_block(&mut dx, n, offs[si], offs[sj], &bx, nbf);
place_block(&mut dy, n, offs[si], offs[sj], &by, nbf);
place_block(&mut dz, n, offs[si], offs[sj], &bz, nbf);
}
}
[dx, dy, dz]
}
#[must_use]
pub fn eri_block(&self, i: usize, j: usize, k: usize, l: usize) -> Vec<f64> {
self.eri_block_with(Engine::Auto, i, j, k, l)
}
#[must_use]
pub fn eri_block_with(
&self,
engine: Engine,
i: usize,
j: usize,
k: usize,
l: usize,
) -> Vec<f64> {
let s = self.shells();
let (sa, sb, sc, sd) = (&s[i], &s[j], &s[k], &s[l]);
let block = contract_quartet(engine, sa, sb, sc, sd);
to_func_eri(block, sa, sb, sc, sd)
}
#[must_use]
pub fn eri(&self) -> Vec<f64> {
self.eri_with(Engine::Auto)
}
#[must_use]
pub fn eri_with(&self, engine: Engine) -> Vec<f64> {
let nao = self.nao();
let offs = self.offsets();
let shells = self.shells();
let eff: Vec<Vec<f64>> = shells.iter().map(effective_coeffs).collect();
let c2s: Vec<Option<Vec<f64>>> = shells.iter().map(shell_transform).collect();
let mut out = vec![0.0; nao * nao * nao * nao];
let mut scratch = QuartetScratch::default();
let mut queue = EriBatchQueue::default();
let pair_data = shell_pair_datas(shells, &eff);
for (si, sa) in shells.iter().enumerate() {
for (sj, sb) in shells.iter().enumerate().take(si + 1) {
let bra_data = &pair_data[tri_idx(si, sj)];
for (sk, sc) in shells.iter().enumerate().take(si + 1) {
let l_top = if sk == si { sj } else { sk };
for (sl, sd) in shells.iter().enumerate().take(l_top + 1) {
let ket_data = &pair_data[tri_idx(sk, sl)];
if let Some(key) =
batch_key(engine, [sa, sb, sc, sd], [bra_data.len(), ket_data.len()])
{
let pending = queue.buckets.entry(key).or_default();
pending.push([si, sj, sk, sl]);
if pending.len() == 4 {
let group: [[usize; 4]; 4] =
[pending[0], pending[1], pending[2], pending[3]];
pending.clear();
flush_batch4(
&mut queue,
&mut scratch,
group,
shells,
&eff,
&pair_data,
&c2s,
&mut out,
nao,
&offs,
);
}
continue;
}
let len = quartet_into_scratch_pairs(
&mut scratch,
engine,
[sa, sb, sc, sd],
[&eff[si], &eff[sj], &eff[sk], &eff[sl]],
[
c2s[si].as_deref(),
c2s[sj].as_deref(),
c2s[sk].as_deref(),
c2s[sl].as_deref(),
],
bra_data,
ket_data,
);
scatter_eri_block_s8(
&mut out,
nao,
[si, sj, sk, sl],
&offs,
[sa.n_func(), sb.n_func(), sc.n_func(), sd.n_func()],
&scratch.block[..len],
);
}
}
}
}
drain_batch_queue(
&mut queue,
&mut scratch,
engine,
shells,
&eff,
&pair_data,
&c2s,
&mut out,
nao,
&offs,
);
out
}
#[must_use]
pub fn schwarz_bounds(&self) -> Vec<f64> {
self.schwarz_bounds_with(Engine::Auto)
}
#[must_use]
pub fn schwarz_bounds_with(&self, engine: Engine) -> Vec<f64> {
let shells = self.shells();
let nsh = shells.len();
let mut q = vec![0.0; nsh * nsh];
for i in 0..nsh {
for j in 0..nsh {
let (ni, nj) = (shells[i].n_func(), shells[j].n_func());
let block = self.eri_block_with(engine, i, j, i, j);
let mut mx = 0.0_f64;
for mu in 0..ni {
for nu in 0..nj {
let idx = ((mu * nj + nu) * ni + mu) * nj + nu;
mx = mx.max(block[idx].abs());
}
}
q[i * nsh + j] = mx.sqrt();
}
}
q
}
#[must_use]
pub fn eri_screened(&self, tau: f64) -> (Vec<f64>, ScreeningStats) {
self.eri_screened_with(Engine::Auto, tau)
}
#[must_use]
pub fn eri_screened_with(&self, engine: Engine, tau: f64) -> (Vec<f64>, ScreeningStats) {
let nao = self.nao();
let offs = self.offsets();
let shells = self.shells();
let nsh = shells.len();
let q = self.schwarz_bounds_with(engine);
let eff: Vec<Vec<f64>> = shells.iter().map(effective_coeffs).collect();
let c2s: Vec<Option<Vec<f64>>> = shells.iter().map(shell_transform).collect();
let mut out = vec![0.0; nao * nao * nao * nao];
let mut total = 0_usize;
let mut skipped = 0_usize;
let mut scratch = QuartetScratch::default();
let mut queue = EriBatchQueue::default();
let pair_data = shell_pair_datas(shells, &eff);
for si in 0..nsh {
for sj in 0..=si {
let qij = q[si * nsh + sj];
let bra_data = &pair_data[tri_idx(si, sj)];
for sk in 0..=si {
let l_top = if sk == si { sj } else { sk };
for sl in 0..=l_top {
total += 1;
if qij * q[sk * nsh + sl] < tau {
skipped += 1;
continue;
}
let ket_data = &pair_data[tri_idx(sk, sl)];
if let Some(key) = batch_key(
engine,
[&shells[si], &shells[sj], &shells[sk], &shells[sl]],
[bra_data.len(), ket_data.len()],
) {
let pending = queue.buckets.entry(key).or_default();
pending.push([si, sj, sk, sl]);
if pending.len() == 4 {
let group: [[usize; 4]; 4] =
[pending[0], pending[1], pending[2], pending[3]];
pending.clear();
flush_batch4(
&mut queue,
&mut scratch,
group,
shells,
&eff,
&pair_data,
&c2s,
&mut out,
nao,
&offs,
);
}
continue;
}
let len = quartet_into_scratch_pairs(
&mut scratch,
engine,
[&shells[si], &shells[sj], &shells[sk], &shells[sl]],
[&eff[si], &eff[sj], &eff[sk], &eff[sl]],
[
c2s[si].as_deref(),
c2s[sj].as_deref(),
c2s[sk].as_deref(),
c2s[sl].as_deref(),
],
bra_data,
ket_data,
);
scatter_eri_block_s8(
&mut out,
nao,
[si, sj, sk, sl],
&offs,
[
shells[si].n_func(),
shells[sj].n_func(),
shells[sk].n_func(),
shells[sl].n_func(),
],
&scratch.block[..len],
);
}
}
}
}
drain_batch_queue(
&mut queue,
&mut scratch,
engine,
shells,
&eff,
&pair_data,
&c2s,
&mut out,
nao,
&offs,
);
(
out,
ScreeningStats {
shell_quartets_total: total,
shell_quartets_skipped: skipped,
tau,
},
)
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScreeningStats {
pub shell_quartets_total: usize,
pub shell_quartets_skipped: usize,
pub tau: f64,
}
impl ScreeningStats {
#[must_use]
pub fn skipped_fraction(&self) -> f64 {
if self.shell_quartets_total == 0 {
0.0
} else {
self.shell_quartets_skipped as f64 / self.shell_quartets_total as f64
}
}
}
fn contract_quartet(engine: Engine, sa: &Shell, sb: &Shell, sc: &Shell, sd: &Shell) -> Vec<f64> {
contract_quartet_cached(
engine,
sa,
&effective_coeffs(sa),
sb,
&effective_coeffs(sb),
sc,
&effective_coeffs(sc),
sd,
&effective_coeffs(sd),
)
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn contract_quartet_cached(
engine: Engine,
sa: &Shell,
ea: &[f64],
sb: &Shell,
eb: &[f64],
sc: &Shell,
ec: &[f64],
sd: &Shell,
ed: &[f64],
) -> Vec<f64> {
let mut block = vec![0.0; sa.n_cart() * sb.n_cart() * sc.n_cart() * sd.n_cart()];
contract_quartet_cached_into(engine, sa, ea, sb, eb, sc, ec, sd, ed, &mut block);
block
}
#[allow(clippy::too_many_arguments)]
fn contract_quartet_cached_into(
engine: Engine,
sa: &Shell,
ea: &[f64],
sb: &Shell,
eb: &[f64],
sc: &Shell,
ec: &[f64],
sd: &Shell,
ed: &[f64],
block: &mut [f64],
) {
let resolved = match engine {
Engine::Auto => select_engine(
sa.l() + sb.l() + sc.l() + sd.l(),
sa.n_prim() * sb.n_prim() * sc.n_prim() * sd.n_prim(),
),
forced => forced,
};
match resolved {
Engine::OsHgp => contract_quartet_oshgp(sa, ea, sb, eb, sc, ec, sd, ed, block),
_ => contract_quartet_rys(sa, ea, sb, eb, sc, ec, sd, ed, block),
}
}
pub(crate) fn effective_coeffs(s: &Shell) -> Vec<f64> {
(0..s.n_prim()).map(|i| s.primitive_coeff(i)).collect()
}
#[allow(clippy::too_many_arguments)]
fn contract_quartet_rys(
sa: &Shell,
ea: &[f64],
sb: &Shell,
eb: &[f64],
sc: &Shell,
ec: &[f64],
sd: &Shell,
ed: &[f64],
block: &mut [f64],
) {
for (pa, &ca) in ea.iter().enumerate() {
for (pb, &cb) in eb.iter().enumerate() {
for (pc, &cc) in ec.iter().enumerate() {
for (pd, &cd) in ed.iter().enumerate() {
let scale = ca * cb * cc * cd;
rys::coulomb_into(
sa.prim(pa),
sb.prim(pb),
sc.prim(pc),
sd.prim(pd),
scale,
block,
);
}
}
}
}
}
#[allow(clippy::too_many_arguments)]
fn contract_quartet_oshgp(
sa: &Shell,
ea: &[f64],
sb: &Shell,
eb: &[f64],
sc: &Shell,
ec: &[f64],
sd: &Shell,
ed: &[f64],
block: &mut [f64],
) {
os_eri::coulomb_shell_into(
ShellRef {
center: sa.center(),
l: sa.l(),
exps: sa.exponents(),
coeffs: ea,
},
ShellRef {
center: sb.center(),
l: sb.l(),
exps: sb.exponents(),
coeffs: eb,
},
ShellRef {
center: sc.center(),
l: sc.l(),
exps: sc.exponents(),
coeffs: ec,
},
ShellRef {
center: sd.center(),
l: sd.l(),
exps: sd.exponents(),
coeffs: ed,
},
block,
);
}
pub(crate) const PERMS8: [[usize; 4]; 8] = [
[0, 1, 2, 3], [1, 0, 2, 3], [0, 1, 3, 2], [1, 0, 3, 2], [2, 3, 0, 1], [2, 3, 1, 0], [3, 2, 0, 1], [3, 2, 1, 0], ];
pub(crate) fn canonical_shell_pairs(nsh: usize) -> Vec<(usize, usize)> {
let mut pairs = Vec::with_capacity(nsh * (nsh + 1) / 2);
for i in 0..nsh {
for j in 0..=i {
pairs.push((i, j));
}
}
pairs
}
fn scatter_eri_block_s8(
out: &mut [f64],
nao: usize,
sidx: [usize; 4],
offs: &[usize],
n: [usize; 4],
block: &[f64],
) {
let mut seen: [[usize; 4]; 8] = [[usize::MAX; 4]; 8];
let mut n_seen = 0;
for perm in &PERMS8 {
let tup = [sidx[perm[0]], sidx[perm[1]], sidx[perm[2]], sidx[perm[3]]];
if seen[..n_seen].contains(&tup) {
continue;
}
seen[n_seen] = tup;
n_seen += 1;
let mut stride = [0usize; 4];
let mut base = 0usize;
for (q, &src_axis) in perm.iter().enumerate() {
let s = nao.pow(3 - q as u32);
stride[src_axis] = s;
base += offs[sidx[src_axis]] * s;
}
let mut src = 0usize;
for a in 0..n[0] {
let oa = base + a * stride[0];
for b in 0..n[1] {
let ob = oa + b * stride[1];
for c in 0..n[2] {
let oc = ob + c * stride[2];
for d in 0..n[3] {
out[oc + d * stride[3]] = block[src];
src += 1;
}
}
}
}
}
}