use std::ops::AddAssign;
use ewald::{PmeRecip, force_coulomb_short_range, get_grid_n};
#[allow(unused)]
#[cfg(target_arch = "x86_64")]
use lin_alg::f32::{Vec3x8, Vec3x16, f32x8, f32x16};
use lin_alg::{f32::Vec3, f64::Vec3 as Vec3F64};
use rayon::prelude::*;
#[cfg(feature = "cuda")]
use crate::gpu_interface::force_nonbonded_gpu;
use crate::{
AtomDynamics, ComputationDevice, MdOverrides, MdState,
barostat::SimBox,
forces::force_e_lj,
solvent::{ForcesOnWaterMol, O_EPS, O_SIGMA, WaterMol, WaterSite},
};
#[allow(unused)]
#[cfg(target_arch = "x86_64")]
use crate::{AtomDynamicsx8, AtomDynamicsx16};
/// Scale applied to Lennard-Jones interactions between 1-4 pairs (atoms
/// separated by exactly three bonds).
const SCALE_LJ_14: f32 = 0.5;
/// Scale applied to Coulomb interactions between 1-4 pairs; 1/1.2 matches the
/// Amber convention (SCEE = 1.2).
pub const SCALE_COUL_14: f32 = 1.0 / 1.2;
/// Charge-unit conversion factor; presumably ≈ sqrt(332.06), converting
/// elementary-charge/Å units to internal energy units — TODO confirm.
pub const CHARGE_UNIT_SCALER: f32 = 18.2223;
/// Identifies how to look up Lennard-Jones parameters for a pair of bodies.
#[derive(Debug)]
pub enum LjTableIndices {
    /// Two standard (non-water) atoms, by their indices into the atom list.
    StdStd((usize, usize)),
    /// A standard atom (by index) interacting with a water oxygen site.
    StdWater(usize),
    /// Water-water: parameters are the fixed oxygen-oxygen pair; no lookup.
    WaterWater,
}
/// Precomputed, combined Lennard-Jones (σ, ε) parameters for interacting pairs.
#[derive(Default)]
pub struct LjTables {
    // Flattened upper-triangular (i < j, diagonal excluded) matrix of
    // pairwise std-std parameters; indexed by `lookup`.
    pub std: Vec<(f32, f32)>,
    // Each standard atom combined with the water-oxygen LJ site.
    pub water_std: Vec<(f32, f32)>,
    // Number of standard (non-water) atoms the tables were built from.
    pub n_std: usize,
}
/// 8-lane SIMD variant of `LjTables`, with parameters packed 8 per vector.
/// Currently unused; intended for batched x86-64 force evaluation.
#[allow(unused)]
#[cfg(target_arch = "x86_64")]
#[derive(Default)]
pub struct LjTablesx8 {
    // σ/ε pairs packed 8 per SIMD vector.
    pub std: Vec<(f32x8, f32x8)>,
    pub water_std: Vec<(f32x8, f32x8)>,
    // One atom count per SIMD lane.
    pub n_std: [usize; 8],
}
/// 16-lane SIMD variant of `LjTables` (AVX-512-width), with parameters packed
/// 16 per vector. Currently unused; intended for batched x86-64 evaluation.
#[allow(unused)]
#[cfg(target_arch = "x86_64")]
#[derive(Default)]
pub struct LjTablesx16 {
    // σ/ε pairs packed 16 per SIMD vector.
    pub std: Vec<(f32x16, f32x16)>,
    pub water_std: Vec<(f32x16, f32x16)>,
    // One atom count per SIMD lane. Was `[usize; 8]` — a copy-paste from the
    // x8 variant; a 16-lane table needs 16 lane counts.
    pub n_std: [usize; 16],
}
impl LjTables {
    /// Builds the LJ parameter tables from the standard (non-water) atoms.
    ///
    /// `std` stores combined (σ, ε) for every unordered atom pair (i < j) in
    /// row-major upper-triangular order, matching `lookup`'s index formula.
    /// `water_std` stores each atom combined with the water-oxygen site.
    pub fn new(atoms: &[AtomDynamics]) -> Self {
        let n_std = atoms.len();
        if n_std == 0 {
            return Default::default();
        }

        // n * (n - 1) / 2 unordered pairs, diagonal excluded.
        let mut std = Vec::with_capacity(n_std * (n_std - 1) / 2);
        for (i_0, atom_0) in atoms.iter().enumerate() {
            for atom_1 in atoms.iter().skip(i_0 + 1) {
                std.push(combine_lj_params(atom_0, atom_1));
            }
        }

        // Lorentz-Berthelot mixing against the water-oxygen parameters.
        let water_std = atoms
            .iter()
            .map(|atom| {
                let σ = 0.5 * (atom.lj_sigma + O_SIGMA);
                let ε = (atom.lj_eps * O_EPS).sqrt();
                (σ, ε)
            })
            .collect();

        Self {
            std,
            water_std,
            n_std,
        }
    }

    /// Looks up combined (σ, ε) for an interacting pair.
    ///
    /// # Panics
    /// Panics (via slice indexing) if a std-std or std-water index is out of
    /// range for the tables.
    pub fn lookup(&self, i: &LjTableIndices) -> (f32, f32) {
        match i {
            LjTableIndices::StdStd((i_0, i_1)) => {
                // The table stores each unordered pair once, as (i, j) with
                // i < j; canonicalize the order first.
                let (i, j) = if i_0 < i_1 {
                    (*i_0, *i_1)
                } else {
                    (*i_1, *i_0)
                };
                debug_assert!(
                    i < self.n_std,
                    "LJ table index {i} out of range (n_std = {})",
                    self.n_std
                );
                debug_assert!(
                    j < self.n_std,
                    "LJ table index {j} out of range (n_std = {})",
                    self.n_std
                );
                // Row-major upper-triangular index, diagonal excluded.
                let idx = i * (2 * self.n_std - i - 1) / 2 + (j - i - 1);
                self.std[idx]
            }
            LjTableIndices::StdWater(ix) => self.water_std[*ix],
            LjTableIndices::WaterWater => (O_SIGMA, O_EPS),
        }
    }
}
impl AddAssign<Self> for ForcesOnWaterMol {
    /// Accumulates another set of per-site water forces into this one.
    fn add_assign(&mut self, rhs: Self) {
        let Self { f_o, f_h0, f_h1, f_m } = rhs;
        self.f_o += f_o;
        self.f_h0 += f_h0;
        self.f_h1 += f_h1;
        self.f_m += f_m;
    }
}
/// Lightweight handle to one interacting body: either a standard (non-water)
/// atom by index, or a specific site of a water molecule.
#[derive(Copy, Clone)]
pub enum BodyRef {
    NonWater(usize),
    Water { mol: usize, site: WaterSite },
}
impl BodyRef {
    /// Resolves this handle to the underlying atom, borrowing from either the
    /// non-water atom slice or the appropriate site of a water molecule.
    pub(crate) fn get<'a>(
        &self,
        non_waters: &'a [AtomDynamics],
        waters: &'a [WaterMol],
    ) -> &'a AtomDynamics {
        match *self {
            Self::NonWater(i) => &non_waters[i],
            Self::Water { mol, site } => {
                let w = &waters[mol];
                match site {
                    WaterSite::O => &w.o,
                    WaterSite::M => &w.m,
                    WaterSite::H0 => &w.h0,
                    WaterSite::H1 => &w.h1,
                }
            }
        }
    }
}
/// One entry in the non-bonded pair list: the two interacting bodies plus
/// flags controlling how the interaction is evaluated.
pub struct NonBondedPair {
    pub tgt: BodyRef,
    pub src: BodyRef,
    // True for 1-4 pairs (three bonds apart), which use scaled LJ/Coulomb.
    pub scale_14: bool,
    pub lj_indices: LjTableIndices,
    // Which terms to evaluate for this pair.
    pub calc_lj: bool,
    pub calc_coulomb: bool,
    // When true, also apply the equal-and-opposite force to `src`.
    pub symmetric: bool,
}
/// Accumulates force `f` into the slot identified by `body_type`: either a
/// non-water atom's entry, or the matching site of a water molecule.
fn add_to_sink(
    sink_non_water: &mut [Vec3F64],
    sink_wat: &mut [ForcesOnWaterMol],
    body_type: BodyRef,
    f: Vec3F64,
) {
    match body_type {
        BodyRef::NonWater(i) => sink_non_water[i] += f,
        BodyRef::Water { mol, site } => {
            let w = &mut sink_wat[mol];
            let slot = match site {
                WaterSite::O => &mut w.f_o,
                WaterSite::M => &mut w.f_m,
                WaterSite::H0 => &mut w.f_h0,
                WaterSite::H1 => &mut w.f_h1,
            };
            *slot += f;
        }
    }
}
/// CPU path for the pairwise non-bonded loop. Iterates the pair list in
/// parallel with rayon, accumulating per-atom and per-water-site forces, the
/// short-range virial, the non-bonded potential energy (counted only for
/// pairs involving at least one standard atom), and a flattened
/// n_mol × n_mol matrix of energies between molecules.
///
/// Returns (forces on standard atoms, forces on water molecules, virial,
/// energy, inter-molecule energy matrix).
fn calc_force_cpu(
    pairs: &[NonBondedPair],
    atoms_std: &[AtomDynamics],
    water: &[WaterMol],
    cell: &SimBox,
    lj_tables: &LjTables,
    overrides: &MdOverrides,
    mol_start_indices: &[usize],
    spme_alpha: f32,
    coulomb_cutoff: f32,
    lj_cutoff: f32,
) -> (Vec<Vec3F64>, Vec<ForcesOnWaterMol>, f64, f64, Vec<f64>) {
    let n_std = atoms_std.len();
    let n_wat = water.len();
    let n_mol = mol_start_indices.len();
    // Maps an atom index to its molecule: on a miss, binary_search returns the
    // insertion point, so the owning molecule is the previous start index.
    let find_mol_idx = |atom_idx: usize, starts: &[usize]| -> usize {
        starts
            .binary_search(&atom_idx)
            .unwrap_or_else(|pos| if pos == 0 { 0 } else { pos - 1 })
    };
    pairs
        .par_iter()
        .fold(
            // Per-thread accumulators: std forces, water forces, virial,
            // energy, and the inter-molecule energy matrix.
            || {
                (
                    vec![Vec3F64::new_zero(); n_std],
                    vec![ForcesOnWaterMol::default(); n_wat],
                    0.0_f64, 0.0_f64, vec![0.0_f64; mol_start_indices.len().pow(2)], )
            },
            |(mut f_std, mut f_wat, mut virial, mut energy, mut energy_between_mols), p| {
                let a_t = p.tgt.get(atoms_std, water);
                let a_s = p.src.get(atoms_std, water);
                let (f, e_pair) = f_nonbonded_cpu(
                    &mut virial,
                    a_t,
                    a_s,
                    cell,
                    p.scale_14,
                    &p.lj_indices,
                    lj_tables,
                    p.calc_lj,
                    p.calc_coulomb,
                    overrides,
                    spme_alpha,
                    coulomb_cutoff,
                    lj_cutoff,
                );
                // Force is computed on the target; apply the Newton-3 reaction
                // to the source when the pair is marked symmetric.
                let f: Vec3F64 = f.into();
                add_to_sink(&mut f_std, &mut f_wat, p.tgt, f);
                if p.symmetric {
                    add_to_sink(&mut f_std, &mut f_wat, p.src, -f);
                }
                // Water-water pair energy is excluded from this total.
                let involves_std =
                    matches!(p.tgt, BodyRef::NonWater(_)) || matches!(p.src, BodyRef::NonWater(_));
                if involves_std {
                    energy += e_pair as f64;
                }
                // Std-std pairs also contribute to the per-molecule-pair
                // energy matrix, mirrored across the diagonal.
                if let (BodyRef::NonWater(i_tgt), BodyRef::NonWater(i_src)) = (p.tgt, p.src) {
                    let m_t = find_mol_idx(i_tgt, mol_start_indices);
                    let m_s = find_mol_idx(i_src, mol_start_indices);
                    let idx_ts = m_t * n_mol + m_s;
                    energy_between_mols[idx_ts] += e_pair as f64;
                    if m_t != m_s {
                        let idx_st = m_s * n_mol + m_t;
                        energy_between_mols[idx_st] += e_pair as f64;
                    }
                }
                (f_std, f_wat, virial, energy, energy_between_mols)
            },
        )
        .reduce(
            // Identity accumulator for combining per-thread results.
            || {
                (
                    vec![Vec3F64::new_zero(); n_std],
                    vec![ForcesOnWaterMol::default(); n_wat],
                    0.0_f64,
                    0.0_f64,
                    vec![0.0_f64; mol_start_indices.len().pow(2)],
                )
            },
            // Element-wise sum of two accumulator sets.
            |(mut f_on_std, mut f_on_water, virial_a, e_a, mut em_a),
             (db, wb, virial_b, e_b, em_b)| {
                for i in 0..n_std {
                    f_on_std[i] += db[i];
                }
                for i in 0..n_wat {
                    f_on_water[i].f_o += wb[i].f_o;
                    f_on_water[i].f_m += wb[i].f_m;
                    f_on_water[i].f_h0 += wb[i].f_h0;
                    f_on_water[i].f_h1 += wb[i].f_h1;
                }
                for i in 0..em_a.len() {
                    em_a[i] += em_b[i];
                }
                (f_on_std, f_on_water, virial_a + virial_b, e_a + e_b, em_a)
            },
        )
}
impl MdState {
    /// Computes short-range non-bonded forces (LJ + screened Coulomb) over the
    /// precomputed pair list on the selected device, then applies the
    /// resulting forces, potential energy, and virial to the state.
    pub fn apply_nonbonded_forces(&mut self, dev: &ComputationDevice) {
        let (f_on_non_water, f_on_water, virial, energy, energy_between_mols) = match dev {
            // SIMD (e.g. AVX-512) dispatch is not implemented; the CPU path is
            // scalar. (Removed a dead, empty feature-detection if/else here.)
            ComputationDevice::Cpu => calc_force_cpu(
                &self.nb_pairs,
                &self.atoms,
                &self.water,
                &self.cell,
                &self.lj_tables,
                &self.cfg.overrides,
                &self.mol_start_indices,
                self.cfg.spme_alpha,
                self.cfg.coulomb_cutoff,
                self.cfg.lj_cutoff,
            ),
            #[cfg(feature = "cuda")]
            ComputationDevice::Gpu(stream) => force_nonbonded_gpu(
                stream,
                self.gpu_kernel.as_ref().unwrap(),
                self.gpu_kernel_zero_f32.as_ref().unwrap(),
                self.gpu_kernel_zero_f64.as_ref().unwrap(),
                &self.nb_pairs,
                &self.atoms,
                &self.water,
                self.cell.extent,
                self.forces_posits_gpu.as_mut().unwrap(),
                self.per_neighbor_gpu.as_ref().unwrap(),
                &self.cfg.overrides,
            ),
        };

        // Accumulate forces onto standard atoms (f64 accumulators → f32).
        for (i, tgt) in self.atoms.iter_mut().enumerate() {
            let f: Vec3 = f_on_non_water[i].into();
            tgt.force += f;
        }

        // Accumulate per-site forces onto each water molecule.
        for (i, tgt) in self.water.iter_mut().enumerate() {
            let f = f_on_water[i];
            let f_0: Vec3 = f.f_o.into();
            let f_m: Vec3 = f.f_m.into();
            let f_h0: Vec3 = f.f_h0.into();
            let f_h1: Vec3 = f.f_h1.into();
            tgt.o.force += f_0;
            tgt.m.force += f_m;
            tgt.h0.force += f_h0;
            tgt.h1.force += f_h1;
        }

        self.potential_energy += energy;
        self.potential_energy_nonbonded += energy;
        self.barostat.virial.nonbonded_short_range += virial;

        // The inter-molecule energy matrix may be absent or differently sized
        // (e.g. on the GPU path); only fold it in when the sizes agree.
        if energy_between_mols.len() == self.potential_energy_between_mols.len() {
            for (i, e) in self.potential_energy_between_mols.iter_mut().enumerate() {
                *e += energy_between_mols[i];
            }
        }
    }

    /// Rebuilds the flat non-bonded pair list from the current neighbor
    /// lists: std-std, std-water, and water-water pairs, applying 1-2/1-3
    /// exclusions and 1-4 scaling flags to std-std pairs.
    pub(crate) fn setup_pairs(&mut self) {
        let atoms = &self.atoms;
        let n_std = self.atoms.len();
        let n_water_mols = self.water.len();
        let sites = [WaterSite::O, WaterSite::M, WaterSite::H0, WaterSite::H1];

        let exclusions = &self.pairs_excluded_12_13;
        let scaled_set = &self.pairs_14_scaled;

        // Std-std: each unordered pair once (j > i), skipping bonded-only
        // atoms and 1-2/1-3 exclusions.
        let pairs_std_std: Vec<_> = (0..n_std)
            .flat_map(|i_tgt| {
                self.neighbors_nb.std_std[i_tgt]
                    .iter()
                    .copied()
                    .filter(move |&j| j > i_tgt)
                    .filter_map(move |i_src| {
                        if atoms[i_src].bonded_only || atoms[i_tgt].bonded_only {
                            return None;
                        }
                        let key = (i_tgt, i_src);
                        if exclusions.contains(&key) {
                            return None;
                        }
                        let scale_14 = scaled_set.contains(&key);
                        Some(NonBondedPair {
                            tgt: BodyRef::NonWater(i_tgt),
                            src: BodyRef::NonWater(i_src),
                            scale_14,
                            lj_indices: LjTableIndices::StdStd(key),
                            calc_lj: true,
                            calc_coulomb: true,
                            symmetric: true,
                        })
                    })
            })
            .collect();

        // Std-water: LJ acts only on the O site; Coulomb only on the charged
        // sites (M, H0, H1) of the 4-site water model.
        let mut pairs_std_water: Vec<_> = (0..n_std)
            .flat_map(|i_std| {
                self.neighbors_nb.std_water[i_std]
                    .iter()
                    .copied()
                    .flat_map(move |i_water| {
                        sites.into_iter().map(move |site| NonBondedPair {
                            tgt: BodyRef::NonWater(i_std),
                            src: BodyRef::Water { mol: i_water, site },
                            scale_14: false,
                            lj_indices: LjTableIndices::StdWater(i_std),
                            calc_lj: site == WaterSite::O,
                            calc_coulomb: site != WaterSite::O,
                            symmetric: true,
                        })
                    })
            })
            .collect();

        // Water-water: O-O carries LJ; charged-site pairs carry Coulomb; all
        // other site combinations contribute nothing and are skipped.
        let mut pairs_water_water = Vec::new();
        for i_0 in 0..n_water_mols {
            for &i_1 in &self.neighbors_nb.water_water[i_0] {
                if i_1 <= i_0 {
                    continue;
                }
                for &site_0 in &sites {
                    for &site_1 in &sites {
                        let calc_lj = site_0 == WaterSite::O && site_1 == WaterSite::O;
                        let calc_coulomb = site_0 != WaterSite::O && site_1 != WaterSite::O;
                        if !(calc_lj || calc_coulomb) {
                            continue;
                        }
                        pairs_water_water.push(NonBondedPair {
                            tgt: BodyRef::Water {
                                mol: i_0,
                                site: site_0,
                            },
                            src: BodyRef::Water {
                                mol: i_1,
                                site: site_1,
                            },
                            scale_14: false,
                            lj_indices: LjTableIndices::WaterWater,
                            calc_lj,
                            calc_coulomb,
                            symmetric: true,
                        });
                    }
                }
            }
        }

        // Concatenate into one list, reserving once to avoid regrowth.
        let len_added = pairs_std_water.len() + pairs_water_water.len();
        let mut pairs = pairs_std_std;
        pairs.reserve(len_added);
        pairs.append(&mut pairs_std_water);
        pairs.append(&mut pairs_water_water);

        self.nb_pairs = pairs;
    }

    /// Computes the SPME reciprocal-space (k-space) contribution to Coulomb
    /// forces and energy and applies it, then corrects 1-4 scaled pairs: the
    /// k-space sum implicitly counts them at full strength, so the vacuum
    /// Coulomb force is re-added scaled by (SCALE_COUL_14 - 1).
    ///
    /// Returns the reciprocal forces, energy, and long-range virial.
    ///
    /// # Panics
    /// Panics if the PME solver has not been initialized (see `regen_pme`).
    pub(crate) fn handle_spme_recip(&mut self, dev: &ComputationDevice) -> (Vec<Vec3>, f64, f64) {
        let (pos_all, q_all) = self.pack_pme_pos_q();

        let (f_recip, e_recip, virial_from_kspace) = match &mut self.pme_recip {
            Some(pme_recip) => match dev {
                ComputationDevice::Cpu => pme_recip.forces_and_virial(&pos_all, &q_all),
                #[cfg(feature = "cuda")]
                #[allow(unused)]
                ComputationDevice::Gpu(stream) => {
                    #[cfg(not(any(feature = "cufft", feature = "vkfft")))]
                    let (f, e) = pme_recip.forces(&pos_all, &q_all);
                    #[cfg(any(feature = "cufft", feature = "vkfft"))]
                    let (f, e) = pme_recip.forces_gpu(stream, &pos_all, &q_all);
                    // NOTE(review): the GPU path reports no k-space virial;
                    // it contributes 0 here — confirm downstream handling.
                    (f, e, 0.0_f64)
                }
            },
            None => {
                panic!("No PME recip available; not computing SPME recip.");
            }
        };

        self.potential_energy += e_recip as f64;
        self.potential_energy_nonbonded += e_recip as f64;

        self.unpack_apply_pme_forces(&f_recip);

        let mut virial_lr_recip = virial_from_kspace;

        // 1-4 correction: remove the over-counted fraction of the vacuum
        // Coulomb interaction for each scaled pair.
        for &(i, j) in &self.pairs_14_scaled {
            let diff = self
                .cell
                .min_image(self.atoms[i].posit - self.atoms[j].posit);
            let r = diff.magnitude();
            // Skip degenerate (coincident) pairs.
            if r.abs() < 1e-6 {
                continue;
            }
            let dir = diff / r;

            let qi = self.atoms[i].partial_charge;
            let qj = self.atoms[j].partial_charge;

            let inv_r = 1.0 / r;
            let inv_r2 = inv_r * inv_r;

            // Unscreened (vacuum) Coulomb force between the pair.
            let f_vac = dir * (qi * qj * inv_r2);
            // Negative correction, since SCALE_COUL_14 < 1.
            let df = f_vac * (SCALE_COUL_14 - 1.0);

            self.atoms[i].force += df;
            self.atoms[j].force -= df;

            // Pair virial contribution: r · F.
            virial_lr_recip += (dir * r).dot(df) as f64;
        }

        self.barostat.virial.nonbonded_long_range += virial_lr_recip;

        (f_recip, e_recip as f64, virial_lr_recip)
    }

    /// Packs wrapped positions and partial charges of all charged sites into
    /// flat arrays for the PME solver: standard atoms first, then each
    /// water's M, H0, H1 sites (the O site carries no charge here).
    fn pack_pme_pos_q(&self) -> (Vec<Vec3>, Vec<f32>) {
        let n_std = self.atoms.len();
        let n_wat = self.water.len();

        let mut pos = Vec::with_capacity(n_std + 3 * n_wat);
        let mut q = Vec::with_capacity(pos.capacity());

        for a in &self.atoms {
            pos.push(self.cell.wrap(a.posit));
            q.push(a.partial_charge);
        }
        for w in &self.water {
            pos.push(self.cell.wrap(w.m.posit));
            q.push(w.m.partial_charge);
            pos.push(self.cell.wrap(w.h0.posit));
            q.push(w.h0.partial_charge);
            pos.push(self.cell.wrap(w.h1.posit));
            q.push(w.h1.partial_charge);
        }

        (pos, q)
    }

    /// Applies PME forces back to their sources, inverting the packing order
    /// of `pack_pme_pos_q`: standard atoms first, then water M/H0/H1 triples.
    pub(crate) fn unpack_apply_pme_forces(&mut self, forces: &[Vec3]) {
        let water_start = self.atoms.len();

        for (i, f) in forces.iter().enumerate() {
            if i < water_start {
                self.atoms[i].force += *f;
            } else {
                let i_wat = i - water_start;
                let i_wat_mol = i_wat / 3;
                // Sites within a molecule were packed in M, H0, H1 order.
                match i_wat % 3 {
                    0 => self.water[i_wat_mol].m.force += *f,
                    1 => self.water[i_wat_mol].h0.force += *f,
                    _ => self.water[i_wat_mol].h1.force += *f,
                }
            }
        }
    }

    /// (Re)creates the PME reciprocal-space solver for the current cell
    /// dimensions, e.g. after a barostat volume change resizes the box.
    pub(crate) fn regen_pme(&mut self, dev: &ComputationDevice) {
        let [lx, ly, lz] = self.cell.extent.to_arr();
        let l = (lx, ly, lz);
        let n = get_grid_n(l, self.cfg.spme_mesh_spacing);

        self.pme_recip = Some(match dev {
            ComputationDevice::Cpu => {
                // With a GPU-FFT feature compiled in, the constructor takes an
                // optional stream; `None` selects the CPU path.
                #[cfg(any(feature = "vkfft", feature = "cufft"))]
                let v = PmeRecip::new(None, n, l, self.cfg.spme_alpha);
                #[cfg(not(any(feature = "vkfft", feature = "cufft")))]
                let v = PmeRecip::new(n, l, self.cfg.spme_alpha);
                v
            }
            #[cfg(feature = "cuda")]
            ComputationDevice::Gpu(stream) => {
                #[cfg(any(feature = "vkfft", feature = "cufft"))]
                let v = PmeRecip::new(Some(stream), n, l, self.cfg.spme_alpha);
                #[cfg(not(any(feature = "vkfft", feature = "cufft")))]
                let v = PmeRecip::new(n, l, self.cfg.spme_alpha);
                v
            }
        });
    }
}
/// Computes the short-range non-bonded force on `tgt` due to `src`
/// (Lennard-Jones plus SPME-screened Coulomb), using the minimum-image
/// convention, and accumulates the pair's virial contribution into
/// `virial_w`. Returns the force on the target and the pair energy.
#[allow(clippy::too_many_arguments)]
pub fn f_nonbonded_cpu(
    virial_w: &mut f64,
    tgt: &AtomDynamics,
    src: &AtomDynamics,
    cell: &SimBox,
    scale14: bool,
    lj_indices: &LjTableIndices,
    lj_tables: &LjTables,
    calc_lj: bool,
    calc_coulomb: bool,
    overrides: &MdOverrides,
    spme_alpha: f32,
    coulomb_cutoff: f32,
    lj_cutoff: f32,
) -> (Vec3, f32) {
    // Minimum-image separation, target relative to source.
    let delta = cell.min_image(tgt.posit - src.posit);
    let r_sq = delta.magnitude_squared();

    // Guard against coincident positions (degenerate pair).
    if r_sq < 1e-12 {
        return (Vec3::new_zero(), 0.);
    }

    let r = r_sq.sqrt();
    let inv_r = 1.0 / r;
    let unit = delta * inv_r;

    // Lennard-Jones term, subject to its own cutoff and global disable flag.
    let mut f_lj = Vec3::new_zero();
    let mut e_lj = 0.;
    if calc_lj && r <= lj_cutoff && !overrides.lj_disabled {
        let (σ, ε) = lj_tables.lookup(lj_indices);
        let (f, e) = force_e_lj(unit, inv_r, σ, ε);
        if scale14 {
            f_lj = f * SCALE_LJ_14;
            e_lj = e * SCALE_LJ_14;
        } else {
            f_lj = f;
            e_lj = e;
        }
    }

    // Short-range (erfc-screened) Coulomb term.
    let mut f_coul = Vec3::new_zero();
    let mut e_coul = 0.;
    if calc_coulomb && !overrides.coulomb_disabled {
        let (f, e) = force_coulomb_short_range(
            unit,
            r,
            inv_r,
            tgt.partial_charge,
            src.partial_charge,
            coulomb_cutoff,
            spme_alpha,
        );
        f_coul = f;
        e_coul = e;
        if scale14 {
            f_coul *= SCALE_COUL_14;
            e_coul *= SCALE_COUL_14;
        }
    }

    let force = f_lj + f_coul;
    // Pair virial contribution: r · F.
    *virial_w += delta.dot(force) as f64;

    (force, e_lj + e_coul)
}
fn combine_lj_params(atom_0: &AtomDynamics, atom_1: &AtomDynamics) -> (f32, f32) {
let σ = 0.5 * (atom_0.lj_sigma + atom_1.lj_sigma);
let ε = (atom_0.lj_eps * atom_1.lj_eps).sqrt();
(σ, ε)
}