use std::sync::Arc;
use cudarc::driver::{CudaFunction, CudaSlice, CudaStream, LaunchConfig, PushKernelArg};
use lin_alg::{
f32::{Vec3, vec3s_from_dev},
f64::Vec3 as Vec3F64,
};
use crate::{
AtomDynamics, ForcesOnWaterMol, MdOverrides,
non_bonded::{BodyRef, LjTables, NonBondedPair},
solvent::{WaterMol, WaterSite},
};
pub(crate) struct ForcesPositsGpu {
pub forces_on_dyn: CudaSlice<f32>,
pub forces_on_water_o: CudaSlice<f32>,
pub forces_on_water_m: CudaSlice<f32>,
pub forces_on_water_h0: CudaSlice<f32>,
pub forces_on_water_h1: CudaSlice<f32>,
pub virial_gpu: CudaSlice<f64>,
pub energy_gpu: CudaSlice<f64>,
pub cutoff_ewald: f32,
pub alpha_ewald: f32,
pub pos_dyn: CudaSlice<f32>,
pub pos_w_o: CudaSlice<f32>,
pub pos_w_m: CudaSlice<f32>,
pub pos_w_h0: CudaSlice<f32>,
pub pos_w_h1: CudaSlice<f32>,
}
impl ForcesPositsGpu {
pub(crate) fn new(
stream: &Arc<CudaStream>,
n_dyn: usize,
n_water: usize,
cutoff_ewald: f32,
alpha_ewald: f32,
) -> Self {
let forces_on_dyn = stream.alloc_zeros::<f32>(n_dyn * 3).unwrap();
let forces_on_water_o = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
let forces_on_water_m = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
let forces_on_water_h0 = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
let forces_on_water_h1 = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
let virial_gpu = stream.clone_htod(&[0.0f64]).unwrap();
let energy_gpu = stream.clone_htod(&[0.0f64]).unwrap();
let pos_dyn = stream.alloc_zeros::<f32>(n_dyn * 3).unwrap();
let pos_w_o = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
let pos_w_m = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
let pos_w_h0 = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
let pos_w_h1 = stream.alloc_zeros::<f32>(n_water * 3).unwrap();
Self {
forces_on_dyn,
forces_on_water_o,
forces_on_water_m,
forces_on_water_h0,
forces_on_water_h1,
virial_gpu,
energy_gpu,
cutoff_ewald,
alpha_ewald,
pos_dyn,
pos_w_o,
pos_w_m,
pos_w_h0,
pos_w_h1,
}
}
}
pub(crate) struct PerNeighborGpu {
pub tgt_is: CudaSlice<u32>,
pub src_is: CudaSlice<u32>,
pub sigmas: CudaSlice<f32>,
pub epss: CudaSlice<f32>,
pub qs_tgt: CudaSlice<f32>,
pub qs_src: CudaSlice<f32>,
pub atom_types_tgt: CudaSlice<u8>,
pub water_types_tgt: CudaSlice<u8>,
pub atom_types_src: CudaSlice<u8>,
pub water_types_src: CudaSlice<u8>,
pub scale_14: CudaSlice<u8>,
pub calc_ljs: CudaSlice<u8>,
pub calc_coulombs: CudaSlice<u8>,
pub symmetric: CudaSlice<u8>,
}
impl PerNeighborGpu {
pub(crate) fn new(
stream: &Arc<CudaStream>,
pairs: &[NonBondedPair],
atoms_dyn: &[AtomDynamics],
water: &[WaterMol],
lj_tables: &LjTables,
) -> Self {
let n = pairs.len();
let mut sigmas = Vec::with_capacity(n);
let mut epss = Vec::with_capacity(n);
let mut qs_tgt = Vec::with_capacity(n);
let mut qs_src = Vec::with_capacity(n);
let mut scale_14s = Vec::with_capacity(n);
let mut tgt_is: Vec<u32> = Vec::with_capacity(n);
let mut src_is: Vec<u32> = Vec::with_capacity(n);
let mut calc_ljs = Vec::with_capacity(n);
let mut calc_coulombs = Vec::with_capacity(n);
let mut symmetric = Vec::with_capacity(n);
let mut atom_types_tgt = vec![0; n];
let mut water_types_tgt = vec![0; n];
let mut atom_types_src = vec![0; n];
let mut water_types_src = vec![0; n];
for (i, pair) in pairs.iter().enumerate() {
let q_tgt = match pair.tgt {
BodyRef::NonWater(j) => {
tgt_is.push(j as u32);
&atoms_dyn[j]
}
BodyRef::Water { mol: j, site } => {
tgt_is.push(j as u32);
atom_types_tgt[i] = 1;
water_types_tgt[i] = site as u8;
match site {
WaterSite::O => &water[j].o,
WaterSite::M => &water[j].m,
WaterSite::H0 => &water[j].h0,
WaterSite::H1 => &water[j].h1,
}
}
}
.partial_charge;
let q_src = match pair.src {
BodyRef::NonWater(j) => {
src_is.push(j as u32);
&atoms_dyn[j]
}
BodyRef::Water { mol: j, site } => {
src_is.push(j as u32);
atom_types_src[i] = 1;
water_types_src[i] = site as u8;
match site {
WaterSite::O => &water[j].o,
WaterSite::M => &water[j].m,
WaterSite::H0 => &water[j].h0,
WaterSite::H1 => &water[j].h1,
}
}
}
.partial_charge;
let (σ, ε) = lj_tables.lookup(&pair.lj_indices);
sigmas.push(σ);
epss.push(ε);
qs_tgt.push(q_tgt);
qs_src.push(q_src);
scale_14s.push(pair.scale_14);
calc_ljs.push(pair.calc_lj);
calc_coulombs.push(pair.calc_coulomb);
symmetric.push(pair.symmetric);
}
{
assert_eq!(tgt_is.len(), n);
assert_eq!(src_is.len(), n);
assert_eq!(sigmas.len(), n);
assert_eq!(epss.len(), n);
assert_eq!(qs_tgt.len(), n);
assert_eq!(qs_src.len(), n);
assert_eq!(atom_types_tgt.len(), n);
assert_eq!(water_types_tgt.len(), n);
assert_eq!(atom_types_src.len(), n);
assert_eq!(water_types_src.len(), n);
assert_eq!(scale_14s.len(), n);
assert_eq!(calc_ljs.len(), n);
assert_eq!(calc_coulombs.len(), n);
assert_eq!(symmetric.len(), n);
}
let scale_14: Vec<_> = scale_14s.iter().map(|v| *v as u8).collect();
let calc_ljs: Vec<_> = calc_ljs.iter().map(|v| *v as u8).collect();
let calc_coulombs: Vec<_> = calc_coulombs.iter().map(|v| *v as u8).collect();
let symmetric: Vec<_> = symmetric.iter().map(|v| *v as u8).collect();
let tgt_is = stream.clone_htod(&tgt_is).unwrap();
let src_is = stream.clone_htod(&src_is).unwrap();
let sigmas = stream.clone_htod(&sigmas).unwrap();
let epss = stream.clone_htod(&epss).unwrap();
let qs_tgt = stream.clone_htod(&qs_tgt).unwrap();
let qs_src = stream.clone_htod(&qs_src).unwrap();
let atom_types_tgt = stream.clone_htod(&atom_types_tgt).unwrap();
let water_types_tgt = stream.clone_htod(&water_types_tgt).unwrap();
let atom_types_src = stream.clone_htod(&atom_types_src).unwrap();
let water_types_src = stream.clone_htod(&water_types_src).unwrap();
let scale_14 = stream.clone_htod(&scale_14).unwrap();
let calc_ljs = stream.clone_htod(&calc_ljs).unwrap();
let calc_coulombs = stream.clone_htod(&calc_coulombs).unwrap();
let symmetric = stream.clone_htod(&symmetric).unwrap();
Self {
tgt_is,
src_is,
sigmas,
epss,
qs_tgt,
qs_src,
atom_types_tgt,
water_types_tgt,
atom_types_src,
water_types_src,
scale_14,
calc_ljs,
calc_coulombs,
symmetric,
}
}
}
fn upload_positions(
stream: &Arc<CudaStream>,
forces: &mut ForcesPositsGpu,
atoms_dyn: &[AtomDynamics],
water: &[WaterMol],
) {
let mut h_pos_dyn = Vec::with_capacity(atoms_dyn.len() * 3);
for a in atoms_dyn {
let [x, y, z] = a.posit.to_arr();
h_pos_dyn.extend_from_slice(&[x, y, z]);
}
let mut h_pos_o = Vec::with_capacity(water.len() * 3);
let mut h_pos_m = Vec::with_capacity(water.len() * 3);
let mut h_pos_h0 = Vec::with_capacity(water.len() * 3);
let mut h_pos_h1 = Vec::with_capacity(water.len() * 3);
for w in water {
h_pos_o.extend_from_slice(&w.o.posit.to_arr());
h_pos_m.extend_from_slice(&w.m.posit.to_arr());
h_pos_h0.extend_from_slice(&w.h0.posit.to_arr());
h_pos_h1.extend_from_slice(&w.h1.posit.to_arr());
}
stream.memcpy_htod(&h_pos_dyn, &mut forces.pos_dyn).unwrap();
stream.memcpy_htod(&h_pos_o, &mut forces.pos_w_o).unwrap();
stream.memcpy_htod(&h_pos_m, &mut forces.pos_w_m).unwrap();
stream.memcpy_htod(&h_pos_h0, &mut forces.pos_w_h0).unwrap();
stream.memcpy_htod(&h_pos_h1, &mut forces.pos_w_h1).unwrap();
}
pub fn force_nonbonded_gpu(
stream: &Arc<CudaStream>,
kernel: &CudaFunction,
kernel_zero_f32: &CudaFunction,
kernel_zero_f64: &CudaFunction,
pairs: &[NonBondedPair],
atoms_dyn: &[AtomDynamics],
water: &[WaterMol],
cell_extent: Vec3,
forces: &mut ForcesPositsGpu,
per_neighbor: &PerNeighborGpu,
overrides: &MdOverrides,
) -> (Vec<Vec3F64>, Vec<ForcesOnWaterMol>, f64, f64, Vec<f64>) {
upload_positions(stream, forces, atoms_dyn, water);
let n = pairs.len();
zero_forces_and_accums(
stream,
kernel_zero_f32,
kernel_zero_f64,
forces,
atoms_dyn.len(),
water.len(),
);
let n_u32 = n as u32;
let coulomb_disabled = overrides.coulomb_disabled as u8;
let lj_disabled = overrides.lj_disabled as u8;
let cfg = LaunchConfig::for_num_elems(n_u32);
let mut launch_args = stream.launch_builder(kernel);
launch_args.arg(&mut forces.forces_on_dyn);
launch_args.arg(&mut forces.forces_on_water_o);
launch_args.arg(&mut forces.forces_on_water_m);
launch_args.arg(&mut forces.forces_on_water_h0);
launch_args.arg(&mut forces.forces_on_water_h1);
launch_args.arg(&mut forces.virial_gpu);
launch_args.arg(&mut forces.energy_gpu);
launch_args.arg(&forces.pos_dyn);
launch_args.arg(&forces.pos_w_o);
launch_args.arg(&forces.pos_w_m);
launch_args.arg(&forces.pos_w_h0);
launch_args.arg(&forces.pos_w_h1);
launch_args.arg(&per_neighbor.tgt_is);
launch_args.arg(&per_neighbor.src_is);
launch_args.arg(&per_neighbor.sigmas);
launch_args.arg(&per_neighbor.epss);
launch_args.arg(&per_neighbor.qs_tgt);
launch_args.arg(&per_neighbor.qs_src);
launch_args.arg(&per_neighbor.atom_types_tgt);
launch_args.arg(&per_neighbor.water_types_tgt);
launch_args.arg(&per_neighbor.atom_types_src);
launch_args.arg(&per_neighbor.water_types_src);
launch_args.arg(&per_neighbor.scale_14);
launch_args.arg(&per_neighbor.calc_ljs);
launch_args.arg(&per_neighbor.calc_coulombs);
launch_args.arg(&per_neighbor.symmetric);
launch_args.arg(&cell_extent);
launch_args.arg(&forces.cutoff_ewald);
launch_args.arg(&forces.alpha_ewald);
launch_args.arg(&n_u32);
launch_args.arg(&coulomb_disabled);
launch_args.arg(&lj_disabled);
unsafe {
if launch_args.launch(cfg).is_err() {
eprintln!(
"Error launching the non bonded GPU force kernel. (This can happen if there is one or\
more NaNs in the system"
);
}
}
let forces_on_dyn = vec3s_from_dev(stream, &forces.forces_on_dyn);
let forces_on_water_o = vec3s_from_dev(stream, &forces.forces_on_water_o);
let forces_on_water_m = vec3s_from_dev(stream, &forces.forces_on_water_m);
let forces_on_water_h0 = vec3s_from_dev(stream, &forces.forces_on_water_h0);
let forces_on_water_h1 = vec3s_from_dev(stream, &forces.forces_on_water_h1);
let mut forces_on_water = Vec::new();
for i in 0..water.len() {
let f_o = forces_on_water_o[i].into();
let f_m = forces_on_water_m[i].into();
let f_h0 = forces_on_water_h0[i].into();
let f_h1 = forces_on_water_h1[i].into();
forces_on_water.push(ForcesOnWaterMol {
f_o,
f_m,
f_h0,
f_h1,
});
}
let virial = stream.clone_dtoh(&forces.virial_gpu).unwrap()[0];
let energy = stream.clone_dtoh(&forces.energy_gpu).unwrap()[0];
let forces_on_dyn = forces_on_dyn.into_iter().map(|f| f.into()).collect();
(forces_on_dyn, forces_on_water, virial, energy, Vec::new())
}
fn zero_forces_and_accums(
stream: &Arc<CudaStream>,
zero_f32: &CudaFunction,
zero_f64: &CudaFunction,
forces: &mut ForcesPositsGpu,
n_non_water: usize,
n_water: usize,
) {
let std_len_u32 = (n_non_water * 3) as u32;
if std_len_u32 > 0 {
let cfg_dyn = LaunchConfig::for_num_elems(std_len_u32);
let mut l0 = stream.launch_builder(&zero_f32);
l0.arg(&mut forces.forces_on_dyn);
l0.arg(&std_len_u32);
unsafe { l0.launch(cfg_dyn) }.unwrap();
}
let wat_len_u32 = (n_water * 3) as u32;
if wat_len_u32 > 0 {
let cfg_w = LaunchConfig::for_num_elems(wat_len_u32);
let mut l1 = stream.launch_builder(&zero_f32);
l1.arg(&mut forces.forces_on_water_o);
l1.arg(&wat_len_u32);
unsafe { l1.launch(cfg_w) }.unwrap();
let mut l2 = stream.launch_builder(&zero_f32);
l2.arg(&mut forces.forces_on_water_m);
l2.arg(&wat_len_u32);
unsafe { l2.launch(cfg_w) }.unwrap();
let mut l3 = stream.launch_builder(&zero_f32);
l3.arg(&mut forces.forces_on_water_h0);
l3.arg(&wat_len_u32);
unsafe { l3.launch(cfg_w) }.unwrap();
let mut l4 = stream.launch_builder(&zero_f32);
l4.arg(&mut forces.forces_on_water_h1);
l4.arg(&wat_len_u32);
unsafe { l4.launch(cfg_w) }.unwrap();
}
let one: u32 = 1;
let cfg1 = LaunchConfig::for_num_elems(1);
let mut l5 = stream.launch_builder(&zero_f64);
l5.arg(&mut forces.virial_gpu);
l5.arg(&one);
unsafe { l5.launch(cfg1) }.unwrap();
let mut l6 = stream.launch_builder(&zero_f64);
l6.arg(&mut forces.energy_gpu);
l6.arg(&one);
unsafe { l6.launch(cfg1) }.unwrap();
}