use anyhow::{Context, anyhow};
use itertools::Itertools;
use log::{info, trace};
use nalgebra::Vector3;
use num_integer::Integer;
use num_traits::{FromPrimitive, NumCast};
use parking_lot::Mutex;
use rayon::prelude::*;
use std::cell::RefCell;
use std::sync::atomic::{AtomicUsize, Ordering};
use thread_local::ThreadLocal;
use crate::density_map::sequential_compute_particle_densities_filtered;
use crate::kernel::{CubicSplineKernel, SymmetricKernel3d};
use crate::marching_cubes::marching_cubes_lut::marching_cubes_triangulation_iter;
use crate::mesh::{HexMesh3d, TriMesh3d};
use crate::neighborhood_search::{
FlatNeighborhoodList, neighborhood_search_spatial_hashing_flat_filtered,
neighborhood_search_spatial_hashing_parallel,
};
use crate::topology::Direction;
use crate::uniform_grid::{EdgeIndex, GridConstructionError, UniformCartesianCubeGrid3d};
use crate::{
Aabb3d, MapType, Parameters, RealConvert, SpatialDecomposition, SurfaceReconstruction, kernel,
new_map, new_parallel_map, profile,
};
use crate::{Index, Real};
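/// Index type used for the global marching cubes grid, which may exceed the range of the local index type `I`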
type GlobalIndex = i64;
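/// Internal parameters and derived grids for the subdomain-grid based surface reconstruction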
pub(crate) struct ParametersSubdomainGrid<I: Index, R: Real> {
#[allow(unused)]
pub particle_radius: R,
pub particle_rest_mass: R,
pub compact_support_radius: R,
pub surface_threshold: R,
pub cube_size: R,
pub subdomain_cubes: I,
pub ghost_particle_margin: R,
pub global_marching_cubes_grid: UniformCartesianCubeGrid3d<GlobalIndex, R>,
pub subdomain_grid: UniformCartesianCubeGrid3d<I, R>,
pub chunk_size: usize,
pub global_neighborhood_list: bool,
}
impl<I: Index, R: Real> ParametersSubdomainGrid<I, R> {
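/// Returns the global marching cubes grid converted to the local index type `I`, or an error if the cell counts per dimension do not fit into `I`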
pub(crate) fn global_marching_cubes_grid(
&self,
) -> Result<UniformCartesianCubeGrid3d<I, R>, GridConstructionError<I, R>> {
let n_cells = self.global_marching_cubes_grid.cells_per_dim();
UniformCartesianCubeGrid3d::new(
self.global_marching_cubes_grid.aabb().min(),
&[
I::from(n_cells[0]).ok_or(GridConstructionError::IndexTypeTooSmallCellsPerDim)?,
I::from(n_cells[1]).ok_or(GridConstructionError::IndexTypeTooSmallCellsPerDim)?,
I::from(n_cells[2]).ok_or(GridConstructionError::IndexTypeTooSmallCellsPerDim)?,
],
self.global_marching_cubes_grid.cell_size(),
)
}
}
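/// Result of the particle decomposition: for each non-empty subdomain its flattened index in the subdomain grid and the indices of all particles assigned to it (including ghost particles)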
pub(crate) struct Subdomains<I: Index> {
flat_subdomain_indices: Vec<I>,
per_subdomain_particles: Vec<Vec<usize>>,
}
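/// Converts the user-facing `Parameters` into the internal `ParametersSubdomainGrid`, constructing the global marching cubes grid (padded to a multiple of the subdomain size) and the coarse subdomain grid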
pub(crate) fn initialize_parameters<I: Index, R: Real>(
parameters: &Parameters<R>,
_particles: &[Vector3<R>],
output_surface: &SurfaceReconstruction<I, R>,
) -> Result<ParametersSubdomainGrid<I, R>, anyhow::Error> {
let chunk_size = 500;
let SpatialDecomposition::UniformGrid(grid_parameters) = ¶meters.spatial_decomposition
else {
return Err(anyhow!(
"spatial decomposition parameters for uniform grid are missing"
));
};
let subdomain_cubes_in = grid_parameters.subdomain_num_cubes_per_dim;
let subdomain_cubes = I::from_u32(subdomain_cubes_in)
.expect("number of subdomain cubes has to fit in index type");
let subdomain_cubes_global = GlobalIndex::from_u32(subdomain_cubes_in)
.expect("number of subdomain cubes has to fit in global index type");
let particle_radius = parameters.particle_radius;
let particle_rest_density = parameters.rest_density;
let compact_support_radius = parameters.compact_support_radius;
let cube_size = parameters.cube_size;
let surface_threshold = parameters.iso_surface_threshold;
let particle_rest_volume = kernel::Volume::cube_particle(particle_radius);
let particle_rest_mass = particle_rest_volume * particle_rest_density;
let ghost_particle_margin =
(compact_support_radius / cube_size).ceil() * cube_size * 1.01.convert();
{
let ghost_margin_cubes = I::from((ghost_particle_margin / cube_size).ceil())
.expect("ghost margin cube count has to fit in index type");
let vol_subdomain = subdomain_cubes
.checked_cubed()
.expect("number of cubes per subdomain has to be representable in index type");
let vol_margin = (ghost_margin_cubes * I::two() + subdomain_cubes)
.checked_cubed()
.expect(
"number of cubes per subdomain with margin has to be representable in index type",
)
- vol_subdomain;
info!(
"The ghost margin volume per subdomain is {:.2}% of the subdomain volume",
(vol_margin.to_real().unwrap_or(R::one())
/ vol_subdomain.to_real().unwrap_or(R::one()))
* 100.0.convert()
);
info!(
"The ghost margin per subdomain is {:.2} MC cells or {:.2} subdomains wide",
ghost_particle_margin / cube_size,
ghost_particle_margin / (cube_size * subdomain_cubes.to_real_unchecked())
);
}
let aabb = output_surface.grid.aabb();
let global_mc_grid = UniformCartesianCubeGrid3d::<GlobalIndex, R>::new(
aabb.min(),
&output_surface
.grid
.cells_per_dim()
.map(|c| <GlobalIndex as NumCast>::from(c).unwrap()),
cube_size,
)
.context("construct initial global marching cubes cell grid")?;
trace!("Initial global MC Grid: {:?}", global_mc_grid);
let cells_per_dim = global_mc_grid.cells_per_dim();
let num_subdomains = [
int_ceil_div(cells_per_dim[0], subdomain_cubes_global),
int_ceil_div(cells_per_dim[1], subdomain_cubes_global),
int_ceil_div(cells_per_dim[2], subdomain_cubes_global),
];
let num_global_mc_cells = (|| -> Option<_> {
Some([
num_subdomains[0].checked_mul(subdomain_cubes_global)?,
num_subdomains[1].checked_mul(subdomain_cubes_global)?,
num_subdomains[2].checked_mul(subdomain_cubes_global)?,
])
})()
.context("compute global number of marching cubes cells per dimension")?;
let global_mc_grid = UniformCartesianCubeGrid3d::<GlobalIndex, R>::new(
global_mc_grid.aabb().min(),
&num_global_mc_cells,
cube_size,
)
.context("construct final global marching cubes cell grid")?;
trace!("Global MC Grid: {:?}", global_mc_grid);
let num_subdomains = (|| -> Option<_> {
Some([
I::from(num_subdomains[0])?,
I::from(num_subdomains[1])?,
I::from(num_subdomains[2])?,
])
})()
.context("convert number of subdomains per dimension to local index type")?;
let subdomain_count: I = (|| -> Option<_> {
num_subdomains[0].checked_mul(&num_subdomains[1].checked_mul(&num_subdomains[2])?)
})()
.context("compute total number of subdomains")?;
let subdomain_size = cube_size * subdomain_cubes.to_real_unchecked();
let subdomain_grid = UniformCartesianCubeGrid3d::<I, R>::new(
global_mc_grid.aabb().min(),
&num_subdomains,
subdomain_size,
)?;
{
let nc = subdomain_cubes;
let [nx, ny, nz] = subdomain_grid.cells_per_dim();
let [mc_x, mc_y, mc_z] = global_mc_grid.cells_per_dim();
info!("Number of subdomains: {subdomain_count} ({nx}x{ny}x{nz})");
info!(
"Number of MC cells per subdomain: {} ({nc}x{nc}x{nc})",
nc.cubed()
);
info!(
"Number of MC cells globally: {} ({mc_x}x{mc_y}x{mc_z})",
mc_x * mc_y * mc_z
);
trace!("Subdomain grid: {:?}", subdomain_grid);
}
Ok(ParametersSubdomainGrid {
particle_radius,
particle_rest_mass,
compact_support_radius,
surface_threshold,
cube_size,
subdomain_cubes,
ghost_particle_margin,
global_marching_cubes_grid: global_mc_grid,
subdomain_grid,
chunk_size,
global_neighborhood_list: parameters.global_neighborhood_list,
})
}
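/// Extracts the particles in the narrow band around the surface (particles with few neighbors plus two rings of their neighbors) and returns their positions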
#[allow(unused)]
pub(crate) fn extract_narrow_band<I: Index, R: Real>(
parameters: &ParametersSubdomainGrid<I, R>,
particles: &[Vector3<R>],
) -> Vec<Vector3<R>> {
profile!("Filter narrow band");
let compact_support_radius = parameters.compact_support_radius;
let ghost_particle_margin = (compact_support_radius / parameters.cube_size).ceil()
* parameters.cube_size
* 1.01.convert();
let aabb = {
let mut aabb = Aabb3d::<R>::par_from_points(particles);
aabb.grow_uniformly(ghost_particle_margin);
info!("Enlarged AABB: {:?}", aabb);
aabb
};
let neighbor_lists = {
profile!("Global neighborhood search");
let mut neighbors = Vec::new();
neighborhood_search_spatial_hashing_parallel::<GlobalIndex, _>(
&aabb,
particles,
compact_support_radius,
&mut neighbors,
);
neighbors
};
let narrow_band = {
profile!("Identify narrow band");
let surface_particle_neighbors = 30;
let surface_particles = neighbor_lists
.iter()
.enumerate()
.filter_map(|(i, nl)| (nl.len() < surface_particle_neighbors).then_some(i))
.collect::<Vec<_>>();
info!("");
info!(
"Number of pure \"surface particles\": {}",
surface_particles.len()
);
let mut first_ring = surface_particles
.iter()
.copied()
.flat_map(|i| neighbor_lists[i].iter().copied())
.collect::<Vec<_>>();
info!("First ring before dedup: {}", first_ring.len());
{
profile!("Dedup first ring");
first_ring.sort_unstable();
first_ring.dedup();
}
info!("First ring after dedup: {}", first_ring.len());
let mut second_ring = first_ring
.iter()
.copied()
.flat_map(|i| neighbor_lists[i].iter().copied())
.collect::<Vec<_>>();
info!("Second ring before dedup: {}", second_ring.len());
{
profile!("Dedup second ring");
second_ring.sort_unstable();
second_ring.dedup();
}
info!("Second ring after dedup: {}", second_ring.len());
let mut narrow_band = surface_particles;
narrow_band.append(&mut first_ring);
narrow_band.append(&mut second_ring);
info!("All before dedup: {}", narrow_band.len());
{
profile!("Dedup entire narrow band");
narrow_band.sort_unstable();
narrow_band.dedup();
}
info!(
"All after dedup: {} ({:.3}%), interior particles: {}",
narrow_band.len(),
(narrow_band.len() as f64 / particles.len() as f64) * 100.0,
particles.len() - narrow_band.len()
);
narrow_band
};
{
profile!("Collect narrow band positions");
narrow_band
.into_iter()
.map(|i| particles[i])
.collect::<Vec<_>>()
}
}
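/// Classifies all particles into subdomains (duplicating ghost particles into neighboring subdomains) and returns the per-subdomain particle index lists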
pub(crate) fn decomposition<
I: Index,
R: Real,
C: subdomain_classification::ParticleToSubdomainClassifier<I, R>,
>(
parameters: &ParametersSubdomainGrid<I, R>,
particles: &[Vector3<R>],
) -> Result<Subdomains<I>, anyhow::Error> {
profile!("decomposition");
info!("Starting classification of particles into subdomains.");
let per_subdomain_counter_tls = ThreadLocal::<RefCell<MapType<I, usize>>>::new();
{
profile!("classifying particles");
particles
.par_chunks(parameters.chunk_size)
.for_each(|particle_chunk| {
let mut per_subdomain_counter = per_subdomain_counter_tls
.get_or(|| RefCell::new(new_map()))
.borrow_mut();
let mut classifier = C::new();
for particle in particle_chunk.iter() {
classifier.classify_particle(
particle,
¶meters.subdomain_grid,
parameters.ghost_particle_margin,
);
for i in 0..classifier.len() {
let flat_subdomain_idx = classifier.get(i);
*per_subdomain_counter.entry(flat_subdomain_idx).or_insert(0) += 1;
}
}
});
}
let global_per_subdomain_counter = new_parallel_map();
{
profile!("merging TL per cell particle counters");
let per_subdomain_counter_tls = per_subdomain_counter_tls
.into_iter()
.map(RefCell::into_inner)
.collect::<Vec<_>>();
per_subdomain_counter_tls
.into_par_iter()
.for_each(|per_cell_counter| {
for (flat_cell_index, count) in per_cell_counter {
*global_per_subdomain_counter
.entry(flat_cell_index)
.or_insert(0) += count;
}
});
}
let mut subdomain_compressed_indices = new_map();
let mut flat_subdomain_indices = vec![I::zero(); global_per_subdomain_counter.len()];
let per_subdomain_particle_count: Vec<usize> = {
profile!("initializing flat subdomain data and index mapping");
global_per_subdomain_counter
.into_iter()
.enumerate()
.map(|(i, (flat_cell_index, particle_count))| {
subdomain_compressed_indices.insert(flat_cell_index, i);
flat_subdomain_indices[i] = flat_cell_index;
particle_count
})
.collect()
};
let mut per_subdomain_particles = Vec::with_capacity(per_subdomain_particle_count.len());
per_subdomain_particles.resize_with(per_subdomain_particle_count.len(), || {
Mutex::new(Vec::new())
});
{
profile!("copying particles to subdomains");
particles
.par_chunks(parameters.chunk_size)
.enumerate()
.for_each(|(chunk_idx, particle_chunk)| {
let chunk_offset = chunk_idx * parameters.chunk_size;
let mut classifier = C::new();
for (particle_idx, particle) in particle_chunk.iter().enumerate() {
let particle_idx = chunk_offset + particle_idx;
classifier.classify_particle(
particle,
¶meters.subdomain_grid,
parameters.ghost_particle_margin,
);
for i in 0..classifier.len() {
let flat_subdomain_idx = classifier.get(i);
let compressed_subdomain_idx =
subdomain_compressed_indices[&flat_subdomain_idx];
let mut subdomain_particles =
per_subdomain_particles[compressed_subdomain_idx].lock();
if subdomain_particles.is_empty() {
let particle_count =
per_subdomain_particle_count[compressed_subdomain_idx];
subdomain_particles.reserve(particle_count);
}
subdomain_particles.push(particle_idx);
}
}
});
}
let mut per_subdomain_particles = per_subdomain_particles
.into_iter()
.map(Mutex::into_inner)
.collect::<Vec<_>>();
{
profile!("sort subdomain particles");
per_subdomain_particles
.par_iter_mut()
.for_each(|particles| {
particles.sort_unstable();
});
}
Ok(Subdomains {
flat_subdomain_indices,
per_subdomain_particles,
})
}
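/// Computes the global particle density vector (and optionally the global neighborhood lists) by running a filtered neighborhood search and density evaluation per subdomain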
pub(crate) fn compute_global_densities_and_neighbors<I: Index, R: Real>(
parameters: &ParametersSubdomainGrid<I, R>,
global_particles: &[Vector3<R>],
subdomains: &Subdomains<I>,
) -> (Vec<R>, Vec<Vec<usize>>) {
profile!(parent, "compute_global_density_vector");
info!("Starting computation of global density vector.");
let global_particle_densities = Mutex::new(vec![R::zero(); global_particles.len()]);
let global_neighbors = Mutex::new(vec![Vec::new(); global_particles.len()]);
#[derive(Default)]
struct SubdomainWorkspace<R: Real> {
subdomain_particles: Vec<Vector3<R>>,
neighborhood_lists: FlatNeighborhoodList,
particle_densities: Vec<R>,
is_inside: Vec<bool>,
}
let workspace_tls = ThreadLocal::<RefCell<SubdomainWorkspace<R>>>::new();
subdomains
.flat_subdomain_indices
.par_iter()
.copied()
.zip(subdomains.per_subdomain_particles.par_iter())
.for_each(|(flat_subdomain_idx, subdomain_particle_indices)| {
profile!("subdomain density computation", parent = parent);
let mut workspace = workspace_tls.get_or_default().borrow_mut();
let SubdomainWorkspace {
subdomain_particles,
neighborhood_lists,
particle_densities,
is_inside,
} = &mut *workspace;
let flat_subdomain_idx: I = flat_subdomain_idx;
let subdomain_particle_indices: &[usize] = subdomain_particle_indices.as_slice();
{
profile!("collect subdomain data");
gather_subdomain_data(
global_particles,
subdomain_particle_indices,
subdomain_particles,
);
}
let subdomain_idx = parameters
.subdomain_grid
.try_unflatten_cell_index(flat_subdomain_idx)
.expect("Subdomain cell does not exist");
let subdomain_aabb = parameters.subdomain_grid.cell_aabb(&subdomain_idx);
let margin_aabb = {
let mut margin_aabb = subdomain_aabb.clone();
margin_aabb.grow_uniformly(parameters.ghost_particle_margin * 1.5.convert());
margin_aabb
};
{
profile!("initialize particle filter");
is_inside.clear();
reserve_total(is_inside, subdomain_particle_indices.len());
is_inside.extend(
subdomain_particles
.iter()
.map(|p| subdomain_aabb.contains_point(p)),
);
}
neighborhood_search_spatial_hashing_flat_filtered::<I, R>(
&margin_aabb,
subdomain_particles,
parameters.compact_support_radius,
neighborhood_lists,
|i| is_inside[i],
);
sequential_compute_particle_densities_filtered::<I, R, _>(
subdomain_particles,
neighborhood_lists,
parameters.compact_support_radius,
parameters.particle_rest_mass,
particle_densities,
|i| is_inside[i],
);
{
profile!("update global density values");
let mut global_particle_densities = global_particle_densities.lock();
is_inside
.iter()
.copied()
.zip(
subdomain_particle_indices
.iter()
.copied()
.zip(particle_densities.iter().copied()),
)
.filter(|(is_inside, _)| *is_inside)
.for_each(|(_, (particle_idx, density))| {
global_particle_densities[particle_idx] = density;
});
}
if parameters.global_neighborhood_list {
profile!("update global neighbor list");
let mut global_neighbors = global_neighbors.lock();
is_inside
.iter()
.copied()
.zip(
subdomain_particle_indices
.iter()
.copied()
.zip(neighborhood_lists.iter()),
)
.filter(|(is_inside, _)| *is_inside)
.for_each(|(_, (particle_idx, neighbors))| {
global_neighbors[particle_idx] = neighbors
.iter()
.copied()
.map(|local| subdomain_particle_indices[local])
.collect();
});
}
});
let global_particle_densities = global_particle_densities.into_inner();
let global_neighbors = global_neighbors.into_inner();
(global_particle_densities, global_neighbors)
}
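/// Triangulated surface patch of a single subdomain, with flags marking which vertices and triangles lie in the subdomain interior and, for every exterior vertex, the globalized (subdomain, edge) index used for stitching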
pub(crate) struct SurfacePatch<I: Index, R: Real> {
pub vertices: Vec<Vector3<R>>,
pub triangles: Vec<[usize; 3]>,
pub vertex_inside_count: usize,
pub triangle_inside_count: usize,
pub vertex_inside_flags: Vec<bool>,
pub triangle_inside_flags: Vec<bool>,
pub exterior_vertex_edge_indices: Vec<(I, EdgeIndex<I>)>,
}
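/// Computes the range of local marching cubes points (lower inclusive, upper exclusive) that can be influenced by the given particle, clamped to the subdomain grid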
#[inline(always)]
fn particle_influence_aabb<I: Index, R: Real>(
particle: &Vector3<R>,
subdomain_mc_grid: &UniformCartesianCubeGrid3d<I, R>,
cube_radius: I,
) -> ([I; 3], [I; 3]) {
let extents = subdomain_mc_grid.points_per_dim();
let particle_cell = subdomain_mc_grid.enclosing_cell(particle);
let lower = [0, 1, 2].map(|i| {
particle_cell[i]
.saturating_sub(&cube_radius)
.max(I::zero())
.min(extents[i])
});
let upper = [0, 1, 2].map(|i| {
(particle_cell[i] + cube_radius + I::two())
.min(extents[i])
.max(I::zero())
});
(lower, upper)
}
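/// Converts a local marching cubes point index inside a subdomain to the corresponding point index in the global marching cubes grid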
fn local_to_global_point_ijk<I: Index>(
local_point_ijk: [I; 3],
subdomain_ijk: [I; 3],
cells_per_subdomain: [I; 3],
) -> [GlobalIndex; 3] {
let local_point_ijk = local_point_ijk.map(|i| <GlobalIndex as NumCast>::from(i).unwrap());
let subdomain_ijk = subdomain_ijk.map(|i| <GlobalIndex as NumCast>::from(i).unwrap());
let cells_per_subdomain =
cells_per_subdomain.map(|i| <GlobalIndex as NumCast>::from(i).unwrap());
let [i, j, k] = local_point_ijk;
[
subdomain_ijk[0] * cells_per_subdomain[0] + i,
subdomain_ijk[1] * cells_per_subdomain[1] + j,
subdomain_ijk[2] * cells_per_subdomain[2] + k,
]
}
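/// Evaluates the level-set values on the subdomain grid, dispatching at runtime to the AVX2+FMA or NEON implementation if available and falling back to the scalar loop otherwise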
pub fn density_grid_loop_auto<K: SymmetricKernel3d<f32>>(
levelset_grid: &mut [f32],
subdomain_particles: &[Vector3<f32>],
subdomain_particle_densities: &[f32],
subdomain_mc_grid: &UniformCartesianCubeGrid3d<i64, f32>,
subdomain_ijk: &[i64; 3],
global_mc_grid: &UniformCartesianCubeGrid3d<GlobalIndex, f32>,
cube_radius: i64,
squared_support_with_margin: f32,
particle_rest_mass: f32,
kernel: &K,
) {
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
{
if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma")
{
unsafe {
return density_grid_loop_avx(
levelset_grid,
subdomain_particles,
subdomain_particle_densities,
subdomain_mc_grid,
subdomain_ijk,
global_mc_grid,
cube_radius,
squared_support_with_margin,
particle_rest_mass,
kernel,
);
}
}
}
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") {
unsafe {
return density_grid_loop_neon(
levelset_grid,
subdomain_particles,
subdomain_particle_densities,
subdomain_mc_grid,
subdomain_ijk,
global_mc_grid,
cube_radius,
squared_support_with_margin,
particle_rest_mass,
kernel,
);
}
}
}
density_grid_loop_scalar::<i64, f32, K>(
levelset_grid,
subdomain_particles,
subdomain_particle_densities,
subdomain_mc_grid,
subdomain_ijk,
global_mc_grid,
cube_radius,
squared_support_with_margin,
particle_rest_mass,
kernel,
);
}
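/// Scalar fallback of the density grid loop: accumulates the kernel-interpolated contribution `v_i * W(r)` of every particle onto all grid points within its support radius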
pub fn density_grid_loop_scalar<I: Index, R: Real, K: SymmetricKernel3d<R>>(
levelset_grid: &mut [R],
subdomain_particles: &[Vector3<R>],
subdomain_particle_densities: &[R],
subdomain_mc_grid: &UniformCartesianCubeGrid3d<I, R>,
subdomain_ijk: &[I; 3],
global_mc_grid: &UniformCartesianCubeGrid3d<GlobalIndex, R>,
cube_radius: I,
squared_support_with_margin: R,
particle_rest_mass: R,
kernel: &K,
) {
profile!("density grid loop (scalar)");
let mc_grid = subdomain_mc_grid;
for (p_i, rho_i) in subdomain_particles
.iter()
.copied()
.zip(subdomain_particle_densities.iter().copied())
{
let (lower, upper) = particle_influence_aabb(&p_i, &mc_grid, cube_radius);
for i in I::range(lower[0], upper[0]).iter() {
for j in I::range(lower[1], upper[1]).iter() {
for k in I::range(lower[2], upper[2]).iter() {
let point_ijk = [i, j, k];
let local_point = mc_grid
.get_point(point_ijk)
.expect("point has to be part of the subdomain grid");
let mc_cells_per_subdomain = mc_grid.cells_per_dim();
let global_point_ijk = local_to_global_point_ijk(
point_ijk,
*subdomain_ijk,
*mc_cells_per_subdomain,
);
let global_point = global_mc_grid
.get_point(global_point_ijk)
.expect("point has to be part of the global mc grid");
let point_coordinates = global_mc_grid.point_coordinates(&global_point);
let dx = p_i - point_coordinates;
let dx_norm_sq = dx.norm_squared();
if dx_norm_sq < squared_support_with_margin {
let v_i = particle_rest_mass / rho_i;
let r = dx_norm_sq.sqrt();
let w_ij = kernel.evaluate(r);
let interpolated_value = v_i * w_ij;
let flat_point_idx = mc_grid.flatten_point_index(&local_point);
let flat_point_idx = flat_point_idx.to_usize().unwrap();
levelset_grid[flat_point_idx] += interpolated_value;
}
}
}
}
}
}
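/// NEON implementation of the density grid loop, processing four grid points along the z-axis per iteration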
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
pub fn density_grid_loop_neon<K: SymmetricKernel3d<f32>>(
levelset_grid: &mut [f32],
subdomain_particles: &[Vector3<f32>],
subdomain_particle_densities: &[f32],
subdomain_mc_grid: &UniformCartesianCubeGrid3d<i64, f32>,
subdomain_ijk: &[i64; 3],
global_mc_grid: &UniformCartesianCubeGrid3d<GlobalIndex, f32>,
cube_radius: i64,
_squared_support_with_margin: f32,
particle_rest_mass: f32,
kernel: &K,
) {
use core::arch::aarch64::*;
const LANES: usize = 4;
assert!(
global_mc_grid
.points_per_dim()
.iter()
.copied()
.all(|p| p < u32::MAX as i64),
"global marching cubes grid has too many vertices per dimension ({:?}) to fit into u32 (max: {}) for density_grid_loop_neon",
global_mc_grid.points_per_dim(),
u32::MAX
);
let kernel_neon = kernel::CubicSplineKernelNeonF32::new(kernel.compact_support_radius());
profile!("density grid loop (neon)");
let mc_grid = subdomain_mc_grid;
let mc_points = mc_grid.points_per_dim();
let dim_y = mc_points[1] as usize;
let dim_z = mc_points[2] as usize;
const K_OFFSETS_ARR: [u32; 4] = [0, 1, 2, 3];
let k_offsets = unsafe { vld1q_u32(K_OFFSETS_ARR.as_ptr()) };
let global_min = global_mc_grid.aabb().min();
let min_z_v = vdupq_n_f32(global_min[2]);
let cube_size = global_mc_grid.cell_size();
let support = kernel.compact_support_radius();
let support_sq_v = vdupq_n_f32(support * support);
for (p_i, rho_i) in subdomain_particles
.iter()
.copied()
.zip(subdomain_particle_densities.iter().copied())
{
let v_i = particle_rest_mass / rho_i;
let (lower, upper) = particle_influence_aabb(&p_i, &mc_grid, cube_radius);
let lower = lower.map(|i| i as usize);
let upper = upper.map(|i| i as usize);
let remainder = (upper[2] - lower[2]) % LANES;
let upper_k_aligned = upper[2] - remainder;
let subdomain_ijk = subdomain_ijk.map(|i| i as u32);
let mc_cells = mc_grid.cells_per_dim().map(|i| i as u32);
let particle_xs = vdupq_n_f32(p_i[0]);
let particle_ys = vdupq_n_f32(p_i[1]);
let particle_zs = vdupq_n_f32(p_i[2]);
let evaluate_contribution =
|k: usize, grid_xs: float32x4_t, grid_ys: float32x4_t| -> float32x4_t {
let global_k_base = vdupq_n_u32(subdomain_ijk[2] * mc_cells[2] + k as u32);
let global_k = vaddq_u32(global_k_base, k_offsets);
let global_k_f32 = vcvtq_f32_u32(global_k);
let grid_zs = vmlaq_n_f32(min_z_v, global_k_f32, cube_size);
let dxs = vsubq_f32(particle_xs, grid_xs);
let dys = vsubq_f32(particle_ys, grid_ys);
let dzs = vsubq_f32(particle_zs, grid_zs);
let dist_sq = vmlaq_f32(vmulq_f32(dxs, dxs), dys, dys);
let dist_sq = vmlaq_f32(dist_sq, dzs, dzs);
let inside_mask = vcltq_f32(dist_sq, support_sq_v);
let any_inside = vmaxvq_u32(inside_mask);
if any_inside == 0 {
return vdupq_n_f32(0.0);
}
let dist = vsqrtq_f32(dist_sq);
let w = kernel_neon.evaluate(dist);
vbslq_f32(inside_mask, w, vdupq_n_f32(0.0))
};
for i in lower[0]..upper[0] {
for j in lower[1]..upper[1] {
let flat_index_base = (i * dim_y + j) * dim_z;
let global_i = subdomain_ijk[0] * mc_cells[0] + i as u32;
let global_j = subdomain_ijk[1] * mc_cells[1] + j as u32;
let grid_x = global_i as f32 * cube_size + global_min[0];
let grid_y = global_j as f32 * cube_size + global_min[1];
let grid_xs = vdupq_n_f32(grid_x);
let grid_ys = vdupq_n_f32(grid_y);
for k in (lower[2]..upper_k_aligned).step_by(LANES) {
let flat_point_idx = flat_index_base + k;
let w_ij = evaluate_contribution(k, grid_xs, grid_ys);
let prev_val = unsafe {
vld1q_f32(levelset_grid.as_ptr().offset(flat_point_idx as isize))
};
let val = vmlaq_n_f32(prev_val, w_ij, v_i);
unsafe {
vst1q_f32(
levelset_grid.as_mut_ptr().offset(flat_point_idx as isize),
val,
)
};
}
if remainder > 0 {
let flat_point_idx = flat_index_base + upper_k_aligned;
let w_ij = evaluate_contribution(upper_k_aligned, grid_xs, grid_ys);
let w_ij_v = vmulq_n_f32(w_ij, v_i);
let w_ij_v =
unsafe { std::mem::transmute::<float32x4_t, [f32; LANES]>(w_ij_v) };
for rr in 0..remainder {
levelset_grid[flat_point_idx + rr] += w_ij_v[rr];
}
}
}
}
}
}
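/// AVX2+FMA implementation of the density grid loop, processing eight grid points along the z-axis per iteration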
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn density_grid_loop_avx<K: SymmetricKernel3d<f32>>(
levelset_grid: &mut [f32],
subdomain_particles: &[Vector3<f32>],
subdomain_particle_densities: &[f32],
subdomain_mc_grid: &UniformCartesianCubeGrid3d<i64, f32>,
subdomain_ijk: &[i64; 3],
global_mc_grid: &UniformCartesianCubeGrid3d<GlobalIndex, f32>,
cube_radius: i64,
_squared_support_with_margin: f32,
particle_rest_mass: f32,
kernel: &K,
) {
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
const LANES: usize = 8;
assert!(
global_mc_grid
.points_per_dim()
.iter()
.copied()
.all(|p| p < i32::MAX as i64),
"global marching cubes grid has too many vertices per dimension ({:?}) to fit into i32 (max: {}) for density_grid_loop_avx",
global_mc_grid.points_per_dim(),
i32::MAX
);
let kernel_avx = kernel::CubicSplineKernelAvxF32::new(kernel.compact_support_radius());
profile!("density grid loop (avx)");
let mc_grid = subdomain_mc_grid;
let mc_points = mc_grid.points_per_dim();
let dim_y = mc_points[1] as usize;
let dim_z = mc_points[2] as usize;
let k_offsets_i32 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let global_min = global_mc_grid.aabb().min();
let min_z_v = _mm256_set1_ps(global_min[2]);
let cube_size = global_mc_grid.cell_size();
let cube_size_ps = _mm256_set1_ps(cube_size);
let support = kernel.compact_support_radius();
let support_sq_v = _mm256_set1_ps(support * support);
for (p_i, rho_i) in subdomain_particles
.iter()
.copied()
.zip(subdomain_particle_densities.iter().copied())
{
let v_i = particle_rest_mass / rho_i;
let (lower, upper) = particle_influence_aabb(&p_i, &mc_grid, cube_radius);
let lower = lower.map(|i| i as usize);
let upper = upper.map(|i| i as usize);
let remainder = (upper[2] - lower[2]) % LANES;
let upper_k_aligned = upper[2] - remainder;
let subdomain_ijk_i32 = subdomain_ijk.map(|i| i as i32);
let mc_cells = mc_grid.cells_per_dim().map(|i| i as i32);
let particle_xs = _mm256_set1_ps(p_i[0]);
let particle_ys = _mm256_set1_ps(p_i[1]);
let particle_zs = _mm256_set1_ps(p_i[2]);
let v_i_ps = _mm256_set1_ps(v_i);
let evaluate_contribution = |k: usize, grid_xs: __m256, grid_ys: __m256| -> __m256 {
let base_k = subdomain_ijk_i32[2] * mc_cells[2] + k as i32;
let global_k_i32 = _mm256_add_epi32(_mm256_set1_epi32(base_k), k_offsets_i32);
let global_k_f32 = _mm256_cvtepi32_ps(global_k_i32);
let grid_zs = _mm256_fmadd_ps(global_k_f32, cube_size_ps, min_z_v);
let dxs = _mm256_sub_ps(particle_xs, grid_xs);
let dys = _mm256_sub_ps(particle_ys, grid_ys);
let dzs = _mm256_sub_ps(particle_zs, grid_zs);
let dist_sq = {
let t = _mm256_fmadd_ps(dxs, dxs, _mm256_mul_ps(dys, dys));
_mm256_fmadd_ps(dzs, dzs, t)
};
let inside_mask = _mm256_cmp_ps::<_CMP_LT_OQ>(dist_sq, support_sq_v);
if _mm256_movemask_ps(inside_mask) == 0 {
return _mm256_set1_ps(0.0);
}
let dist = _mm256_sqrt_ps(dist_sq);
let w = kernel_avx.evaluate(dist);
_mm256_and_ps(w, inside_mask)
};
for i in lower[0]..upper[0] {
for j in lower[1]..upper[1] {
let flat_index_base = (i * dim_y + j) * dim_z;
let global_i = subdomain_ijk_i32[0] * mc_cells[0] + i as i32;
let global_j = subdomain_ijk_i32[1] * mc_cells[1] + j as i32;
let grid_x = global_i as f32 * cube_size + global_min[0];
let grid_y = global_j as f32 * cube_size + global_min[1];
let grid_xs = _mm256_set1_ps(grid_x);
let grid_ys = _mm256_set1_ps(grid_y);
for k in (lower[2]..upper_k_aligned).step_by(LANES) {
let flat_point_idx = flat_index_base + k;
let w_ij = evaluate_contribution(k, grid_xs, grid_ys);
unsafe {
let prev_val = _mm256_loadu_ps(levelset_grid.as_ptr().add(flat_point_idx));
let val = _mm256_fmadd_ps(w_ij, v_i_ps, prev_val);
_mm256_storeu_ps(levelset_grid.as_mut_ptr().add(flat_point_idx), val);
}
}
if remainder > 0 {
let flat_point_idx = flat_index_base + upper_k_aligned;
let w_ij = evaluate_contribution(upper_k_aligned, grid_xs, grid_ys);
unsafe {
let val = _mm256_mul_ps(w_ij, v_i_ps);
let mut tmp: [f32; LANES] = [0.0; LANES];
_mm256_storeu_ps(tmp.as_mut_ptr(), val);
for rr in 0..remainder {
levelset_grid[flat_point_idx + rr] += tmp[rr];
}
}
}
}
}
}
}
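/// Density grid loop for sparsely populated subdomains that additionally records in `index_cache` the indices of all cells adjacent to grid points whose level-set value exceeds the surface threshold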
pub fn density_grid_loop_sparse<I: Index, R: Real, K: SymmetricKernel3d<R>>(
levelset_grid: &mut [R],
index_cache: &mut Vec<I>,
subdomain_particles: &[Vector3<R>],
subdomain_particle_densities: &[R],
subdomain_mc_grid: &UniformCartesianCubeGrid3d<I, R>,
subdomain_ijk: &[I; 3],
global_mc_grid: &UniformCartesianCubeGrid3d<GlobalIndex, R>,
cube_radius: I,
squared_support_with_margin: R,
particle_rest_mass: R,
surface_threshold: R,
kernel: &K,
) {
profile!("density grid loop (sparse)");
let mc_grid = subdomain_mc_grid;
for (p_i, rho_i) in subdomain_particles
.iter()
.copied()
.zip(subdomain_particle_densities.iter().copied())
{
let (lower, upper) = particle_influence_aabb(&p_i, &mc_grid, cube_radius);
for i in I::range(lower[0], upper[0]).iter() {
for j in I::range(lower[1], upper[1]).iter() {
for k in I::range(lower[2], upper[2]).iter() {
let point_ijk = [i, j, k];
let local_point = mc_grid
.get_point(point_ijk)
.expect("point has to be part of the subdomain grid");
let mc_cells_per_subdomain = mc_grid.cells_per_dim();
let global_point_ijk = local_to_global_point_ijk(
point_ijk,
*subdomain_ijk,
*mc_cells_per_subdomain,
);
let global_point = global_mc_grid
.get_point(global_point_ijk)
.expect("point has to be part of the global mc grid");
let point_coordinates = global_mc_grid.point_coordinates(&global_point);
let dx = p_i - point_coordinates;
let dx_norm_sq = dx.norm_squared();
if dx_norm_sq < squared_support_with_margin {
let v_i = particle_rest_mass / rho_i;
let r = dx_norm_sq.sqrt();
let w_ij = kernel.evaluate(r);
let interpolated_value = v_i * w_ij;
let flat_point_idx = mc_grid.flatten_point_index(&local_point);
let flat_point_idx = flat_point_idx.to_usize().unwrap();
levelset_grid[flat_point_idx] += interpolated_value;
if levelset_grid[flat_point_idx] > surface_threshold {
for c in mc_grid
.cells_adjacent_to_point(
&mc_grid.get_point_neighborhood(&local_point),
)
.iter()
.flatten()
{
let flat_cell_index = mc_grid.flatten_cell_index(c);
index_cache.push(flat_cell_index);
}
}
}
}
}
}
}
}
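/// Reconstructs a triangulated surface patch for every subdomain: level-set evaluation (sparse, scalar or SIMD) followed by a local marching cubes triangulation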
pub(crate) fn reconstruct_subdomains<I: Index, R: Real>(
parameters: &ParametersSubdomainGrid<I, R>,
global_particles: &[Vector3<R>],
global_particle_densities: &[R],
subdomains: &Subdomains<I>,
enable_simd: bool,
) -> Vec<SurfacePatch<I, R>> {
profile!(parent, "reconstruction");
let squared_support = parameters.compact_support_radius * parameters.compact_support_radius;
let squared_support_with_margin = squared_support * 1.01.convert();
let cube_radius = I::from((parameters.compact_support_radius / parameters.cube_size).ceil())
.expect("kernel radius in cubes has to fit in index type");
let kernel = CubicSplineKernel::new(parameters.compact_support_radius);
let mc_total_cells = parameters.subdomain_cubes.cubed();
let mc_total_points = (parameters.subdomain_cubes + I::one()).cubed();
assert!(
mc_total_points.to_usize().is_some(),
"number of MC points per subdomain must fit into usize"
);
let max_particles = subdomains
.per_subdomain_particles
.iter()
.map(|p| p.len())
.max()
.unwrap_or(0);
info!("Largest subdomain has {} particles.", max_particles);
// Subdomains containing at most 5% of the largest subdomain's particle count (but at least 100 particles) are treated as sparse
let sparse_limit = (max_particles / (100 / 5)).max(100);
info!(
"Subdomains with {} or fewer particles will be considered sparse.",
sparse_limit
);
info!("Starting reconstruction (level-set evaluation and local triangulation).");
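// Maps an edge on an upper boundary face of a subdomain's MC grid to the equivalent edge in the neighboring subdomain in positive direction (clamped at the domain boundary), yielding a unique (subdomain, edge) key for deduplicating exterior vertices during stitching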
let globalize_local_edge = |mc_grid: &UniformCartesianCubeGrid3d<I, R>,
subdomain_grid: &UniformCartesianCubeGrid3d<I, R>,
subdomain_index: I,
local_edge: &EdgeIndex<I>|
-> (I, EdgeIndex<I>) {
let max_mc_point_index = mc_grid.points_per_dim().map(|i| i - I::one());
let max_subdomain_index = subdomain_grid
.cells_per_dim()
.map(|i| i.saturating_sub(&I::one()).max(I::zero()));
let is_max = local_edge.axis().orthogonal_axes().map(|orth_axis| {
local_edge.origin().index()[orth_axis.dim()] == max_mc_point_index[orth_axis.dim()]
});
if !is_max[0] && !is_max[1] {
(subdomain_index, *local_edge)
} else {
let subdomain_cell = subdomain_grid
.try_unflatten_cell_index(subdomain_index)
.expect("invalid subdomain index");
let mut target_subdomain_ijk = *subdomain_cell.index();
let mut target_local_origin_ijk = *local_edge.origin().index();
for (&orth_axis, &is_max) in local_edge
.axis()
.orthogonal_axes()
.iter()
.zip(is_max.iter())
{
if is_max {
target_subdomain_ijk[orth_axis.dim()] = (target_subdomain_ijk[orth_axis.dim()]
+ I::one())
.min(max_subdomain_index[orth_axis.dim()]);
target_local_origin_ijk[orth_axis.dim()] = I::zero();
}
}
let target_subdomain = subdomain_grid
.get_cell(target_subdomain_ijk)
.expect("target subdomain has to exist");
let flat_target_subdomain = subdomain_grid.flatten_cell_index(&target_subdomain);
let new_local_edge = mc_grid
.get_edge(target_local_origin_ijk, local_edge.axis())
.expect("failed to translate edge");
(flat_target_subdomain, new_local_edge)
}
};
#[derive(Default)]
struct SubdomainWorkspace<I: Index, R: Real> {
subdomain_particles: Vec<Vector3<R>>,
subdomain_particle_densities: Vec<R>,
levelset_grid: Vec<R>,
index_cache: Vec<I>,
}
let workspace_tls = ThreadLocal::<RefCell<SubdomainWorkspace<I, R>>>::new();
let reconstruct_patch = |flat_subdomain_idx: I,
subdomain_particle_indices: &Vec<usize>,
sparse: bool| {
let mut workspace = workspace_tls.get_or_default().borrow_mut();
let SubdomainWorkspace {
subdomain_particles,
subdomain_particle_densities,
levelset_grid,
index_cache,
} = &mut *workspace;
let flat_subdomain_idx: I = flat_subdomain_idx;
let subdomain_particle_indices: &[usize] = subdomain_particle_indices.as_slice();
{
gather_subdomain_data(
global_particles,
subdomain_particle_indices,
subdomain_particles,
);
gather_subdomain_data(
global_particle_densities,
subdomain_particle_indices,
subdomain_particle_densities,
);
}
let subdomain_idx = parameters
.subdomain_grid
.try_unflatten_cell_index(flat_subdomain_idx)
.expect("Subdomain cell does not exist");
let subdomain_aabb = parameters.subdomain_grid.cell_aabb(&subdomain_idx);
let mc_grid = UniformCartesianCubeGrid3d::new(
subdomain_aabb.min(),
&[parameters.subdomain_cubes; 3],
parameters.cube_size,
)
.unwrap();
levelset_grid.fill(R::zero());
levelset_grid.resize(mc_total_points.to_usize().unwrap(), R::zero());
index_cache.clear();
if sparse {
density_grid_loop_sparse(
levelset_grid.as_mut_slice(),
index_cache,
subdomain_particles.as_slice(),
subdomain_particle_densities.as_slice(),
&mc_grid,
&subdomain_idx.index(),
¶meters.global_marching_cubes_grid,
cube_radius,
squared_support_with_margin,
parameters.particle_rest_mass,
parameters.surface_threshold,
&kernel,
);
} else {
use std::any::TypeId;
use std::mem::transmute;
if enable_simd
&& TypeId::of::<I>() == TypeId::of::<i64>()
&& TypeId::of::<R>() == TypeId::of::<f32>()
{
density_grid_loop_auto(
unsafe { transmute::<&mut [R], &mut [f32]>(levelset_grid.as_mut_slice()) },
unsafe {
transmute::<&[Vector3<R>], &[Vector3<f32>]>(subdomain_particles.as_slice())
},
unsafe { transmute::<&[R], &[f32]>(subdomain_particle_densities.as_slice()) },
unsafe {
transmute::<
&UniformCartesianCubeGrid3d<I, R>,
&UniformCartesianCubeGrid3d<i64, f32>,
>(&mc_grid)
},
&subdomain_idx.index().map(|i| i.to_i64().unwrap()),
unsafe {
transmute::<
&UniformCartesianCubeGrid3d<GlobalIndex, R>,
&UniformCartesianCubeGrid3d<GlobalIndex, f32>,
>(¶meters.global_marching_cubes_grid)
},
cube_radius.to_i64().unwrap(),
squared_support_with_margin.to_f32().unwrap(),
parameters.particle_rest_mass.to_f32().unwrap(),
&CubicSplineKernel::new(parameters.compact_support_radius.to_f32().unwrap()),
);
} else {
density_grid_loop_scalar(
levelset_grid.as_mut_slice(),
subdomain_particles.as_slice(),
subdomain_particle_densities.as_slice(),
&mc_grid,
&subdomain_idx.index(),
¶meters.global_marching_cubes_grid,
cube_radius,
squared_support_with_margin,
parameters.particle_rest_mass,
&kernel,
);
}
}
let mut vertices = Vec::new();
let mut triangles = Vec::new();
let mut vertex_inside_count = 0;
let mut triangle_inside_count = 0;
let mut vertex_inside_flags = Vec::new();
let mut triangle_inside_flags = Vec::new();
let mut exterior_vertex_edge_indices = Vec::new();
let mut edge_to_vertex = new_map();
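// Triangulates a single MC cell: classifies its corner points against the surface threshold, then emits triangles whose vertices are interpolated along the cell edges (deduplicated per edge via `edge_to_vertex`)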
let mut triangulate_cell = |flat_cell_idx: I| {
let cell = mc_grid.try_unflatten_cell_index(flat_cell_idx).unwrap();
let mut vertices_inside = [true; 8];
let mut any_inside = false;
for local_point_index in 0..8 {
let point = cell.global_point_index_of(local_point_index).unwrap();
let flat_point_idx = mc_grid.flatten_point_index(&point);
let flat_point_idx = flat_point_idx.to_usize().unwrap();
let density_value = levelset_grid[flat_point_idx];
let vertex_inside = density_value > parameters.surface_threshold;
any_inside |= vertex_inside;
vertices_inside[local_point_index] = vertex_inside;
}
if !any_inside {
return;
}
for triangle in marching_cubes_triangulation_iter(&vertices_inside) {
let mut global_triangle = [0; 3];
for (v_idx, local_edge_index) in triangle.iter().copied().enumerate() {
let edge = cell
.global_edge_index_of(local_edge_index as usize)
.unwrap();
let vertex_index = *edge_to_vertex.entry(edge).or_insert_with(|| {
let origin_coords = mc_grid.point_coordinates(edge.origin());
let target_coords = mc_grid.point_coordinates(&edge.target());
let flat_origin_idx = mc_grid
.flatten_point_index(edge.origin())
.to_usize()
.unwrap();
let flat_target_idx = mc_grid
.flatten_point_index(&edge.target())
.to_usize()
.unwrap();
let origin_value = levelset_grid[flat_origin_idx];
let target_value = levelset_grid[flat_target_idx];
let alpha = (parameters.surface_threshold - origin_value)
/ (target_value - origin_value);
let interpolated_coords =
origin_coords * (R::one() - alpha) + target_coords * alpha;
let vertex_coords = interpolated_coords;
vertices.push(vertex_coords);
let vertex_index = vertices.len() - 1;
let is_interior_vertex = !mc_grid.is_boundary_edge(&edge);
vertex_inside_count += is_interior_vertex as usize;
vertex_inside_flags.push(is_interior_vertex);
if !is_interior_vertex {
exterior_vertex_edge_indices.push(globalize_local_edge(
&mc_grid,
¶meters.subdomain_grid,
flat_subdomain_idx,
&edge,
));
}
vertex_index
});
global_triangle[v_idx] = vertex_index;
}
let all_tri_vertices_inside = global_triangle
.iter()
.copied()
.all(|v_idx| vertex_inside_flags[v_idx]);
triangles.push(global_triangle);
triangle_inside_count += all_tri_vertices_inside as usize;
triangle_inside_flags.push(all_tri_vertices_inside);
}
};
{
profile!("mc triangulation loop");
if sparse {
index_cache.sort_unstable();
for flat_cell_idx in index_cache.iter().copied().dedup() {
triangulate_cell(flat_cell_idx);
}
} else {
for flat_cell_idx in I::range(I::zero(), mc_total_cells).iter() {
triangulate_cell(flat_cell_idx);
}
}
}
SurfacePatch {
vertices,
triangles,
vertex_inside_count,
triangle_inside_count,
vertex_inside_flags,
triangle_inside_flags,
exterior_vertex_edge_indices,
}
};
let mut surface_patches = Vec::with_capacity(subdomains.flat_subdomain_indices.len());
subdomains
.flat_subdomain_indices
.par_iter()
.copied()
.zip(subdomains.per_subdomain_particles.par_iter())
.map(|(flat_subdomain_idx, subdomain_particle_indices)| {
if subdomain_particle_indices.len() <= sparse_limit {
profile!("subdomain reconstruction (sparse)", parent = parent);
reconstruct_patch(flat_subdomain_idx, subdomain_particle_indices, true)
} else {
profile!("subdomain reconstruction (dense)", parent = parent);
reconstruct_patch(flat_subdomain_idx, subdomain_particle_indices, false)
}
})
.collect_into_vec(&mut surface_patches);
surface_patches
}
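/// Stitches the per-subdomain surface patches into a single global triangle mesh, deduplicating exterior vertices via their globalized (subdomain, edge) indices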
pub(crate) fn stitching<I: Index, R: Real>(
surface_patches: Vec<SurfacePatch<I, R>>,
) -> TriMesh3d<R> {
profile!("stitching");
info!("Starting stitching of subdomain meshes to global mesh.");
let vert_and_tri_offsets = {
profile!("surface patch offset scan");
std::iter::once((0, 0))
.chain(surface_patches.iter().scan((0, 0), |offsets, patch| {
let (vert_offset, tri_offset) = offsets;
*vert_offset += patch.vertex_inside_count;
*tri_offset += patch.triangle_inside_count;
Some(*offsets)
}))
.collect::<Vec<_>>()
};
let (total_interior_vert_count, total_interior_tri_count) = vert_and_tri_offsets
.last()
.copied()
.expect("there has to be at least one entry in the offset list");
let mut interior_vertices = vec![Vector3::<R>::zeros(); total_interior_vert_count];
let mut interior_triangles = vec![[0, 0, 0]; total_interior_tri_count];
let mut exterior_vertices = Vec::new();
let mut exterior_triangles = Vec::new();
let mut exterior_vertex_mapping = new_map();
{
profile!("copy interior verts/tris and deduplicate exterior verts");
if vert_and_tri_offsets.len() > 1 {
vert_and_tri_offsets
.windows(2)
.zip(surface_patches.iter())
.for_each(|(offsets, patch)| {
if let [start_offsets, end_offsets] = offsets {
let (start_verts, start_tris) = *start_offsets;
let (end_verts, end_tris) = *end_offsets;
let global_vertex_offset = start_verts;
let mut local_to_global_vertex_mapping = vec![0; patch.vertices.len()];
{
let out_verts = &mut interior_vertices[start_verts..end_verts];
patch
.vertices
.iter()
.zip(patch.vertex_inside_flags.iter())
.enumerate()
.filter_map(|(i, (v, is_interior))| is_interior.then_some((i, v)))
.enumerate()
.for_each(|(new_local_idx, (old_local_idx, vert))| {
out_verts[new_local_idx] = *vert;
local_to_global_vertex_mapping[old_local_idx] =
global_vertex_offset + new_local_idx;
});
}
{
let out_tris = &mut interior_triangles[start_tris..end_tris];
patch
.triangles
.iter()
.zip(patch.triangle_inside_flags.iter())
.filter_map(|(tri, is_interior)| is_interior.then_some(tri))
.enumerate()
.for_each(|(tri_idx, tri)| {
out_tris[tri_idx] = [
local_to_global_vertex_mapping[tri[0]],
local_to_global_vertex_mapping[tri[1]],
local_to_global_vertex_mapping[tri[2]],
];
});
}
{
patch
.vertices
.iter()
.zip(patch.vertex_inside_flags.iter())
.enumerate()
.filter_map(|(i, (v, is_interior))| {
(!is_interior).then_some((i, v))
})
.zip(patch.exterior_vertex_edge_indices.iter())
.for_each(|((old_local_idx, vert), edge_index)| {
let global_index = *exterior_vertex_mapping
.entry(*edge_index)
.or_insert_with(|| {
let global_index =
total_interior_vert_count + exterior_vertices.len();
exterior_vertices.push(*vert);
global_index
});
local_to_global_vertex_mapping[old_local_idx] = global_index;
});
}
{
patch
.triangles
.iter()
.zip(patch.triangle_inside_flags.iter())
.filter_map(|(tri, is_interior)| (!is_interior).then_some(tri))
.for_each(|tri| {
exterior_triangles.push(tri.map(|local_vert| {
local_to_global_vertex_mapping[local_vert]
}))
});
}
}
});
}
}
let mut vertices = interior_vertices;
vertices.append(&mut exterior_vertices);
let mut triangles = interior_triangles;
triangles.append(&mut exterior_triangles);
TriMesh3d {
vertices,
triangles,
}
}
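/// Classification of particles into subdomains, including assignment of ghost particles to neighboring subdomains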
pub(crate) mod subdomain_classification {
use super::*;
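/// Trait for classifying a particle into the subdomains it belongs to (possibly more than one due to the ghost particle margin)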
pub trait ParticleToSubdomainClassifier<I: Index, R: Real> {
fn new() -> Self;
fn classify_particle(
&mut self,
particle: &Vector3<R>,
subdomain_grid: &UniformCartesianCubeGrid3d<I, R>,
ghost_particle_margin: R,
);
fn len(&self) -> usize;
fn get(&self, i: usize) -> I;
}
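/// Classifier that assigns each particle to the subdomain containing it and to all neighboring subdomains whose ghost margin it falls into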
pub struct GhostMarginClassifier<I: Index> {
subdomains: Vec<I>,
}
impl<I: Index, R: Real> ParticleToSubdomainClassifier<I, R> for GhostMarginClassifier<I> {
fn new() -> Self {
GhostMarginClassifier {
subdomains: Vec::with_capacity(27),
}
}
fn classify_particle(
&mut self,
particle: &Vector3<R>,
subdomain_grid: &UniformCartesianCubeGrid3d<I, R>,
ghost_particle_margin: R,
) {
self.subdomains.clear();
ghost_particle_classification(
particle,
subdomain_grid,
ghost_particle_margin,
&mut self.subdomains,
)
}
#[inline(always)]
fn len(&self) -> usize {
self.subdomains.len()
}
#[inline(always)]
fn get(&self, i: usize) -> I {
self.subdomains[i]
}
}
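/// Collects the flattened indices of all subdomains that should receive the given particle, i.e. its own subdomain and every neighboring subdomain whose ghost particle margin contains it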
fn ghost_particle_classification<I: Index, R: Real>(
particle: &Vector3<R>,
subdomain_grid: &UniformCartesianCubeGrid3d<I, R>,
ghost_particle_margin: R,
subdomains: &mut Vec<I>,
) {
let subdomain_ijk = subdomain_grid.enclosing_cell(particle);
if subdomain_grid.get_cell(subdomain_ijk).is_none() {
return;
}
let dx = subdomain_grid.cell_size();
let subdomain_radius = I::from((ghost_particle_margin / dx).ceil())
.and_then(|i| i.to_i32())
.expect(
"ghost particle margin relative to subdomains has to fit in index type and i32",
);
let subdomain_aabb = subdomain_grid.cell_aabb(
&subdomain_grid
.get_cell(subdomain_ijk)
.expect("Subdomain has to be part of grid"),
);
let min_corner = subdomain_aabb.min();
let max_corner = subdomain_aabb.max();
let is_in_ghost_margin_single_dim = |step: i32, axis: usize| -> bool {
let step_offset_real = R::from(step.abs() - 1).unwrap();
if step > 0 {
((max_corner[axis] + step_offset_real * dx) - particle[axis])
< ghost_particle_margin
} else if step < 0 {
(particle[axis] - (min_corner[axis] - step_offset_real * dx))
< ghost_particle_margin
} else {
true
}
};
let is_in_ghost_margin = |x_step: i32, y_step: i32, z_step: i32| -> bool {
is_in_ghost_margin_single_dim(x_step, 0)
&& is_in_ghost_margin_single_dim(y_step, 1)
&& is_in_ghost_margin_single_dim(z_step, 2)
};
let checked_apply_step = |index: I, step: i32| -> Option<I> {
let direction = if step > 0 {
Some(Direction::Positive)
} else if step < 0 {
Some(Direction::Negative)
} else {
None
};
direction
.map(|d| d.checked_apply_step(index, I::from(step.abs()).unwrap()))
.unwrap_or(Some(index))
};
let checked_apply_step_ijk =
|ijk: [I; 3], x_step: i32, y_step: i32, z_step: i32| -> Option<[I; 3]> {
Some([
checked_apply_step(ijk[0], x_step)?,
checked_apply_step(ijk[1], y_step)?,
checked_apply_step(ijk[2], z_step)?,
])
};
let r = subdomain_radius;
for i in -r..=r {
for j in -r..=r {
for k in -r..=r {
let in_ghost_margin = is_in_ghost_margin(i, j, k);
if in_ghost_margin {
if let Some(neighbor_subdomain_ijk) =
checked_apply_step_ijk(subdomain_ijk, i, j, k)
&& let Some(cell) = subdomain_grid.get_cell(neighbor_subdomain_ijk)
{
subdomains.push(subdomain_grid.flatten_cell_index(&cell));
}
}
}
}
}
}
}
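/// Helper functions for logging statistics and debugging the subdomain decomposition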
pub(crate) mod debug {
use super::*;
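/// Logs statistics about the particle counts per subdomain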
#[allow(unused)]
pub(crate) fn subdomain_stats<I: Index, R: Real>(
parameters: &ParametersSubdomainGrid<I, R>,
particles: &[Vector3<R>],
subdomains: &Subdomains<I>,
) {
profile!("subdomain stats");
info!("Statistics");
let per_subdomain_particle_count = subdomains
.per_subdomain_particles
.iter()
.map(|p| p.len())
.collect::<Vec<_>>();
info!(
"Occupied subdomains: {}",
per_subdomain_particle_count.len()
);
info!("Smallest subdomains:");
for i in 0..11 {
let c = per_subdomain_particle_count
.iter()
.copied()
.filter(|c| *c == i)
.count();
info!(
"Number of subdomains with {} particles: {} ({:.2}% of number of subdomains)",
i,
c,
(c as f64 / per_subdomain_particle_count.len() as f64) * 100.0
);
}
info!("Other stats:");
for n in [10, 50, 100, 500, 1000, 2000, 10000] {
let c = per_subdomain_particle_count
.iter()
.copied()
.filter(|c| *c <= n)
.count();
info!(
"Number of subdomains with {} or fewer particles: {} ({:.2}% of number of subdomains)",
n,
c,
(c as f64 / per_subdomain_particle_count.len() as f64) * 100.0
);
}
{
let mut per_subdomain_particle_count = per_subdomain_particle_count.clone();
per_subdomain_particle_count.sort();
{
let largest_subdomain_particle_count =
per_subdomain_particle_count[per_subdomain_particle_count.len() - 1];
info!("Largest subdomain has {largest_subdomain_particle_count} particles");
for f in [0.95, 0.9, 0.8, 0.75, 0.5, 0.1] {
let c = per_subdomain_particle_count
.iter()
.copied()
.filter(|c| *c >= (f * largest_subdomain_particle_count as f64) as usize)
.count();
let n = per_subdomain_particle_count
.iter()
.copied()
.filter(|c| *c >= (f * largest_subdomain_particle_count as f64) as usize)
.sum::<usize>();
info!(
"Number of subdomains with {} or more particles ({}% of largest subdomain): {} ({:.2}% of number of subdomains), in sum {} particles ({:.2}% of all particles)",
(f * largest_subdomain_particle_count as f64) as usize,
f * 100.0,
c,
(c as f64 / per_subdomain_particle_count.len() as f64) * 100.0,
n,
100.0 * (n as f64 / particles.len() as f64)
);
}
}
info!("Largest subdomains:");
for i in 0..10 {
if let Some(&count) =
per_subdomain_particle_count.get(per_subdomain_particle_count.len() - 1 - i)
{
info!(
"{} particles ({:.2}% of all particles)",
count,
100.0 * (count as f64 / particles.len() as f64)
);
}
}
}
}
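/// Builds a hex mesh with one hexahedral cell per occupied subdomain (useful for visualizing the decomposition)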
#[allow(unused)]
pub(crate) fn subdomains_to_hexmesh<I: Index, R: Real>(
parameters: &ParametersSubdomainGrid<I, R>,
subdomains: &Subdomains<I>,
) -> HexMesh3d<R> {
let mut hexmesh = HexMesh3d::default();
let subdomain_grid = ¶meters.subdomain_grid;
for &flat_subdomain_idx in &subdomains.flat_subdomain_indices {
let subdomain_ijk = subdomain_grid
.try_unflatten_cell_index(flat_subdomain_idx)
.unwrap();
let [i, j, k] = *subdomain_ijk.index();
let vertex_offset = hexmesh.vertices.len();
{
let mut push_vertex = |abc: [i8; 3]| {
let [a, b, c] = abc;
hexmesh.vertices.push(
subdomain_grid.point_coordinates(
&subdomain_grid
.get_point([
i + I::from(a).unwrap(),
j + I::from(b).unwrap(),
k + I::from(c).unwrap(),
])
.unwrap(),
),
);
};
push_vertex([0, 0, 0]);
push_vertex([1, 0, 0]);
push_vertex([1, 1, 0]);
push_vertex([0, 1, 0]);
push_vertex([0, 0, 1]);
push_vertex([1, 0, 1]);
push_vertex([1, 1, 1]);
push_vertex([0, 1, 1]);
}
hexmesh.cells.push([
vertex_offset,
vertex_offset + 1,
vertex_offset + 2,
vertex_offset + 3,
vertex_offset + 4,
vertex_offset + 5,
vertex_offset + 6,
vertex_offset + 7,
]);
}
hexmesh
}
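/// Counts the subdomains that contain only ghost particles, i.e. no particles inside their own AABB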
#[allow(unused)]
pub(crate) fn count_no_owned_particles_subdomains<I: Index, R: Real>(
parameters: &ParametersSubdomainGrid<I, R>,
particles: &[Vector3<R>],
subdomains: &Subdomains<I>,
) -> usize {
profile!(parent, "count_no_owned_particles_subdomains");
let no_owned_particles_counter = AtomicUsize::new(0);
subdomains
.flat_subdomain_indices
.par_iter()
.copied()
.zip(subdomains.per_subdomain_particles.par_iter())
.for_each(|(flat_subdomain_idx, subdomain_particle_indices)| {
profile!("inner subdomain_tasks", parent = parent);
let flat_subdomain_idx: I = flat_subdomain_idx;
let subdomain_particle_indices: &[usize] = subdomain_particle_indices.as_slice();
let subdomain_particles = subdomain_particle_indices
.iter()
.copied()
.map(|idx| particles[idx])
.collect::<Vec<_>>();
let subdomain_idx = parameters
.subdomain_grid
.try_unflatten_cell_index(flat_subdomain_idx)
.expect("Subdomain cell does not exist");
let subdomain_aabb = parameters.subdomain_grid.cell_aabb(&subdomain_idx);
let non_ghost_particle_count = subdomain_particles
.iter()
.filter(|p| subdomain_aabb.contains_point(*p))
.count();
if non_ghost_particle_count == 0 {
no_owned_particles_counter.fetch_add(1, Ordering::AcqRel);
}
});
no_owned_particles_counter.into_inner()
}
}
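/// Ceiling division for non-negative integers (quotient rounded towards positive infinity)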
fn int_ceil_div<T: Integer + Copy>(numerator: T, denominator: T) -> T {
numerator / denominator + (numerator % denominator).min(T::one())
}
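/// Reserves additional capacity such that `vec` can hold at least `total_capacity` elements in total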
fn reserve_total<T>(vec: &mut Vec<T>, total_capacity: usize) {
if total_capacity > vec.capacity() {
vec.reserve(total_capacity - vec.capacity());
}
}
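/// Clears `subdomain_data` and fills it with the entries of `global_data` selected by `subdomain_particle_indices`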
fn gather_subdomain_data<T: Copy>(
global_data: &[T],
subdomain_particle_indices: &[usize],
subdomain_data: &mut Vec<T>,
) {
subdomain_data.clear();
reserve_total(subdomain_data, subdomain_particle_indices.len());
subdomain_data.extend(
subdomain_particle_indices
.iter()
.copied()
.map(|idx| global_data[idx]),
);
}