dedup_mesh 0.2.0

Deduplicates vertices in a 3d mesh
Documentation
// SPDX-License-Identifier: MIT OR Apache-2.0
// Copyright (c) 2025 lacklustr@protonmail.com https://github.com/eadf

mod auto;
#[cfg(feature = "parallel")]
mod multithreaded;
mod singlethreaded;

use crate::hash_grid::HashType;
use crate::prelude::*;
use crate::scalar::ScalarKernel;
use crate::util::Array3;
use crate::util::{Aabb, UnsafeArray};
use crate::{CheckFinitePolicy, IndexType};
use num_traits::AsPrimitive;
use rustc_hash::FxHashMap;
use std::hash::Hash;
use vob::Vob;

/// A compile time check that `MultiThreaded` is always used together with `FiniteCheck`
pub trait CompatibleWith<FiniteCheck> {}

#[cfg(feature = "parallel")]
/// Executes the de-duplication algorithm using all available CPU cores.
///
/// Requires the `parallel` crate feature (enabling `rayon` backend).
///
/// # Performance
///
/// - Significantly faster for large meshes (e.g., >50K–100K vertices).
/// - Uses all available logical CPU cores via [`rayon`].
/// - Less efficient **per CPU core** than [`SingleThreaded`] due to parallel overhead.
///
/// Typical speedup: ~2× faster at ~80,000 vertices (depending on mesh structure and hardware).
///
/// # Example
/// ```rust,ignore
/// dedup::<f32, usize, [f32; 3], MultiThreaded, Triangulated>(...)?;
/// ```
///
/// [`rayon`]: https://crates.io/crates/rayon
pub struct MultiThreaded;

/// Use this policy to run the de-duplication in a single thread.
///
/// This should be the default choice for simpler or smaller datasets where
/// multi-threading overhead would outweigh its benefits.
/// It is only at 2000 vertices or more the multithreaded solution outperforms the
/// single-threaded one. Per CPU cycle SingleThreaded will always be the most efficient mode.  
///
/// Use with the `Threading` type parameter in `vertex_deduplication`.
///
/// # Example
/// ```rust,ignore
/// dedup::<f32, SingleThreaded, [f32;3], _, _>(...)?;
/// ```
pub struct SingleThreaded;

/// Automatically selects the best threading policy at runtime.
///
/// - If the `parallel` feature is enabled and the mesh is large, selects [`MultiThreaded`].
/// - Otherwise, defaults to [`SingleThreaded`].
///
/// # Recommendation
///
/// This is the **default threading policy** recommended for most users.
///
/// # Fallback Behavior
///
/// Without the `parallel` feature enabled, `Auto` **always uses** `SingleThreaded`.
///
/// # Example
/// ```rust,ignore
/// dedup::<f32, usize, [f32; 3], Auto, Triangulated>(...)?;
/// ```
pub struct Auto;

/// Select either SingleThreaded, Multithreaded or Auto threading policy.
pub trait ThreadingDispatch {
    fn dedup_dispatch<T, Index, Vout, Topology>(
        vertices: &[impl Into<[T; 3]> + Clone + Sync],
        indices: &[Index],
        tolerance: T,
        prune_unused: PruneUnusedEnum,
        prune_degenerate: PruneDegenerateEnum,
        tolerance_policy: ToleranceEnum,
    ) -> Result<(Vec<Vout>, Vec<Index>), DeDupError>
    where
        T: Scalar,
        Index: IndexType,
        Topology: TopologyPolicy,
        usize: AsPrimitive<T>,
        Vout: Into<[T; 3]> + From<[T; 3]> + Clone + Sync;

    fn dedup_exact_dispatch<T, Index: IndexType, Vout, Topology, CheckFinite>(
        vertices: &[impl Into<[T; 3]> + Clone + Sync],
        indices: &[Index],
        prune_unused: PruneUnusedEnum,
        prune_degenerate: PruneDegenerateEnum,
    ) -> Result<(Vec<Vout>, Vec<Index>), DeDupError>
    where
        T: Scalar,
        Topology: TopologyPolicy,
        Vout: Into<[T; 3]> + From<[T; 3]> + Clone + Sync,
        CheckFinite: CheckFinitePolicy;
}

pub trait ThreadingKernel {
    fn dedup_vertices<T, Index, Vout, H, const AXIS: usize>(
        aabb: Aabb<T>,
        center: [T; 3],
        vertices: &[impl Into<[T; 3]> + Clone + Sync],
        tolerance: T,
        used_vertices: Option<Vob>,
    ) -> Result<(Vec<Vout>, Vec<Index>), DeDupError>
    where
        T: ScalarKernel,
        Index: IndexType,
        Vout: Into<[T; 3]> + From<[T; 3]> + Clone + Sync,
        H: HashType<T>,
        usize: AsPrimitive<T>;

    fn dedup_vertices_exact<T, Index, Vout, CheckFinite>(
        vertices: &[impl Into<[T; 3]> + Clone + Sync],
        used_vertices: Option<Vob>,
    ) -> Result<(Vec<Vout>, Vec<Index>), DeDupError>
    where
        T: ScalarKernel,
        Index: IndexType,
        Vout: Into<[T; 3]> + From<[T; 3]> + Clone + Sync,
        CheckFinite: CheckFinitePolicy;

    fn get_unused_vertices<T, Index>(
        vertices: &[impl Into<[T; 3]> + Clone + Sync],
        indices: &[Index],
    ) -> Result<Option<Vob>, DeDupError>
    where
        T: ScalarKernel,
        Index: IndexType;
}

// TODO: this should return a Result<_>
#[inline]
fn compute_centroid<T, const AXIS: usize>(
    indices: &[usize],
    input_vertices: &[impl Into<[T; 3]> + Clone + Sync],
) -> Option<[T; 3]>
where
    T: ScalarKernel,
    usize: AsPrimitive<T>,
{
    match indices.len() {
        0 => None,
        1 => Some(input_vertices.ᚦget(indices[0]).clone().into()),
        _ => {
            let mut sum_x = T::zero();
            let mut sum_y = sum_x;
            let mut sum_z = sum_x;
            for v in indices
                .iter()
                .map(|i| input_vertices.ᚦget(*i).clone().into())
            {
                sum_x += v[0];
                sum_y += v[1];
                sum_z += v[2];
            }
            let count = indices.len().as_();
            Some([sum_x / count, sum_y / count, sum_z / count])
        }
    }
}

pub(super) fn dedup_vertices_exact_from_iter<
    T,
    InputIndex,
    OutputIndex,
    Vin,
    Vout,
    Topology,
    CheckFinite,
    const PRUNE_DEGENERATE: bool,
>(
    indices_iter: impl IntoIterator<Item = InputIndex>,
    vertex_fn: impl Fn(InputIndex) -> Vin,
    vertices_len: usize,
) -> Result<(Vec<Vout>, Vec<OutputIndex>), DeDupError>
where
    T: ScalarKernel,
    InputIndex: Hash + Eq + Clone + Sync,
    OutputIndex: IndexType,
    Vin: Into<[T; 3]> + Clone + Sync,
    Vout: Into<[T; 3]> + From<[T; 3]> + Clone + Sync,
    Topology: TopologyPolicy,
    CheckFinite: CheckFinitePolicy,
{
    let mut invalid_vertex = None;
    let mut remap: FxHashMap<InputIndex, OutputIndex> =
        FxHashMap::with_capacity_and_hasher(vertices_len, Default::default());
    #[allow(clippy::type_complexity)]
    let mut unique_vertices: FxHashMap<(T::Bits, T::Bits, T::Bits), OutputIndex> =
        FxHashMap::with_capacity_and_hasher(vertices_len, Default::default());
    let mut vertices: Vec<Vout> = Vec::with_capacity(vertices_len);
    let mut indices: Vec<OutputIndex> =
        Vec::with_capacity(vertices_len * Topology::VERTICES_TO_INDICES_RATIO);
    for index in indices_iter {
        let remapped_index = remap.entry(index.clone()).or_insert_with(|| {
            let mut vertex: [T; 3] = vertex_fn(index).into();
            if CheckFinite::CHECK_FINITE
                && !(vertex[0].is_finite() && vertex[1].is_finite() && vertex[2].is_finite())
            {
                invalid_vertex = Some(vertex);
                // the actual value is never used
                return OutputIndex::MAX;
            }

            // map +0.0 the same way as -0.0
            vertex[0] += T::zero();
            vertex[1] += T::zero();
            vertex[2] += T::zero();

            *unique_vertices.entry(vertex.get_hash()).or_insert_with(|| {
                let new_vertex_index = OutputIndex::from_usize(vertices.len());
                vertices.push(vertex.into());
                new_vertex_index
            })
        });

        if let Some(invalid_vertex) = invalid_vertex {
            return Err(DeDupError(
                format!("Non finite vertex coordinate found:{invalid_vertex:?}",).to_string(),
            ));
        }
        indices.push(*remapped_index);
        if PRUNE_DEGENERATE
            && Topology::INDICES_MODULUS > 1
            && indices.len() % Topology::INDICES_MODULUS == 0
        {
            Topology::fix_last_shunk::<T, OutputIndex>(&mut indices);
        }
    }

    if PRUNE_DEGENERATE {
        Topology::fix_final_shunk::<T, OutputIndex>(&mut indices);
    }
    Ok((vertices, indices))
}

#[allow(clippy::too_many_arguments)]
pub(crate) fn dedup_with_hash_type<
    T: ScalarKernel,
    Index: IndexType,
    Vout: Into<[T; 3]> + From<[T; 3]> + Clone + Sync,
    H: HashType<T>,
    Threading: ThreadingKernel,
    Topology: TopologyPolicy,
>(
    vertices: &[impl Into<[T; 3]> + Clone + Sync],
    indices: &[Index],
    aabb: Aabb<T>,
    center: [T; 3],
    longest_axis: usize,
    tolerance: T,
    prune_degenerate: PruneDegenerateEnum,
    used_vertices: Option<Vob>,
) -> Result<(Vec<Vout>, Vec<Index>), DeDupError>
where
    usize: AsPrimitive<T>,
    i32: AsPrimitive<T>,
{
    let (vertices, remap) = match longest_axis {
        0 => Threading::dedup_vertices::<T, Index, Vout, H, 0>(
            aabb,
            center,
            vertices,
            tolerance,
            used_vertices,
        )?,
        1 => Threading::dedup_vertices::<T, Index, Vout, H, 1>(
            aabb,
            center,
            vertices,
            tolerance,
            used_vertices,
        )?,
        2 => Threading::dedup_vertices::<T, Index, Vout, H, 2>(
            aabb,
            center,
            vertices,
            tolerance,
            used_vertices,
        )?,
        _ => unreachable!(),
    };

    match prune_degenerate {
        PruneDegenerate => {
            Topology::remap_indices::<T, Index, Vout, true>(vertices, indices, &remap)
        }
        KeepDegenerate => {
            Topology::remap_indices::<T, Index, Vout, false>(vertices, indices, &remap)
        }
    }
}

impl<T: ThreadingDispatch> ThreadingPolicy for T {}