dedup_mesh 0.2.0

// SPDX-License-Identifier: MIT OR Apache-2.0
// Copyright (c) 2025 lacklustr@protonmail.com https://github.com/eadf

#[cfg(test)]
mod tests;

use crate::IndexType;
use crate::hash_grid::{SignedPrimInt, get_range};
use crate::prelude::*;
use crate::threading::{ThreadingKernel, dedup_with_hash_type};
use crate::util::Aabb;
use num_traits::{Float, ToPrimitive};
use std::fmt::Debug;
use std::hash::Hash;
use std::ops::AddAssign;
use vob::Vob;

/// The primitive vertex value type, i.e. f32 or f64
pub trait ScalarKernel: Float + Debug + AddAssign + Send + Sync + 'static {
    type Bits: Hash + Eq + Copy;

    fn to_bits(self) -> Self::Bits;
    fn distance_squared(a: [Self; 3], b: [Self; 3]) -> Self;

    const ZERO: Self;
    const ONE: Self;
    const TWO: Self;
    const QUARTER_OF_MAX: Self;
    const FOUR: Self;
    const HALF: Self;

    /// Analyzes vertex range and calls the appropriate deduplication implementation
    fn dedup_with_optimal_hash<
        Index: IndexType,
        Vout: Into<[Self; 3]> + From<[Self; 3]> + Clone + Sync,
        Threading: ThreadingKernel,
        Topology: TopologyPolicy,
    >(
        vertices: &[impl Into<[Self; 3]> + Clone + Sync],
        indices: &[Index],
        tolerance: Self,
        prune_degenerate: PruneDegenerateEnum,
        used_vertices: Option<Vob>,
        allow_scaling: bool,
    ) -> Result<(Vec<Vout>, Vec<Index>), DeDupError>;

    fn optimize_grid_tolerance_and_type(
        longest_axis: Self,
        tolerance: Self,
        allow_scaling: bool,
    ) -> Result<(Self, SignedPrimInt), DeDupError>;
}

impl ScalarKernel for f32 {
    const ZERO: Self = 0.0;
    const ONE: Self = 1.0;
    const TWO: Self = 2.0;
    const QUARTER_OF_MAX: Self = f32::MAX / 4.0;
    const FOUR: Self = 4.0;
    const HALF: Self = 0.5;
    type Bits = u32;

    #[inline(always)]
    fn to_bits(self) -> Self::Bits {
        <f32>::to_bits(self)
    }

    #[cfg(not(feature = "simd"))]
    #[inline(always)]
    fn distance_squared(a: [f32; 3], b: [f32; 3]) -> f32 {
        (a[0] - b[0]).powi(2) + (a[1] - b[1]).powi(2) + (a[2] - b[2]).powi(2)
    }

    #[cfg(feature = "simd")]
    #[inline(always)]
    fn distance_squared(a: [f32; 3], b: [f32; 3]) -> f32 {
        use wide::f32x4;
        let a_vec = f32x4::new([a[0], a[1], a[2], 0.0]);
        let b_vec = f32x4::new([b[0], b[1], b[2], 0.0]);
        let diff = a_vec - b_vec;
        let squared = diff * diff;
        let squared = squared.as_array_ref();
        squared[0] + squared[1] + squared[2]
    }

    /// Analyzes vertex range and calls the appropriate deduplication implementation
    fn dedup_with_optimal_hash<
        Index: IndexType,
        Vout: Into<[f32; 3]> + From<[f32; 3]> + Clone + Sync,
        Threading: ThreadingKernel,
        Topology: TopologyPolicy,
    >(
        vertices: &[impl Into<[Self; 3]> + Clone + Sync],
        indices: &[Index],
        tolerance: Self,
        prune_degenerate: PruneDegenerateEnum,
        used_vertices: Option<Vob>,
        allow_scaling: bool,
    ) -> Result<(Vec<Vout>, Vec<Index>), DeDupError> {
        let aabb = Aabb::<Self>::new_from_checked_data(vertices, &used_vertices)?;
        let (longest_axis, center) = aabb.longest_axis_and_center();

        match get_range(aabb.clone(), center, longest_axis, tolerance, allow_scaling)? {
            (updated_tolerance, SignedPrimInt::I16) => {
                dedup_with_hash_type::<Self, Index, Vout, i16, Threading, Topology>(
                    vertices,
                    indices,
                    aabb,
                    center,
                    longest_axis,
                    updated_tolerance,
                    prune_degenerate,
                    used_vertices,
                )
            }
            (updated_tolerance, SignedPrimInt::I32) => {
                dedup_with_hash_type::<Self, Index, Vout, i32, Threading, Topology>(
                    vertices,
                    indices,
                    aabb,
                    center,
                    longest_axis,
                    updated_tolerance,
                    prune_degenerate,
                    used_vertices,
                )
            }
            _ => unreachable!(),
        }
    }

    fn optimize_grid_tolerance_and_type(
        longest_axis: Self,
        tolerance: Self,
        allow_scaling: bool,
    ) -> Result<(Self, SignedPrimInt), DeDupError> {
        let grid_size = tolerance * Self::TWO;
        let longest_axis = longest_axis.abs();

        // -8.0 is to give room for all surrounding grid cells + some margin.
        const MANTISSA_LIMIT: f32 = ((1_u64 << f32::MANTISSA_DIGITS) - 8) as f32;
        let max_safe_coord = MANTISSA_LIMIT * grid_size;

        //println!("tolerance: {tolerance}");
        //println!("MANTISSA_LIMIT: {MANTISSA_LIMIT}");
        //println!("max_safe_coord: {max_safe_coord}");
        //println!("longest_axis: {}", longest_axis.abs());

        let updated_tolerance = if longest_axis >= max_safe_coord {
            if allow_scaling {
                longest_axis.abs() / (MANTISSA_LIMIT * Self::TWO)
            } else {
                #[allow(clippy::uninlined_format_args)]
                return Err(DeDupError(format!(
                    "Aabb max centered coordinate {} too large for grid_size {}. Maximum safe coordinate: {}",
                    longest_axis, grid_size, max_safe_coord
                )));
            }
        } else {
            tolerance
        };

        let grid_size = updated_tolerance * Self::TWO;
        debug_assert!(if allow_scaling {
            let max_safe_coord = MANTISSA_LIMIT * grid_size;
            longest_axis >= max_safe_coord
        } else {
            true
        });
        /*
                println!("tolerance:         {tolerance}");
                println!("updated_tolerance: {updated_tolerance}");
                println!(
                    "(longest_axis / grid_size).to_i128(): {}",
                    (longest_axis / grid_size).to_i128().unwrap()
                );
                println!("i16::MAX:{}", i16::MAX);
                println!("i32::MAX:{}", i32::MAX);
        */
        if let Some(max_abs_i128) = (longest_axis / grid_size).to_i128() {
            // Check each type from smallest to largest (only need to check positive limit)
            if max_abs_i128 < i16::MAX as i128 {
                //println!("picked i16 : {}", i16::MAX);
                Ok((updated_tolerance, SignedPrimInt::I16))
            } else if max_abs_i128 < i32::MAX as i128 {
                //println!("picked i32 : {}", i32::MAX);
                Ok((updated_tolerance, SignedPrimInt::I32))
            } else {
                unreachable!()
            }
        } else {
            unreachable!()
        }
    }
}

impl ScalarKernel for f64 {
    const ZERO: Self = 0.0;
    const ONE: Self = 1.0;
    const TWO: Self = 2.0;
    const QUARTER_OF_MAX: Self = f64::MAX / 4.0;
    const FOUR: Self = 4.0;
    const HALF: Self = 0.5;
    type Bits = u64;

    #[inline(always)]
    fn to_bits(self) -> Self::Bits {
        <f64>::to_bits(self)
    }

    #[cfg(not(feature = "simd"))]
    #[inline(always)]
    fn distance_squared(a: [Self; 3], b: [Self; 3]) -> Self {
        (a[0] - b[0]).powi(2) + (a[1] - b[1]).powi(2) + (a[2] - b[2]).powi(2)
    }

    #[cfg(feature = "simd")]
    #[inline(always)]
    fn distance_squared(a: [Self; 3], b: [Self; 3]) -> Self {
        use wide::f64x4;
        let a_vec = f64x4::new([a[0], a[1], a[2], 0.0]);
        let b_vec = f64x4::new([b[0], b[1], b[2], 0.0]);
        let diff = a_vec - b_vec;
        let squared = diff * diff;
        let squared = squared.as_array_ref();
        squared[0] + squared[1] + squared[2]
    }

    fn optimize_grid_tolerance_and_type(
        longest_axis: Self,
        tolerance: Self,
        allow_scaling: bool,
    ) -> Result<(f64, SignedPrimInt), DeDupError> {
        let grid_size = tolerance * Self::TWO;
        let longest_axis = longest_axis.abs();

        // -8.0 is to give room for all surrounding grid cells + some margin.
        const MANTISSA_LIMIT: f64 = ((1_u64 << f64::MANTISSA_DIGITS) - 8) as f64;

        let max_safe_coord = MANTISSA_LIMIT * grid_size;
        /*
                println!("tolerance: {tolerance}");
                println!("MANTISSA_LIMIT: {MANTISSA_LIMIT}");
                println!("max_safe_coord: {max_safe_coord}");
                println!("longest_axis: {}", longest_axis.abs());
        */
        let updated_tolerance = if longest_axis >= max_safe_coord {
            if allow_scaling {
                longest_axis.abs() / (MANTISSA_LIMIT * Self::TWO)
            } else {
                #[allow(clippy::uninlined_format_args)]
                return Err(DeDupError(format!(
                    "Aabb max centered coordinate {} too large for grid_size {}. Maximum safe coordinate: {}",
                    longest_axis, grid_size, max_safe_coord
                )));
            }
        } else {
            tolerance
        };

        let grid_size = updated_tolerance * Self::TWO;
        debug_assert!(if allow_scaling {
            let max_safe_coord = MANTISSA_LIMIT * grid_size;
            longest_axis >= max_safe_coord
        } else {
            true
        });
        /*
                println!("tolerance:         {tolerance}");
                println!("updated_tolerance: {updated_tolerance}");
                println!(
                    "(longest_axis / grid_size).to_i128(): {}",
                    (longest_axis / grid_size).to_i128().unwrap()
                );
                println!("i16::MAX:{}", i16::MAX);
                println!("i32::MAX:{}", i32::MAX);
                println!("i64::MAX:{}", i64::MAX);
        */
        if let Some(max_abs_i128) = (longest_axis / grid_size).to_i128() {
            // Check each type from smallest to largest (only need to check positive limit)
            if max_abs_i128 < i16::MAX as i128 {
                // println!("picked i16 : {}", i16::MAX);
                Ok((updated_tolerance, SignedPrimInt::I16))
            } else if max_abs_i128 < i32::MAX as i128 {
                // println!("picked i32 : {}", i32::MAX);
                Ok((updated_tolerance, SignedPrimInt::I32))
            } else {
                // println!("picked i64 : {}", i64::MAX);
                Ok((updated_tolerance, SignedPrimInt::I64))
            }
        } else {
            unreachable!()
        }
    }

    /// Analyzes vertex range and calls the appropriate deduplication implementation
    fn dedup_with_optimal_hash<
        Index: IndexType,
        Vout: Into<[Self; 3]> + From<[Self; 3]> + Clone + Sync,
        Threading: ThreadingKernel,
        Topology: TopologyPolicy,
    >(
        vertices: &[impl Into<[Self; 3]> + Clone + Sync],
        indices: &[Index],
        tolerance: Self,
        prune_degenerate: PruneDegenerateEnum,
        used_vertices: Option<Vob>,
        allow_scaling: bool,
    ) -> Result<(Vec<Vout>, Vec<Index>), DeDupError> {
        let aabb = Aabb::<Self>::new_from_checked_data(vertices, &used_vertices)?;
        let (longest_axis, center) = aabb.longest_axis_and_center();

        match get_range(
            aabb.clone(),
            center,
            longest_axis,
            tolerance * 2.0,
            allow_scaling,
        )? {
            (updated_tolerance, SignedPrimInt::I16) => {
                dedup_with_hash_type::<Self, Index, Vout, i16, Threading, Topology>(
                    vertices,
                    indices,
                    aabb,
                    center,
                    longest_axis,
                    updated_tolerance,
                    prune_degenerate,
                    used_vertices,
                )
            }
            (updated_tolerance, SignedPrimInt::I32) => {
                dedup_with_hash_type::<Self, Index, Vout, i32, Threading, Topology>(
                    vertices,
                    indices,
                    aabb,
                    center,
                    longest_axis,
                    updated_tolerance,
                    prune_degenerate,
                    used_vertices,
                )
            }
            (updated_tolerance, SignedPrimInt::I64) => {
                dedup_with_hash_type::<Self, Index, Vout, i64, Threading, Topology>(
                    vertices,
                    indices,
                    aabb,
                    center,
                    longest_axis,
                    updated_tolerance,
                    prune_degenerate,
                    used_vertices,
                )
            }
        }
    }
}

impl<T: ScalarKernel> Scalar for T {}