#![forbid(unsafe_code)]
use crate::core::facet::{FacetError, FacetView};
use crate::core::traits::data_type::DataType;
use crate::core::triangulation_data_structure::Tds;
use crate::geometry::algorithms::convex_hull::ConvexHull;
use crate::geometry::point::Point;
use crate::geometry::traits::coordinate::{CoordinateScalar, ScalarAccumulative};
use std::collections::HashSet;
use thiserror::Error;
#[derive(Clone, Debug, Error, PartialEq, Eq)]
pub enum JaccardComputationError {
#[error(
"Set sizes exceed safe f64 conversion limit (2^53): intersection={intersection}, union={union}"
)]
SetSizeTooLarge {
intersection: usize,
union: usize,
},
}
fn compute_set_metrics<T, S>(
a: &std::collections::HashSet<T, S>,
b: &std::collections::HashSet<T, S>,
) -> (usize, usize)
where
T: Eq + std::hash::Hash,
S: std::hash::BuildHasher,
{
let (small, large) = if a.len() <= b.len() { (a, b) } else { (b, a) };
let intersection = small.iter().filter(|x| large.contains(x)).count();
let union = a.len() + b.len() - intersection;
(intersection, union)
}
pub fn jaccard_index<T, S>(
a: &std::collections::HashSet<T, S>,
b: &std::collections::HashSet<T, S>,
) -> Result<f64, JaccardComputationError>
where
T: Eq + std::hash::Hash,
S: std::hash::BuildHasher,
{
const MAX_SAFE_INT_U128: u128 = 1u128 << 53;
if a.is_empty() && b.is_empty() {
return Ok(1.0);
}
let (intersection, union) = compute_set_metrics(a, b);
if (intersection as u128) > MAX_SAFE_INT_U128 || (union as u128) > MAX_SAFE_INT_U128 {
return Err(JaccardComputationError::SetSizeTooLarge {
intersection,
union,
});
}
#[expect(
clippy::cast_precision_loss,
reason = "Safe: intersection/union are bounded to 2^53 before casting to f64"
)]
let inter_f64 = intersection as f64;
#[expect(
clippy::cast_precision_loss,
reason = "Safe: intersection/union are bounded to 2^53 before casting to f64"
)]
let union_f64 = union as f64;
Ok(inter_f64 / union_f64)
}
#[inline]
pub fn jaccard_distance<T, S>(
a: &std::collections::HashSet<T, S>,
b: &std::collections::HashSet<T, S>,
) -> Result<f64, JaccardComputationError>
where
T: Eq + std::hash::Hash,
S: std::hash::BuildHasher,
{
Ok(1.0 - jaccard_index(a, b)?)
}
#[must_use]
pub fn extract_vertex_coordinate_set<T, U, V, const D: usize>(
tds: &Tds<T, U, V, D>,
) -> HashSet<Point<T, D>>
where
T: CoordinateScalar,
U: DataType,
V: DataType,
{
tds.vertices().map(|(_, vertex)| *vertex.point()).collect()
}
const fn canonical_edge(u: u128, v: u128) -> (u128, u128) {
if u <= v { (u, v) } else { (v, u) }
}
pub fn extract_edge_set<T, U, V, const D: usize>(
tds: &Tds<T, U, V, D>,
) -> Result<HashSet<(u128, u128)>, FacetError>
where
T: CoordinateScalar,
U: DataType,
V: DataType,
{
let mut edges = HashSet::new();
for (_, cell) in tds.cells() {
let vertex_keys = cell.vertices();
for i in 0..vertex_keys.len() {
for j in (i + 1)..vertex_keys.len() {
let v_i = tds.get_vertex_by_key(vertex_keys[i]).ok_or(
FacetError::VertexKeyNotFoundInTriangulation {
key: vertex_keys[i],
},
)?;
let v_j = tds.get_vertex_by_key(vertex_keys[j]).ok_or(
FacetError::VertexKeyNotFoundInTriangulation {
key: vertex_keys[j],
},
)?;
let uuid_i = v_i.uuid().as_u128();
let uuid_j = v_j.uuid().as_u128();
edges.insert(canonical_edge(uuid_i, uuid_j));
}
}
}
Ok(edges)
}
pub fn extract_facet_identifier_set<T, U, V, const D: usize>(
tds: &Tds<T, U, V, D>,
) -> Result<HashSet<u64>, FacetError>
where
T: ScalarAccumulative,
U: DataType,
V: DataType,
{
use crate::core::traits::boundary_analysis::BoundaryAnalysis;
let mut facet_ids = HashSet::new();
let boundary_facets =
tds.boundary_facets()
.map_err(|e| FacetError::BoundaryFacetRetrievalFailed {
source: std::sync::Arc::new(e),
})?;
for facet_view in boundary_facets {
let facet_id = facet_view.key()?;
facet_ids.insert(facet_id);
}
Ok(facet_ids)
}
pub fn extract_hull_facet_set<K, U, V, const D: usize>(
hull: &ConvexHull<K, U, V, D>,
tri: &crate::core::triangulation::Triangulation<K, U, V, D>,
) -> Result<HashSet<u64>, FacetError>
where
K: crate::geometry::kernel::Kernel<D>,
K::Scalar: ScalarAccumulative,
U: DataType,
V: DataType,
{
let tds = &tri.tds;
let mut facet_ids = HashSet::new();
for facet_handle in hull.facets() {
let facet_view = FacetView::new(tds, facet_handle.cell_key(), facet_handle.facet_index())?;
let facet_id = facet_view.key()?;
facet_ids.insert(facet_id);
}
Ok(facet_ids)
}
pub fn format_jaccard_report<T, S>(
a: &HashSet<T, S>,
b: &HashSet<T, S>,
label_a: &str,
label_b: &str,
) -> Result<String, JaccardComputationError>
where
T: Eq + std::hash::Hash + std::fmt::Debug,
S: std::hash::BuildHasher,
{
const MAX_SAFE_INT_U128: u128 = 1u128 << 53;
let size_a = a.len();
let size_b = b.len();
let (intersection, union) = compute_set_metrics(a, b);
let jaccard = if union == 0 {
1.0
} else {
if (intersection as u128) > MAX_SAFE_INT_U128 || (union as u128) > MAX_SAFE_INT_U128 {
return Err(JaccardComputationError::SetSizeTooLarge {
intersection,
union,
});
}
#[expect(
clippy::cast_precision_loss,
reason = "Safe: intersection/union are bounded to 2^53 before casting to f64"
)]
let inter_f64 = intersection as f64;
#[expect(
clippy::cast_precision_loss,
reason = "Safe: intersection/union are bounded to 2^53 before casting to f64"
)]
let union_f64 = union as f64;
inter_f64 / union_f64
};
let only_in_a: Vec<_> = a.iter().filter(|x| !b.contains(x)).take(5).collect();
let only_in_b: Vec<_> = b.iter().filter(|x| !a.contains(x)).take(5).collect();
Ok(format!(
"\nâ•─ Jaccard Similarity Report ─────────────────────────────\n\
│ {label_a}: {size_a} elements\n\
│ {label_b}: {size_b} elements\n\
│ Intersection: {intersection}\n\
│ Union: {union}\n\
│ Jaccard Index: {jaccard:.6}\n\
├─ Symmetric Difference (sample) ────────────────────────\n\
│ Only in {label_a} (first 5): {only_in_a:?}\n\
│ Only in {label_b} (first 5): {only_in_b:?}\n\
╰─────────────────────────────────────────────────────────\n"
))
}
#[macro_export]
macro_rules! assert_jaccard_gte {
($a:expr, $b:expr, $threshold:expr $(,)?) => {
$crate::assert_jaccard_gte!($a, $b, $threshold, "Jaccard index assertion")
};
($a:expr, $b:expr, $threshold:expr, $($label:tt)*) => {{
let a_ref = $a;
let b_ref = $b;
let threshold = $threshold;
let jaccard_index = $crate::core::util::jaccard_index(a_ref, b_ref)
.expect("Jaccard computation should not overflow for reasonable test sets");
if jaccard_index < threshold {
let report = $crate::core::util::format_jaccard_report(
a_ref,
b_ref,
"Set A",
"Set B"
)
.expect("Failed to format Jaccard report - set sizes too large");
panic!(
"Jaccard assertion failed: {}\n\
Expected: Jaccard index ≥ {:.6}\n\
Actual: {:.6}\n\
{}",
format!($($label)*),
threshold,
jaccard_index,
report
);
}
}};
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::triangulation_data_structure::{Tds, VertexKey};
use crate::geometry::traits::coordinate::Coordinate;
use crate::vertex;
use approx::assert_relative_eq;
use slotmap::KeyData;
macro_rules! gen_jaccard_set_tests {
($name:ident, $dim:literal) => {
#[test]
fn $name() {
use std::collections::HashSet;
let empty: HashSet<[i32; $dim]> = HashSet::new();
assert_relative_eq!(jaccard_index(&empty, &empty).unwrap(), 1.0, epsilon = 1e-12);
assert_relative_eq!(
jaccard_distance(&empty, &empty).unwrap(),
0.0,
epsilon = 1e-12
);
let a: HashSet<[i32; $dim]> = HashSet::from([[1; $dim], [2; $dim], [3; $dim]]);
let b: HashSet<[i32; $dim]> = HashSet::from([[3; $dim], [4; $dim]]);
assert_relative_eq!(jaccard_index(&a, &a).unwrap(), 1.0, epsilon = 1e-12);
assert_relative_eq!(jaccard_distance(&a, &a).unwrap(), 0.0, epsilon = 1e-12);
assert_relative_eq!(jaccard_index(&a, &b).unwrap(), 0.25, epsilon = 1e-12);
assert_relative_eq!(jaccard_distance(&a, &b).unwrap(), 0.75, epsilon = 1e-12);
assert_relative_eq!(jaccard_index(&a, &empty).unwrap(), 0.0, epsilon = 1e-12);
assert_relative_eq!(jaccard_distance(&a, &empty).unwrap(), 1.0, epsilon = 1e-12);
}
};
}
gen_jaccard_set_tests!(jaccard_basic_properties_2d, 2);
gen_jaccard_set_tests!(jaccard_basic_properties_3d, 3);
gen_jaccard_set_tests!(jaccard_basic_properties_4d, 4);
gen_jaccard_set_tests!(jaccard_basic_properties_5d, 5);
#[test]
fn test_canonical_edge() {
assert_eq!(canonical_edge(1, 2), (1, 2));
assert_eq!(canonical_edge(2, 1), (1, 2));
assert_eq!(canonical_edge(9, 2), (2, 9));
assert_eq!(canonical_edge(100, 50), (50, 100));
assert_eq!(canonical_edge(5, 5), (5, 5));
}
#[test]
fn test_set_extraction_utilities_comprehensive() {
let vertices = vec![
vertex!([0.0, 0.0, 0.0]),
vertex!([1.0, 0.0, 0.0]),
vertex!([0.0, 1.0, 0.0]),
vertex!([0.0, 0.0, 1.0]),
];
let dt =
crate::core::delaunay_triangulation::DelaunayTriangulation::new(&vertices).unwrap();
let tds = &dt.as_triangulation().tds;
let coord_set = extract_vertex_coordinate_set(tds);
assert_eq!(coord_set.len(), 4, "Should have 4 unique coordinates");
assert!(coord_set.contains(&Point::new([0.0, 0.0, 0.0])));
assert!(coord_set.contains(&Point::new([1.0, 0.0, 0.0])));
assert!(coord_set.contains(&Point::new([0.0, 1.0, 0.0])));
assert!(coord_set.contains(&Point::new([0.0, 0.0, 1.0])));
let edge_set = extract_edge_set(tds).unwrap();
assert_eq!(edge_set.len(), 6, "Tetrahedron should have 6 edges");
let facet_set = extract_facet_identifier_set(tds).unwrap();
assert_eq!(facet_set.len(), 4, "Tetrahedron should have 4 facets");
let dt_hull =
crate::core::delaunay_triangulation::DelaunayTriangulation::new(&vertices).unwrap();
let tri = dt_hull.as_triangulation();
let hull = ConvexHull::from_triangulation(tri).unwrap();
let hull_facet_set = extract_hull_facet_set(&hull, tri).unwrap();
assert_eq!(hull_facet_set.len(), 4, "Hull should have 4 facets");
}
#[test]
fn test_extract_edge_set_errors_on_missing_vertex_key() {
use crate::core::facet::FacetError;
let vertices = vec![
vertex!([0.0, 0.0, 0.0]),
vertex!([1.0, 0.0, 0.0]),
vertex!([0.0, 1.0, 0.0]),
vertex!([0.0, 0.0, 1.0]),
];
let mut dt =
crate::core::delaunay_triangulation::DelaunayTriangulation::new(&vertices).unwrap();
let cell_key = dt.as_triangulation().tds.cell_keys().next().unwrap();
let invalid_vkey = VertexKey::from(KeyData::from_ffi(u64::MAX));
dt.tri
.tds
.get_cell_by_key_mut(cell_key)
.unwrap()
.push_vertex_key(invalid_vkey);
let err = extract_edge_set(&dt.as_triangulation().tds).unwrap_err();
assert!(matches!(
err,
FacetError::VertexKeyNotFoundInTriangulation { key } if key == invalid_vkey
));
}
#[test]
fn test_extract_facet_identifier_set_errors_on_boundary_facet_retrieval_failure() {
use crate::core::facet::FacetError;
let vertices = vec![
vertex!([0.0, 0.0, 0.0]),
vertex!([1.0, 0.0, 0.0]),
vertex!([0.0, 1.0, 0.0]),
vertex!([0.0, 0.0, 1.0]),
];
let mut dt =
crate::core::delaunay_triangulation::DelaunayTriangulation::new(&vertices).unwrap();
let cell_key = dt.as_triangulation().tds.cell_keys().next().unwrap();
let invalid_vkey = VertexKey::from(KeyData::from_ffi(u64::MAX));
dt.tri
.tds
.get_cell_by_key_mut(cell_key)
.unwrap()
.push_vertex_key(invalid_vkey);
let err = extract_facet_identifier_set(&dt.as_triangulation().tds).unwrap_err();
assert!(matches!(
err,
FacetError::BoundaryFacetRetrievalFailed { .. }
));
}
#[test]
fn test_format_jaccard_report_includes_metrics_and_handles_empty_sets() {
use std::collections::HashSet;
let empty: HashSet<i32> = HashSet::new();
let report = format_jaccard_report(&empty, &empty, "A", "B").unwrap();
assert!(report.contains("A: 0 elements"));
assert!(report.contains("B: 0 elements"));
assert!(report.contains("Intersection: 0"));
assert!(report.contains("Union: 0"));
assert!(report.contains("Jaccard Index: 1"));
let a: HashSet<_> = [1, 2, 3].into_iter().collect();
let b: HashSet<_> = [3, 4].into_iter().collect();
let report = format_jaccard_report(&a, &b, "A", "B").unwrap();
assert!(report.contains("Intersection: 1"));
assert!(report.contains("Union: 4"));
}
#[test]
fn test_extract_hull_facet_set_consistent_with_boundary_facets() {
let vertices = vec![
vertex!([0.0, 0.0, 0.0]),
vertex!([1.0, 0.0, 0.0]),
vertex!([0.0, 1.0, 0.0]),
vertex!([0.0, 0.0, 1.0]),
];
let dt =
crate::core::delaunay_triangulation::DelaunayTriangulation::new(&vertices).unwrap();
let tri = dt.as_triangulation();
let hull = ConvexHull::from_triangulation(tri).unwrap();
let hull_facet_set = extract_hull_facet_set(&hull, tri).unwrap();
let boundary_set = extract_facet_identifier_set(&tri.tds).unwrap();
assert_eq!(hull_facet_set, boundary_set);
}
#[test]
fn test_extract_edge_set_empty_tds_is_empty() {
let tds: Tds<f64, (), (), 3> = Tds::empty();
let edges = extract_edge_set(&tds).unwrap();
assert!(edges.is_empty());
}
}