Skip to main content

packed_spatial_index/
triangle.rs

1//! Fixed-width triangle payload records, in `f64` (default) or `f32` (compact).
2//!
3//! A triangle is a natural **fixed-width** payload: 6 (2D) or 9 (3D) coordinates,
4//! the same size for every item. Stored through the serializer's fixed-width
5//! path (`.triangles(..)` / `.records(..)`) it needs no offset table, so the file
6//! is smaller and a view can borrow the records as a zero-copy typed slice. Pair
7//! a triangle payload with an index built over the triangles' bounding boxes
8//! ([`Index3D::from_triangles`](crate::Index3D::from_triangles)) to get a
9//! streamable bounding-volume hierarchy over a mesh.
10//!
11//! Following the crate convention ([`Box3D`] / [`SimdIndex3D`](crate::SimdIndex3D)
12//! are `f64`, the `F32` suffix is the compact variant), [`Triangle3D`] stores
13//! `f64` vertices and [`Triangle3DF32`] stores `f32`. The `f32` ray-triangle path
14//! uses an explicit SIMD kernel with the `simd` feature; the `f64` path is scalar.
15
16use crate::geometry::{Box2D, Box3D};
17
18mod sealed {
19    pub trait Sealed {}
20}
21
22/// A ray-triangle intersection: the `index` of the hit triangle in the queried
23/// slice, and the ray parameter `t` in direction-length units (the same `t`
24/// convention as [`crate::Ray3D`]). Returned by [`crate::Ray3D::closest_triangle`].
25#[derive(Clone, Copy, Debug, PartialEq)]
26pub struct TriangleHit {
27    /// Index of the hit triangle in the slice passed to the query.
28    pub index: usize,
29    /// Ray parameter at the hit, in direction-length units.
30    pub t: f64,
31}
32
33/// A 2D triangle record (`f64` or `f32` vertices). Implemented by [`Triangle2D`]
34/// and [`Triangle2DF32`]; sealed, so the set of record types is fixed.
35pub trait Triangle2: Copy + sealed::Sealed {
36    /// Byte stride of one record.
37    const STRIDE: usize;
38    /// The triangle's axis-aligned bounding box (always `f64`, for the index).
39    fn aabb(&self) -> Box2D;
40    #[doc(hidden)]
41    fn read_le(bytes: &[u8]) -> Self;
42}
43
44/// A 3D triangle record (`f64` or `f32` vertices). Implemented by [`Triangle3D`]
45/// and [`Triangle3DF32`]; sealed, so the set of record types is fixed.
46pub trait Triangle3: Copy + sealed::Sealed {
47    /// Byte stride of one record.
48    const STRIDE: usize;
49    /// The triangle's axis-aligned bounding box (always `f64`, for the index).
50    fn aabb(&self) -> Box3D;
51    #[doc(hidden)]
52    fn read_le(bytes: &[u8]) -> Self;
53    /// Closest hit of the ray segment `(o, d, max_t)` over `tris`, computed in the
54    /// record's own precision (`f32` uses the SIMD kernel under the `simd`
55    /// feature). Crate-internal; drive it through [`crate::Ray3D::closest_triangle`].
56    #[doc(hidden)]
57    fn closest_hit(o: [f64; 3], d: [f64; 3], max_t: f64, tris: &[Self]) -> Option<TriangleHit>
58    where
59        Self: Sized;
60}
61
62macro_rules! tri2 {
63    ($name:ident, $t:ty, $stride:expr) => {
64        /// A 2D triangle: three vertices `[x, y]`. `repr(C)`, unpadded, so a slice
65        /// casts to and from the on-disk fixed-width payload with no copy.
66        #[repr(C)]
67        #[derive(Clone, Copy, Debug, PartialEq)]
68        pub struct $name {
69            /// First vertex `[x, y]`.
70            pub a: [$t; 2],
71            /// Second vertex `[x, y]`.
72            pub b: [$t; 2],
73            /// Third vertex `[x, y]`.
74            pub c: [$t; 2],
75        }
76
77        impl $name {
78            /// Create a triangle from three vertices.
79            #[inline]
80            pub const fn new(a: [$t; 2], b: [$t; 2], c: [$t; 2]) -> Self {
81                Self { a, b, c }
82            }
83        }
84
85        impl sealed::Sealed for $name {}
86        impl Triangle2 for $name {
87            const STRIDE: usize = $stride;
88            #[inline]
89            fn aabb(&self) -> Box2D {
90                let min_x = self.a[0].min(self.b[0]).min(self.c[0]);
91                let min_y = self.a[1].min(self.b[1]).min(self.c[1]);
92                let max_x = self.a[0].max(self.b[0]).max(self.c[0]);
93                let max_y = self.a[1].max(self.b[1]).max(self.c[1]);
94                Box2D::new(min_x as f64, min_y as f64, max_x as f64, max_y as f64)
95            }
96            #[inline]
97            fn read_le(b: &[u8]) -> Self {
98                let mut v = [0 as $t; 6];
99                read_le_into::<$t>(b, &mut v);
100                Self {
101                    a: [v[0], v[1]],
102                    b: [v[2], v[3]],
103                    c: [v[4], v[5]],
104                }
105            }
106        }
107    };
108}
109
110tri2!(Triangle2D, f64, 48);
111tri2!(Triangle2DF32, f32, 24);
112
113impl Triangle2D {
114    /// Whether this triangle's filled area overlaps the axis-aligned box `bx`.
115    ///
116    /// The 2D separating-axis test: the box's two axes plus the triangle's three
117    /// edge normals. This is the predicate behind `Index2D::search_triangle` /
118    /// `any_triangle` / `visit_triangle`.
119    #[inline]
120    pub fn overlaps_box(&self, bx: Box2D) -> bool {
121        let v = [self.a, self.b, self.c];
122        // Box axes.
123        let lo = v[0][0].min(v[1][0]).min(v[2][0]);
124        let hi = v[0][0].max(v[1][0]).max(v[2][0]);
125        if hi < bx.min_x || lo > bx.max_x {
126            return false;
127        }
128        let lo = v[0][1].min(v[1][1]).min(v[2][1]);
129        let hi = v[0][1].max(v[1][1]).max(v[2][1]);
130        if hi < bx.min_y || lo > bx.max_y {
131            return false;
132        }
133        // Triangle edge normals.
134        let corners = [
135            [bx.min_x, bx.min_y],
136            [bx.max_x, bx.min_y],
137            [bx.min_x, bx.max_y],
138            [bx.max_x, bx.max_y],
139        ];
140        for e in 0..3 {
141            let p = v[e];
142            let q = v[(e + 1) % 3];
143            let (ax, ay) = (-(q[1] - p[1]), q[0] - p[0]);
144            let mut tlo = f64::INFINITY;
145            let mut thi = f64::NEG_INFINITY;
146            for w in &v {
147                let d = w[0] * ax + w[1] * ay;
148                tlo = tlo.min(d);
149                thi = thi.max(d);
150            }
151            let mut blo = f64::INFINITY;
152            let mut bhi = f64::NEG_INFINITY;
153            for c in &corners {
154                let d = c[0] * ax + c[1] * ay;
155                blo = blo.min(d);
156                bhi = bhi.max(d);
157            }
158            if thi < blo || tlo > bhi {
159                return false;
160            }
161        }
162        true
163    }
164
165    /// Whether the box `bx` lies entirely inside this triangle (all four corners
166    /// inside the convex region). Used to accept a whole subtree without testing
167    /// its leaves. Always `false` for a degenerate (zero-area) triangle.
168    #[inline]
169    pub fn contains_box(&self, bx: Box2D) -> bool {
170        let area2 = (self.b[0] - self.a[0]) * (self.c[1] - self.a[1])
171            - (self.c[0] - self.a[0]) * (self.b[1] - self.a[1]);
172        if area2 == 0.0 {
173            return false;
174        }
175        self.contains_point([bx.min_x, bx.min_y])
176            && self.contains_point([bx.max_x, bx.min_y])
177            && self.contains_point([bx.min_x, bx.max_y])
178            && self.contains_point([bx.max_x, bx.max_y])
179    }
180
181    #[inline]
182    fn contains_point(&self, p: [f64; 2]) -> bool {
183        let edge = |a: [f64; 2], b: [f64; 2]| {
184            (b[0] - a[0]) * (p[1] - a[1]) - (b[1] - a[1]) * (p[0] - a[0])
185        };
186        let d1 = edge(self.a, self.b);
187        let d2 = edge(self.b, self.c);
188        let d3 = edge(self.c, self.a);
189        let has_neg = d1 < 0.0 || d2 < 0.0 || d3 < 0.0;
190        let has_pos = d1 > 0.0 || d2 > 0.0 || d3 > 0.0;
191        !(has_neg && has_pos)
192    }
193}
194
195macro_rules! tri3 {
196    ($name:ident, $t:ty, $stride:expr, $closest:path) => {
197        /// A 3D triangle: three vertices `[x, y, z]`. `repr(C)`, unpadded, so a
198        /// slice casts to and from the on-disk fixed-width payload with no copy.
199        #[repr(C)]
200        #[derive(Clone, Copy, Debug, PartialEq)]
201        pub struct $name {
202            /// First vertex `[x, y, z]`.
203            pub a: [$t; 3],
204            /// Second vertex `[x, y, z]`.
205            pub b: [$t; 3],
206            /// Third vertex `[x, y, z]`.
207            pub c: [$t; 3],
208        }
209
210        impl $name {
211            /// Create a triangle from three vertices.
212            #[inline]
213            pub const fn new(a: [$t; 3], b: [$t; 3], c: [$t; 3]) -> Self {
214                Self { a, b, c }
215            }
216        }
217
218        impl sealed::Sealed for $name {}
219        impl Triangle3 for $name {
220            const STRIDE: usize = $stride;
221            #[inline]
222            fn aabb(&self) -> Box3D {
223                let min_x = self.a[0].min(self.b[0]).min(self.c[0]);
224                let min_y = self.a[1].min(self.b[1]).min(self.c[1]);
225                let min_z = self.a[2].min(self.b[2]).min(self.c[2]);
226                let max_x = self.a[0].max(self.b[0]).max(self.c[0]);
227                let max_y = self.a[1].max(self.b[1]).max(self.c[1]);
228                let max_z = self.a[2].max(self.b[2]).max(self.c[2]);
229                Box3D::new(
230                    min_x as f64,
231                    min_y as f64,
232                    min_z as f64,
233                    max_x as f64,
234                    max_y as f64,
235                    max_z as f64,
236                )
237            }
238            #[inline]
239            fn read_le(b: &[u8]) -> Self {
240                let mut v = [0 as $t; 9];
241                read_le_into::<$t>(b, &mut v);
242                Self {
243                    a: [v[0], v[1], v[2]],
244                    b: [v[3], v[4], v[5]],
245                    c: [v[6], v[7], v[8]],
246                }
247            }
248            #[inline]
249            fn closest_hit(
250                o: [f64; 3],
251                d: [f64; 3],
252                max_t: f64,
253                tris: &[Self],
254            ) -> Option<TriangleHit> {
255                $closest(o, d, max_t, tris)
256            }
257        }
258    };
259}
260
261tri3!(Triangle3D, f64, 72, closest_f64);
262tri3!(Triangle3DF32, f32, 36, closest_f32);
263
264/// Decode little-endian floats from `bytes` into `out`. `T` is `f32` or `f64`.
265trait LeFloat: Copy {
266    const SIZE: usize;
267    fn from_le(b: &[u8]) -> Self;
268}
269impl LeFloat for f32 {
270    const SIZE: usize = 4;
271    #[inline]
272    fn from_le(b: &[u8]) -> Self {
273        f32::from_le_bytes([b[0], b[1], b[2], b[3]])
274    }
275}
276impl LeFloat for f64 {
277    const SIZE: usize = 8;
278    #[inline]
279    fn from_le(b: &[u8]) -> Self {
280        f64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]])
281    }
282}
283#[inline]
284fn read_le_into<T: LeFloat>(bytes: &[u8], out: &mut [T]) {
285    for (i, slot) in out.iter_mut().enumerate() {
286        *slot = T::from_le(&bytes[i * T::SIZE..]);
287    }
288}
289
290/// Reinterpret a triangle slice as its little-endian record bytes (no copy).
291///
292/// Sound because every triangle type is `repr(C)` over `f32`/`f64` only (no
293/// padding, every bit pattern valid) and call sites bound `T` to the sealed
294/// [`Triangle2`] / [`Triangle3`]. Produces the same bytes on little-endian
295/// targets, which the format already assumes throughout.
296#[inline]
297pub(crate) fn records_as_bytes<T: Copy>(records: &[T]) -> &[u8] {
298    // SAFETY: see the doc comment; `T` is a `repr(C)` all-float record type.
299    unsafe {
300        core::slice::from_raw_parts(
301            records.as_ptr() as *const u8,
302            std::mem::size_of_val(records),
303        )
304    }
305}
306
307/// Borrow a fixed-width blob region as a typed record slice, zero-copy, when the
308/// bytes are aligned (e.g. an mmap). `None` if `blobs` is not suitably aligned.
309#[inline]
310pub(crate) fn blobs_as_records<T: Copy>(blobs: &[u8]) -> Option<&[T]> {
311    // SAFETY: same record-type argument as `records_as_bytes`; `align_to` splits
312    // any unaligned prefix/suffix and we accept only a clean whole-slice cast.
313    let (prefix, mid, suffix) = unsafe { blobs.align_to::<T>() };
314    (prefix.is_empty() && suffix.is_empty()).then_some(mid)
315}
316
317// ---- ray-triangle closest hit ----
318
319/// Branchless Moller-Trumbore in `f64`: ray parameter `t`, or `+inf` on a miss.
320#[inline(always)]
321#[allow(clippy::manual_range_contains)] // branchless miss mask, kept for autovec
322fn mt_f64(t: &Triangle3D, o: [f64; 3], d: [f64; 3], max_t: f64) -> f64 {
323    let e1 = [t.b[0] - t.a[0], t.b[1] - t.a[1], t.b[2] - t.a[2]];
324    let e2 = [t.c[0] - t.a[0], t.c[1] - t.a[1], t.c[2] - t.a[2]];
325    let p = [
326        d[1] * e2[2] - d[2] * e2[1],
327        d[2] * e2[0] - d[0] * e2[2],
328        d[0] * e2[1] - d[1] * e2[0],
329    ];
330    let det = e1[0] * p[0] + e1[1] * p[1] + e1[2] * p[2];
331    let inv = 1.0 / det;
332    let s = [o[0] - t.a[0], o[1] - t.a[1], o[2] - t.a[2]];
333    let u = (s[0] * p[0] + s[1] * p[1] + s[2] * p[2]) * inv;
334    let q = [
335        s[1] * e1[2] - s[2] * e1[1],
336        s[2] * e1[0] - s[0] * e1[2],
337        s[0] * e1[1] - s[1] * e1[0],
338    ];
339    let v = (d[0] * q[0] + d[1] * q[1] + d[2] * q[2]) * inv;
340    let t = (e2[0] * q[0] + e2[1] * q[1] + e2[2] * q[2]) * inv;
341    let miss = (det.abs() < 1e-12)
342        | (u < 0.0)
343        | (u > 1.0)
344        | (v < 0.0)
345        | (u + v > 1.0)
346        | (t < 0.0)
347        | (t > max_t);
348    if miss { f64::INFINITY } else { t }
349}
350
351fn closest_f64(o: [f64; 3], d: [f64; 3], max_t: f64, tris: &[Triangle3D]) -> Option<TriangleHit> {
352    let mut best_t = f64::INFINITY;
353    let mut best_i = usize::MAX;
354    for (i, tri) in tris.iter().enumerate() {
355        let t = mt_f64(tri, o, d, max_t);
356        if t < best_t {
357            best_t = t;
358            best_i = i;
359        }
360    }
361    (best_i != usize::MAX).then_some(TriangleHit {
362        index: best_i,
363        t: best_t,
364    })
365}
366
367/// Branchless Moller-Trumbore in `f32`: ray parameter `t`, or `+inf` on a miss.
368#[inline(always)]
369#[cfg_attr(feature = "simd", allow(dead_code))]
370#[allow(clippy::manual_range_contains)] // branchless miss mask, kept for autovec
371fn mt_f32(t: &Triangle3DF32, o: [f32; 3], d: [f32; 3], max_t: f32) -> f32 {
372    let e1 = [t.b[0] - t.a[0], t.b[1] - t.a[1], t.b[2] - t.a[2]];
373    let e2 = [t.c[0] - t.a[0], t.c[1] - t.a[1], t.c[2] - t.a[2]];
374    let p = [
375        d[1] * e2[2] - d[2] * e2[1],
376        d[2] * e2[0] - d[0] * e2[2],
377        d[0] * e2[1] - d[1] * e2[0],
378    ];
379    let det = e1[0] * p[0] + e1[1] * p[1] + e1[2] * p[2];
380    let inv = 1.0 / det;
381    let s = [o[0] - t.a[0], o[1] - t.a[1], o[2] - t.a[2]];
382    let u = (s[0] * p[0] + s[1] * p[1] + s[2] * p[2]) * inv;
383    let q = [
384        s[1] * e1[2] - s[2] * e1[1],
385        s[2] * e1[0] - s[0] * e1[2],
386        s[0] * e1[1] - s[1] * e1[0],
387    ];
388    let v = (d[0] * q[0] + d[1] * q[1] + d[2] * q[2]) * inv;
389    let t = (e2[0] * q[0] + e2[1] * q[1] + e2[2] * q[2]) * inv;
390    let miss = (det.abs() < 1e-8)
391        | (u < 0.0)
392        | (u > 1.0)
393        | (v < 0.0)
394        | (u + v > 1.0)
395        | (t < 0.0)
396        | (t > max_t);
397    if miss { f32::INFINITY } else { t }
398}
399
400fn closest_f32(
401    o: [f64; 3],
402    d: [f64; 3],
403    max_t: f64,
404    tris: &[Triangle3DF32],
405) -> Option<TriangleHit> {
406    let o = [o[0] as f32, o[1] as f32, o[2] as f32];
407    let d = [d[0] as f32, d[1] as f32, d[2] as f32];
408    let max_t = max_t as f32;
409    #[cfg(feature = "simd")]
410    {
411        closest_f32_simd(o, d, max_t, tris)
412    }
413    #[cfg(not(feature = "simd"))]
414    {
415        let mut best_t = f32::INFINITY;
416        let mut best_i = usize::MAX;
417        for (i, tri) in tris.iter().enumerate() {
418            let t = mt_f32(tri, o, d, max_t);
419            if t < best_t {
420                best_t = t;
421                best_i = i;
422            }
423        }
424        (best_i != usize::MAX).then_some(TriangleHit {
425            index: best_i,
426            t: best_t as f64,
427        })
428    }
429}
430
431/// SIMD `f32` closest hit: 8 triangles per iteration through `wide::f32x8`. The
432/// AoS records are gathered into SoA lanes; tail lanes stay degenerate (miss).
433#[cfg(feature = "simd")]
434fn closest_f32_simd(
435    o: [f32; 3],
436    d: [f32; 3],
437    max_t: f32,
438    tris: &[Triangle3DF32],
439) -> Option<TriangleHit> {
440    use wide::f32x8;
441    let inf = f32x8::splat(f32::INFINITY);
442    let zero = f32x8::splat(0.0);
443    let one = f32x8::splat(1.0);
444    let eps = f32x8::splat(1e-8);
445    let maxv = f32x8::splat(max_t);
446    let ox = f32x8::splat(o[0]);
447    let oy = f32x8::splat(o[1]);
448    let oz = f32x8::splat(o[2]);
449    let dx = f32x8::splat(d[0]);
450    let dy = f32x8::splat(d[1]);
451    let dz = f32x8::splat(d[2]);
452
453    let mut best_t = f32::INFINITY;
454    let mut best_i = usize::MAX;
455    for (chunk_i, chunk) in tris.chunks(8).enumerate() {
456        let mut g = [[0.0f32; 8]; 9];
457        for (l, t) in chunk.iter().enumerate() {
458            g[0][l] = t.a[0];
459            g[1][l] = t.a[1];
460            g[2][l] = t.a[2];
461            g[3][l] = t.b[0];
462            g[4][l] = t.b[1];
463            g[5][l] = t.b[2];
464            g[6][l] = t.c[0];
465            g[7][l] = t.c[1];
466            g[8][l] = t.c[2];
467        }
468        let ax = f32x8::from(g[0]);
469        let ay = f32x8::from(g[1]);
470        let az = f32x8::from(g[2]);
471        let e1x = f32x8::from(g[3]) - ax;
472        let e1y = f32x8::from(g[4]) - ay;
473        let e1z = f32x8::from(g[5]) - az;
474        let e2x = f32x8::from(g[6]) - ax;
475        let e2y = f32x8::from(g[7]) - ay;
476        let e2z = f32x8::from(g[8]) - az;
477
478        let px = dy * e2z - dz * e2y;
479        let py = dz * e2x - dx * e2z;
480        let pz = dx * e2y - dy * e2x;
481        let det = e1x * px + e1y * py + e1z * pz;
482        let inv = one / det;
483        let sx = ox - ax;
484        let sy = oy - ay;
485        let sz = oz - az;
486        let u = (sx * px + sy * py + sz * pz) * inv;
487        let qx = sy * e1z - sz * e1y;
488        let qy = sz * e1x - sx * e1z;
489        let qz = sx * e1y - sy * e1x;
490        let v = (dx * qx + dy * qy + dz * qz) * inv;
491        let t = (e2x * qx + e2y * qy + e2z * qz) * inv;
492
493        let miss = det.abs().simd_lt(eps)
494            | u.simd_lt(zero)
495            | u.simd_gt(one)
496            | v.simd_lt(zero)
497            | (u + v).simd_gt(one)
498            | t.simd_lt(zero)
499            | t.simd_gt(maxv);
500        let t = miss.blend(inf, t);
501
502        let base = chunk_i * 8;
503        for (l, &tl) in t.to_array().iter().enumerate() {
504            if tl < best_t {
505                best_t = tl;
506                best_i = base + l;
507            }
508        }
509    }
510    (best_i != usize::MAX).then_some(TriangleHit {
511        index: best_i,
512        t: best_t as f64,
513    })
514}
515
516#[cfg(all(test, feature = "simd"))]
517mod tests {
518    use super::*;
519
520    #[test]
521    fn f32_simd_matches_scalar() {
522        use rand::rngs::StdRng;
523        use rand::{RngExt, SeedableRng};
524        let mut rng = StdRng::seed_from_u64(0x7711);
525        let tris: Vec<Triangle3DF32> = (0..1500)
526            .map(|_| {
527                let c = [
528                    rng.random_range(0.0..10.0f32),
529                    rng.random_range(0.0..10.0),
530                    rng.random_range(0.0..10.0),
531                ];
532                let mut v = || {
533                    [
534                        c[0] + rng.random_range(-1.5..1.5f32),
535                        c[1] + rng.random_range(-1.5..1.5),
536                        c[2] + rng.random_range(-1.5..1.5),
537                    ]
538                };
539                Triangle3DF32::new(v(), v(), v())
540            })
541            .collect();
542        for _ in 0..300 {
543            let o = [
544                rng.random_range(0.0..10.0f32),
545                rng.random_range(0.0..10.0),
546                rng.random_range(0.0..10.0),
547            ];
548            let d = [
549                rng.random_range(-1.0..1.0f32),
550                rng.random_range(-1.0..1.0),
551                rng.random_range(-1.0..1.0),
552            ];
553            let simd = closest_f32_simd(o, d, 100.0, &tris);
554            let mut best_t = f32::INFINITY;
555            let mut best_i = usize::MAX;
556            for (i, tri) in tris.iter().enumerate() {
557                let t = mt_f32(tri, o, d, 100.0);
558                if t < best_t {
559                    best_t = t;
560                    best_i = i;
561                }
562            }
563            let scalar = (best_i != usize::MAX).then_some(best_i);
564            // When both hit, indices/params agree; a rare grazing-edge split is OK.
565            if let (Some(s), Some(v)) = (scalar, simd) {
566                assert!(
567                    s == v.index || (best_t as f64 - v.t).abs() < 1e-3,
568                    "scalar {s} vs simd {v:?}"
569                );
570            }
571        }
572    }
573}