// oxiphysics_core/cache_layout.rs

// Copyright 2026 COOLJAPAN OU (Team KitaSan)
// SPDX-License-Identifier: Apache-2.0

//! Cache-optimized data layouts for particle-based simulations.
//!
//! This module provides Structure-of-Arrays (SoA) containers, Morton/Z-curve
//! spatial sorting, and cache-line aligned allocation utilities. These data
//! structures are designed for optimal CPU cache utilization and
//! auto-vectorization in particle simulations (SPH, MD, N-body, etc.).
//!
//! # Structure-of-Arrays Layout
//!
//! Instead of the traditional Array-of-Structures (AoS):
//! ```text
//! [{x0,y0,z0,vx0,vy0,vz0,...}, {x1,y1,z1,vx1,vy1,vz1,...}, ...]
//! ```
//! We store each field in a separate contiguous array:
//! ```text
//! x:  [x0, x1, x2, ...]
//! y:  [y0, y1, y2, ...]
//! z:  [z0, z1, z2, ...]
//! vx: [vx0, vx1, vx2, ...]
//! ...
//! ```
//!
//! This layout allows the compiler to vectorize loops over a single component
//! (e.g., all x-positions) without wasting cache lines on unneeded fields.
//!
//! # Morton (Z-curve) Ordering
//!
//! Particles can be sorted by their 3D Morton code so that spatially close
//! particles are also close in memory. This dramatically improves cache hit
//! rates for neighbour-search kernels in SPH, MD, and similar methods.

35use std::fmt;
36
/// A simple AoS particle representation for interchange with SoA containers.
///
/// `Default` yields a particle at the origin with zero velocity, force, and
/// mass, which is convenient for struct-update syntax in tests and setup code.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct Particle {
    /// Position `[x, y, z]`.
    pub pos: [f64; 3],
    /// Velocity `[vx, vy, vz]`.
    pub vel: [f64; 3],
    /// Force `[fx, fy, fz]`.
    pub force: [f64; 3],
    /// Scalar mass.
    pub mass: f64,
}
49
/// Errors arising from cache-layout operations.
///
/// Returned by every fallible API in this module; each variant carries the
/// values needed to render a useful message via the `thiserror` derive.
#[derive(Debug, Clone, PartialEq, thiserror::Error)]
pub enum CacheLayoutError {
    /// Index is out of bounds for the container.
    #[error("index {index} out of bounds for container of length {len}")]
    IndexOutOfBounds {
        /// The requested index.
        index: usize,
        /// Current container length.
        len: usize,
    },
    /// Two indices that must be distinct are equal.
    #[error("swap indices must be distinct, but both are {index}")]
    IdenticalSwapIndices {
        /// The duplicated index.
        index: usize,
    },
    /// Grid spacing must be positive and finite.
    #[error("grid spacing must be positive and finite, got {value}")]
    InvalidGridSpacing {
        /// The invalid value (NaN, infinity, zero, or negative).
        value: f64,
    },
    /// Container is empty when a non-empty one is required.
    #[error("container is empty")]
    EmptyContainer,
}
77
78// ---------------------------------------------------------------------------
79// Morton (Z-curve) encoding / decoding
80// ---------------------------------------------------------------------------
81
/// Spread the low 21 bits of a `u32` into every third bit position of a `u64`.
///
/// Example: `0b101` -> `0b100_0001` (input bits at positions 0 and 2 move to
/// output positions 0 and 6).
#[inline]
fn spread_bits_by_3(v: u32) -> u64 {
    // Only the lower 21 bits are used (2^21 = ~2 million cells per axis);
    // three interleaved 21-bit values fit in 63 bits of a u64.
    let mut x = u64::from(v & 0x001f_ffff);
    // Classic bit-spreading ladder: each step moves groups of bits apart,
    // masking away the duplicated copies produced by the shift-or.
    x = (x | (x << 32)) & 0x001f_0000_0000_ffff;
    x = (x | (x << 16)) & 0x001f_0000_ff00_00ff;
    x = (x | (x << 8)) & 0x100f_00f0_0f00_f00f;
    x = (x | (x << 4)) & 0x10c3_0c30_c30c_30c3;
    x = (x | (x << 2)) & 0x1249_2492_4924_9249;
    x
}
97
/// Compact every third bit of a `u64` (positions 0, 3, 6, ...) back into a
/// contiguous 21-bit value in a `u32`. Inverse of [`spread_bits_by_3`].
#[inline]
fn compact_bits_by_3(v: u64) -> u32 {
    // Run the spreading ladder in reverse: each step halves the gap between
    // the surviving bit groups, masking away shifted-in garbage.
    let x = v & 0x1249_2492_4924_9249;
    let x = (x | (x >> 2)) & 0x10c3_0c30_c30c_30c3;
    let x = (x | (x >> 4)) & 0x100f_00f0_0f00_f00f;
    let x = (x | (x >> 8)) & 0x001f_0000_ff00_00ff;
    let x = (x | (x >> 16)) & 0x001f_0000_0000_ffff;
    let x = (x | (x >> 32)) & 0x001f_ffff;
    x as u32
}
109
110/// Encode three unsigned integer coordinates into a single 63-bit Morton code.
111///
112/// Each coordinate may use up to 21 bits (values 0..2^21-1). The bits are
113/// interleaved: bit 0 of `ix` goes to bit 0, bit 0 of `iy` to bit 1, bit 0
114/// of `iz` to bit 2, bit 1 of `ix` to bit 3, and so on.
115///
116/// # Examples
117/// ```no_run
118/// use oxiphysics_core::cache_layout::morton_encode_3d;
119/// let code = morton_encode_3d(1, 2, 3);
120/// assert_ne!(code, 0);
121/// ```
122#[inline]
123#[must_use]
124pub fn morton_encode_3d(ix: u32, iy: u32, iz: u32) -> u64 {
125    spread_bits_by_3(ix) | (spread_bits_by_3(iy) << 1) | (spread_bits_by_3(iz) << 2)
126}
127
128/// Decode a Morton code back into three unsigned integer coordinates.
129///
130/// This is the inverse of [`morton_encode_3d`].
131///
132/// # Examples
133/// ```no_run
134/// use oxiphysics_core::cache_layout::{morton_encode_3d, morton_decode_3d};
135/// let (ix, iy, iz) = morton_decode_3d(morton_encode_3d(5, 9, 13));
136/// assert_eq!((ix, iy, iz), (5, 9, 13));
137/// ```
138#[inline]
139#[must_use]
140pub fn morton_decode_3d(code: u64) -> (u32, u32, u32) {
141    (
142        compact_bits_by_3(code),
143        compact_bits_by_3(code >> 1),
144        compact_bits_by_3(code >> 2),
145    )
146}
147
148/// Convert a floating-point position to a Morton code using the given
149/// `grid_spacing`.
150///
151/// Each coordinate is quantised to `floor(coord / grid_spacing)` and clamped
152/// to the 21-bit range `[0, 2^21 - 1]`.
153///
154/// Returns `Err` if `grid_spacing` is not positive and finite.
155///
156/// # Examples
157/// ```no_run
158/// use oxiphysics_core::cache_layout::position_to_morton;
159/// let code = position_to_morton([1.5, 2.5, 3.5], 1.0).expect("valid spacing");
160/// assert_ne!(code, 0);
161/// ```
162pub fn position_to_morton(pos: [f64; 3], grid_spacing: f64) -> Result<u64, CacheLayoutError> {
163    if !grid_spacing.is_finite() || grid_spacing <= 0.0 {
164        return Err(CacheLayoutError::InvalidGridSpacing {
165            value: grid_spacing,
166        });
167    }
168    let max_cell: u32 = (1u32 << 21) - 1; // 2_097_151
169    let inv = 1.0 / grid_spacing;
170
171    let quantise = |v: f64| -> u32 {
172        let q = (v * inv).floor();
173        if q < 0.0 {
174            0
175        } else if q > f64::from(max_cell) {
176            max_cell
177        } else {
178            q as u32
179        }
180    };
181
182    let ix = quantise(pos[0]);
183    let iy = quantise(pos[1]);
184    let iz = quantise(pos[2]);
185    Ok(morton_encode_3d(ix, iy, iz))
186}
187
188// ---------------------------------------------------------------------------
189// AlignedVec
190// ---------------------------------------------------------------------------
191
/// Cache-line-aligned vector wrapper.
///
/// On most modern x86-64 and ARM processors the cache line is 64 bytes.
/// Aligning the start of a data buffer to a 64-byte boundary avoids false
/// sharing between cores and helps the prefetcher.
///
/// Internally this wraps a standard `Vec<T>` and relies on the global
/// allocator's behaviour: with allocators such as jemalloc or mimalloc,
/// large arrays typically come back 64-byte aligned already, so this type
/// mainly serves as a documented intent marker.
///
/// NOTE(review): no manual over-aligned allocation is performed here yet;
/// if a hard 64-byte alignment guarantee is required, a custom allocation
/// path (e.g. `std::alloc::Layout` with explicit alignment) must be added.
#[derive(Clone)]
pub struct AlignedVec<T> {
    // Backing storage; alignment is whatever the global allocator provides.
    data: Vec<T>,
}

impl<T: Default + Clone> AlignedVec<T> {
    /// Create a new aligned vector of `len` default-initialised elements.
    #[must_use]
    pub fn new(len: usize) -> Self {
        Self {
            data: vec![T::default(); len],
        }
    }
}

// Accessors place no bounds on `T`: wrapping, slicing, and length queries
// never clone elements. (The previous `T: Clone` bound was unnecessary.)
impl<T> AlignedVec<T> {
    /// Wrap an existing `Vec<T>` without copying or re-allocating.
    #[must_use]
    pub fn from_vec(v: Vec<T>) -> Self {
        Self { data: v }
    }

    /// Return a shared slice over the data.
    #[must_use]
    pub fn as_slice(&self) -> &[T] {
        &self.data
    }

    /// Return a mutable slice over the data.
    #[must_use]
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        &mut self.data
    }

    /// Number of elements.
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Whether the vector is empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}

impl<T: fmt::Debug> fmt::Debug for AlignedVec<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("AlignedVec")
            .field("len", &self.data.len())
            .field("data", &self.data)
            .finish()
    }
}
260
261// ---------------------------------------------------------------------------
262// ParticleSoA
263// ---------------------------------------------------------------------------
264
/// Cache-friendly particle storage using Structure-of-Arrays layout.
///
/// Positions, velocities, and forces are stored in separate contiguous arrays
/// for optimal vectorization and cache utilization. This layout is ideal for
/// tight inner loops that only touch one or two fields at a time (e.g., force
/// accumulation only needs `x, y, z, fx, fy, fz` -- velocities stay cold).
///
/// NOTE(review): the component vectors are `pub` while `len` is private, so
/// external code can push/truncate a single field vector and desynchronise
/// it from `len`; consider accessor methods if that invariant must be
/// enforced.
///
/// # Examples
/// ```no_run
/// use oxiphysics_core::cache_layout::ParticleSoA;
///
/// let mut soa = ParticleSoA::new(0);
/// soa.push([1.0, 2.0, 3.0], [0.1, 0.2, 0.3], [0.0; 3], 1.0);
/// assert_eq!(soa.len(), 1);
/// assert_eq!(soa.get_position(0).expect("valid index"), [1.0, 2.0, 3.0]);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct ParticleSoA {
    /// X positions.
    pub x: Vec<f64>,
    /// Y positions.
    pub y: Vec<f64>,
    /// Z positions.
    pub z: Vec<f64>,
    /// X velocities.
    pub vx: Vec<f64>,
    /// Y velocities.
    pub vy: Vec<f64>,
    /// Z velocities.
    pub vz: Vec<f64>,
    /// X forces.
    pub fx: Vec<f64>,
    /// Y forces.
    pub fy: Vec<f64>,
    /// Z forces.
    pub fz: Vec<f64>,
    /// Masses.
    pub mass: Vec<f64>,
    /// Number of particles (every component vector shares this length).
    len: usize,
}
306
307impl ParticleSoA {
308    /// Allocate a new, empty container pre-allocated for `capacity` particles.
309    #[must_use]
310    pub fn new(capacity: usize) -> Self {
311        Self {
312            x: Vec::with_capacity(capacity),
313            y: Vec::with_capacity(capacity),
314            z: Vec::with_capacity(capacity),
315            vx: Vec::with_capacity(capacity),
316            vy: Vec::with_capacity(capacity),
317            vz: Vec::with_capacity(capacity),
318            fx: Vec::with_capacity(capacity),
319            fy: Vec::with_capacity(capacity),
320            fz: Vec::with_capacity(capacity),
321            mass: Vec::with_capacity(capacity),
322            len: 0,
323        }
324    }
325
    /// Number of particles currently stored.
    ///
    /// Tracked separately from the component vectors, which all share this
    /// length.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.len
    }
332
    /// Whether the container holds zero particles.
    ///
    /// Conventional companion to [`Self::len`] (Clippy expects both).
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
339
    /// Current allocated capacity (in particles).
    #[inline]
    #[must_use]
    pub fn capacity(&self) -> usize {
        // All vecs are grown together, so x's capacity is representative.
        self.x.capacity()
    }
347
348    /// Append a single particle.
349    pub fn push(&mut self, pos: [f64; 3], vel: [f64; 3], force: [f64; 3], mass: f64) {
350        self.x.push(pos[0]);
351        self.y.push(pos[1]);
352        self.z.push(pos[2]);
353        self.vx.push(vel[0]);
354        self.vy.push(vel[1]);
355        self.vz.push(vel[2]);
356        self.fx.push(force[0]);
357        self.fy.push(force[1]);
358        self.fz.push(force[2]);
359        self.mass.push(mass);
360        self.len += 1;
361    }
362
363    /// Return the position of particle `i`.
364    ///
365    /// Returns `Err` if `i >= len()`.
366    pub fn get_position(&self, i: usize) -> Result<[f64; 3], CacheLayoutError> {
367        if i >= self.len {
368            return Err(CacheLayoutError::IndexOutOfBounds {
369                index: i,
370                len: self.len,
371            });
372        }
373        Ok([self.x[i], self.y[i], self.z[i]])
374    }
375
376    /// Set the position of particle `i`.
377    ///
378    /// Returns `Err` if `i >= len()`.
379    pub fn set_position(&mut self, i: usize, pos: [f64; 3]) -> Result<(), CacheLayoutError> {
380        if i >= self.len {
381            return Err(CacheLayoutError::IndexOutOfBounds {
382                index: i,
383                len: self.len,
384            });
385        }
386        self.x[i] = pos[0];
387        self.y[i] = pos[1];
388        self.z[i] = pos[2];
389        Ok(())
390    }
391
392    /// Return the velocity of particle `i`.
393    ///
394    /// Returns `Err` if `i >= len()`.
395    pub fn get_velocity(&self, i: usize) -> Result<[f64; 3], CacheLayoutError> {
396        if i >= self.len {
397            return Err(CacheLayoutError::IndexOutOfBounds {
398                index: i,
399                len: self.len,
400            });
401        }
402        Ok([self.vx[i], self.vy[i], self.vz[i]])
403    }
404
405    /// Set the velocity of particle `i`.
406    ///
407    /// Returns `Err` if `i >= len()`.
408    pub fn set_velocity(&mut self, i: usize, vel: [f64; 3]) -> Result<(), CacheLayoutError> {
409        if i >= self.len {
410            return Err(CacheLayoutError::IndexOutOfBounds {
411                index: i,
412                len: self.len,
413            });
414        }
415        self.vx[i] = vel[0];
416        self.vy[i] = vel[1];
417        self.vz[i] = vel[2];
418        Ok(())
419    }
420
421    /// Return the force on particle `i`.
422    ///
423    /// Returns `Err` if `i >= len()`.
424    pub fn get_force(&self, i: usize) -> Result<[f64; 3], CacheLayoutError> {
425        if i >= self.len {
426            return Err(CacheLayoutError::IndexOutOfBounds {
427                index: i,
428                len: self.len,
429            });
430        }
431        Ok([self.fx[i], self.fy[i], self.fz[i]])
432    }
433
434    /// Set the force on particle `i`.
435    ///
436    /// Returns `Err` if `i >= len()`.
437    pub fn set_force(&mut self, i: usize, force: [f64; 3]) -> Result<(), CacheLayoutError> {
438        if i >= self.len {
439            return Err(CacheLayoutError::IndexOutOfBounds {
440                index: i,
441                len: self.len,
442            });
443        }
444        self.fx[i] = force[0];
445        self.fy[i] = force[1];
446        self.fz[i] = force[2];
447        Ok(())
448    }
449
450    /// Zero all force components. This is a very cache-friendly operation
451    /// because it streams through three contiguous arrays sequentially.
452    pub fn zero_forces(&mut self) {
453        // Using `fill(0.0)` compiles to `memset` on most targets.
454        self.fx.fill(0.0);
455        self.fy.fill(0.0);
456        self.fz.fill(0.0);
457    }
458
459    /// Convert from an AoS slice of [`Particle`]s.
460    #[must_use]
461    pub fn from_aos(particles: &[Particle]) -> Self {
462        let n = particles.len();
463        let mut soa = Self::new(n);
464        for p in particles {
465            soa.push(p.pos, p.vel, p.force, p.mass);
466        }
467        soa
468    }
469
470    /// Convert back to an AoS `Vec`Particle`.
471    #[must_use]
472    pub fn to_aos(&self) -> Vec<Particle> {
473        let mut out = Vec::with_capacity(self.len);
474        for i in 0..self.len {
475            out.push(Particle {
476                pos: [self.x[i], self.y[i], self.z[i]],
477                vel: [self.vx[i], self.vy[i], self.vz[i]],
478                force: [self.fx[i], self.fy[i], self.fz[i]],
479                mass: self.mass[i],
480            });
481        }
482        out
483    }
484
485    /// Swap two particles at indices `i` and `j`.
486    ///
487    /// Returns `Err` if either index is out of bounds or if `i == j`.
488    pub fn swap(&mut self, i: usize, j: usize) -> Result<(), CacheLayoutError> {
489        if i >= self.len {
490            return Err(CacheLayoutError::IndexOutOfBounds {
491                index: i,
492                len: self.len,
493            });
494        }
495        if j >= self.len {
496            return Err(CacheLayoutError::IndexOutOfBounds {
497                index: j,
498                len: self.len,
499            });
500        }
501        if i == j {
502            return Err(CacheLayoutError::IdenticalSwapIndices { index: i });
503        }
504        self.x.swap(i, j);
505        self.y.swap(i, j);
506        self.z.swap(i, j);
507        self.vx.swap(i, j);
508        self.vy.swap(i, j);
509        self.vz.swap(i, j);
510        self.fx.swap(i, j);
511        self.fy.swap(i, j);
512        self.fz.swap(i, j);
513        self.mass.swap(i, j);
514        Ok(())
515    }
516
517    /// Sort all particles by their 3D Morton (Z-curve) code.
518    ///
519    /// After sorting, particles that are spatially close share nearby memory
520    /// addresses, which dramatically improves cache performance for
521    /// neighbour-search algorithms.
522    ///
523    /// Returns `Err` if `grid_spacing` is invalid (non-positive or non-finite).
524    /// Returns `Ok(())` immediately if the container has fewer than two
525    /// particles.
526    pub fn sort_by_morton_code(&mut self, grid_spacing: f64) -> Result<(), CacheLayoutError> {
527        if !grid_spacing.is_finite() || grid_spacing <= 0.0 {
528            return Err(CacheLayoutError::InvalidGridSpacing {
529                value: grid_spacing,
530            });
531        }
532        if self.len < 2 {
533            return Ok(());
534        }
535
536        // Build (morton_code, original_index) pairs.
537        let mut indices: Vec<(u64, usize)> = Vec::with_capacity(self.len);
538        for i in 0..self.len {
539            let code = position_to_morton([self.x[i], self.y[i], self.z[i]], grid_spacing)?;
540            indices.push((code, i));
541        }
542        indices.sort_unstable_by_key(|&(code, _)| code);
543
544        // Apply the permutation to every field. We build new vectors in the
545        // sorted order to avoid complex in-place permutation logic.
546        let mut new_x = Vec::with_capacity(self.len);
547        let mut new_y = Vec::with_capacity(self.len);
548        let mut new_z = Vec::with_capacity(self.len);
549        let mut new_vx = Vec::with_capacity(self.len);
550        let mut new_vy = Vec::with_capacity(self.len);
551        let mut new_vz = Vec::with_capacity(self.len);
552        let mut new_fx = Vec::with_capacity(self.len);
553        let mut new_fy = Vec::with_capacity(self.len);
554        let mut new_fz = Vec::with_capacity(self.len);
555        let mut new_mass = Vec::with_capacity(self.len);
556
557        for &(_, idx) in &indices {
558            new_x.push(self.x[idx]);
559            new_y.push(self.y[idx]);
560            new_z.push(self.z[idx]);
561            new_vx.push(self.vx[idx]);
562            new_vy.push(self.vy[idx]);
563            new_vz.push(self.vz[idx]);
564            new_fx.push(self.fx[idx]);
565            new_fy.push(self.fy[idx]);
566            new_fz.push(self.fz[idx]);
567            new_mass.push(self.mass[idx]);
568        }
569
570        self.x = new_x;
571        self.y = new_y;
572        self.z = new_z;
573        self.vx = new_vx;
574        self.vy = new_vy;
575        self.vz = new_vz;
576        self.fx = new_fx;
577        self.fy = new_fy;
578        self.fz = new_fz;
579        self.mass = new_mass;
580
581        Ok(())
582    }
583
    /// Hint the CPU prefetcher to load particles in `\[start, end)`.
    ///
    /// On stable Rust this is a no-op; it documents the developer's intent
    /// and may be replaced with intrinsic prefetch instructions when they
    /// stabilise.
    #[inline]
    pub fn prefetch_range(&self, _start: usize, _end: usize) {
        // Intentional no-op on stable Rust.
        //
        // Future: when `core::arch::x86_64::_mm_prefetch` (usable without a
        // nightly intrinsic wrapper) or the portable
        // `core::intrinsics::prefetch_read_data` become stable, we can emit
        // prefetch instructions for `self.x[start..end]`, etc.
        //
        // For now the hardware's stream prefetcher handles sequential access
        // well enough when the data is laid out contiguously (which the SoA
        // layout guarantees).
    }
601
602    /// Return the mass of particle `i`.
603    ///
604    /// Returns `Err` if `i >= len()`.
605    pub fn get_mass(&self, i: usize) -> Result<f64, CacheLayoutError> {
606        if i >= self.len {
607            return Err(CacheLayoutError::IndexOutOfBounds {
608                index: i,
609                len: self.len,
610            });
611        }
612        Ok(self.mass[i])
613    }
614
615    /// Remove the last particle (analogous to `Vec::pop`).
616    ///
617    /// Returns the removed particle, or `Err` if the container is empty.
618    pub fn pop(&mut self) -> Result<Particle, CacheLayoutError> {
619        if self.len == 0 {
620            return Err(CacheLayoutError::EmptyContainer);
621        }
622        self.len -= 1;
623        // The `pop` calls below will not fail because we checked length.
624        let px = self.x.pop().ok_or(CacheLayoutError::EmptyContainer)?;
625        let py = self.y.pop().ok_or(CacheLayoutError::EmptyContainer)?;
626        let pz = self.z.pop().ok_or(CacheLayoutError::EmptyContainer)?;
627        let pvx = self.vx.pop().ok_or(CacheLayoutError::EmptyContainer)?;
628        let pvy = self.vy.pop().ok_or(CacheLayoutError::EmptyContainer)?;
629        let pvz = self.vz.pop().ok_or(CacheLayoutError::EmptyContainer)?;
630        let pfx = self.fx.pop().ok_or(CacheLayoutError::EmptyContainer)?;
631        let pfy = self.fy.pop().ok_or(CacheLayoutError::EmptyContainer)?;
632        let pfz = self.fz.pop().ok_or(CacheLayoutError::EmptyContainer)?;
633        let pm = self.mass.pop().ok_or(CacheLayoutError::EmptyContainer)?;
634        Ok(Particle {
635            pos: [px, py, pz],
636            vel: [pvx, pvy, pvz],
637            force: [pfx, pfy, pfz],
638            mass: pm,
639        })
640    }
641
642    /// Remove all particles without deallocating.
643    pub fn clear(&mut self) {
644        self.x.clear();
645        self.y.clear();
646        self.z.clear();
647        self.vx.clear();
648        self.vy.clear();
649        self.vz.clear();
650        self.fx.clear();
651        self.fy.clear();
652        self.fz.clear();
653        self.mass.clear();
654        self.len = 0;
655    }
656
657    /// Reserve capacity for at least `additional` more particles.
658    pub fn reserve(&mut self, additional: usize) {
659        self.x.reserve(additional);
660        self.y.reserve(additional);
661        self.z.reserve(additional);
662        self.vx.reserve(additional);
663        self.vy.reserve(additional);
664        self.vz.reserve(additional);
665        self.fx.reserve(additional);
666        self.fy.reserve(additional);
667        self.fz.reserve(additional);
668        self.mass.reserve(additional);
669    }
670
671    /// Compute the axis-aligned bounding box of all particles.
672    ///
673    /// Returns `(min_corner, max_corner)` or `Err` if empty.
674    pub fn bounding_box(&self) -> Result<([f64; 3], [f64; 3]), CacheLayoutError> {
675        if self.len == 0 {
676            return Err(CacheLayoutError::EmptyContainer);
677        }
678        let mut min = [f64::INFINITY; 3];
679        let mut max = [f64::NEG_INFINITY; 3];
680        for i in 0..self.len {
681            if self.x[i] < min[0] {
682                min[0] = self.x[i];
683            }
684            if self.x[i] > max[0] {
685                max[0] = self.x[i];
686            }
687            if self.y[i] < min[1] {
688                min[1] = self.y[i];
689            }
690            if self.y[i] > max[1] {
691                max[1] = self.y[i];
692            }
693            if self.z[i] < min[2] {
694                min[2] = self.z[i];
695            }
696            if self.z[i] > max[2] {
697                max[2] = self.z[i];
698            }
699        }
700        Ok((min, max))
701    }
702
703    /// Compute the total kinetic energy: `sum_i 0.5 * m_i * |v_i|^2`.
704    #[must_use]
705    pub fn kinetic_energy(&self) -> f64 {
706        let mut ke = 0.0;
707        for i in 0..self.len {
708            let v2 = self.vx[i] * self.vx[i] + self.vy[i] * self.vy[i] + self.vz[i] * self.vz[i];
709            ke += 0.5 * self.mass[i] * v2;
710        }
711        ke
712    }
713
714    /// Compute the centre of mass position.
715    ///
716    /// Returns `Err` if the container is empty or total mass is zero.
717    pub fn centre_of_mass(&self) -> Result<[f64; 3], CacheLayoutError> {
718        if self.len == 0 {
719            return Err(CacheLayoutError::EmptyContainer);
720        }
721        let mut total_mass = 0.0;
722        let mut cx = 0.0;
723        let mut cy = 0.0;
724        let mut cz = 0.0;
725        for i in 0..self.len {
726            let m = self.mass[i];
727            cx += m * self.x[i];
728            cy += m * self.y[i];
729            cz += m * self.z[i];
730            total_mass += m;
731        }
732        if total_mass.abs() < f64::EPSILON {
733            return Err(CacheLayoutError::EmptyContainer);
734        }
735        let inv = 1.0 / total_mass;
736        Ok([cx * inv, cy * inv, cz * inv])
737    }
738}
739
740// ---------------------------------------------------------------------------
741// Tests
742// ---------------------------------------------------------------------------
743
744#[cfg(test)]
745mod tests {
746    use super::*;
747
748    // -- Morton encode/decode round-trip --
749
750    #[test]
751    fn morton_encode_decode_round_trip() {
752        let cases: Vec<(u32, u32, u32)> = vec![
753            (0, 0, 0),
754            (1, 0, 0),
755            (0, 1, 0),
756            (0, 0, 1),
757            (1, 1, 1),
758            (7, 13, 21),
759            (255, 255, 255),
760            (1023, 512, 768),
761            ((1 << 21) - 1, (1 << 21) - 1, (1 << 21) - 1),
762        ];
763        for (ix, iy, iz) in cases {
764            let code = morton_encode_3d(ix, iy, iz);
765            let (dx, dy, dz) = morton_decode_3d(code);
766            assert_eq!(
767                (dx, dy, dz),
768                (ix, iy, iz),
769                "round-trip failed for ({ix}, {iy}, {iz})"
770            );
771        }
772    }
773
774    #[test]
775    fn morton_ordering_preserves_locality() {
776        // Nearby points should have nearby Morton codes (loosely).
777        let c1 = morton_encode_3d(4, 4, 4);
778        let c2 = morton_encode_3d(5, 4, 4);
779        let c_far = morton_encode_3d(100, 100, 100);
780        // c1 and c2 should be closer together than c1 and c_far
781        let diff_near = c1.abs_diff(c2);
782        let diff_far = c1.abs_diff(c_far);
783        assert!(diff_near < diff_far);
784    }
785
786    #[test]
787    fn position_to_morton_basic() {
788        let code = position_to_morton([1.5, 2.5, 3.5], 1.0);
789        assert!(code.is_ok());
790        let code = code.expect("should succeed");
791        let (ix, iy, iz) = morton_decode_3d(code);
792        assert_eq!((ix, iy, iz), (1, 2, 3));
793    }
794
795    #[test]
796    fn position_to_morton_rejects_invalid_spacing() {
797        assert!(position_to_morton([0.0, 0.0, 0.0], 0.0).is_err());
798        assert!(position_to_morton([0.0, 0.0, 0.0], -1.0).is_err());
799        assert!(position_to_morton([0.0, 0.0, 0.0], f64::NAN).is_err());
800        assert!(position_to_morton([0.0, 0.0, 0.0], f64::INFINITY).is_err());
801    }
802
803    #[test]
804    fn position_to_morton_clamps_negative_coords() {
805        let code = position_to_morton([-5.0, -10.0, -1.0], 1.0);
806        assert!(code.is_ok());
807        let (ix, iy, iz) = morton_decode_3d(code.expect("should succeed"));
808        assert_eq!((ix, iy, iz), (0, 0, 0));
809    }
810
811    // -- ParticleSoA basic operations --
812
813    #[test]
814    fn soa_new_and_push() {
815        let mut soa = ParticleSoA::new(8);
816        assert!(soa.is_empty());
817        assert_eq!(soa.len(), 0);
818        assert!(soa.capacity() >= 8);
819
820        soa.push([1.0, 2.0, 3.0], [0.1, 0.2, 0.3], [10.0, 20.0, 30.0], 5.0);
821        assert_eq!(soa.len(), 1);
822        assert!(!soa.is_empty());
823    }
824
825    #[test]
826    fn soa_get_set_position() {
827        let mut soa = ParticleSoA::new(0);
828        soa.push([1.0, 2.0, 3.0], [0.0; 3], [0.0; 3], 1.0);
829        assert_eq!(soa.get_position(0), Ok([1.0, 2.0, 3.0]));
830
831        assert!(soa.set_position(0, [4.0, 5.0, 6.0]).is_ok());
832        assert_eq!(soa.get_position(0), Ok([4.0, 5.0, 6.0]));
833
834        // Out of bounds
835        assert!(soa.get_position(1).is_err());
836        assert!(soa.set_position(1, [0.0; 3]).is_err());
837    }
838
839    #[test]
840    fn soa_get_set_velocity() {
841        let mut soa = ParticleSoA::new(0);
842        soa.push([0.0; 3], [1.0, 2.0, 3.0], [0.0; 3], 1.0);
843        assert_eq!(soa.get_velocity(0), Ok([1.0, 2.0, 3.0]));
844
845        assert!(soa.set_velocity(0, [7.0, 8.0, 9.0]).is_ok());
846        assert_eq!(soa.get_velocity(0), Ok([7.0, 8.0, 9.0]));
847
848        assert!(soa.get_velocity(5).is_err());
849    }
850
851    #[test]
852    fn soa_get_set_force() {
853        let mut soa = ParticleSoA::new(0);
854        soa.push([0.0; 3], [0.0; 3], [10.0, 20.0, 30.0], 1.0);
855        assert_eq!(soa.get_force(0), Ok([10.0, 20.0, 30.0]));
856
857        assert!(soa.set_force(0, [40.0, 50.0, 60.0]).is_ok());
858        assert_eq!(soa.get_force(0), Ok([40.0, 50.0, 60.0]));
859
860        assert!(soa.get_force(1).is_err());
861    }
862
863    #[test]
864    fn soa_zero_forces() {
865        let mut soa = ParticleSoA::new(0);
866        for i in 0..100 {
867            let v = i as f64;
868            soa.push([v; 3], [v; 3], [v * 10.0; 3], 1.0);
869        }
870        soa.zero_forces();
871        for i in 0..100 {
872            assert_eq!(soa.get_force(i), Ok([0.0, 0.0, 0.0]));
873            // Positions and velocities should be unchanged.
874            let v = i as f64;
875            assert_eq!(soa.get_position(i), Ok([v, v, v]));
876            assert_eq!(soa.get_velocity(i), Ok([v, v, v]));
877        }
878    }
879
880    // -- AoS <-> SoA round-trip --
881
882    #[test]
883    fn soa_aos_round_trip() {
884        let particles = vec![
885            Particle {
886                pos: [1.0, 2.0, 3.0],
887                vel: [0.1, 0.2, 0.3],
888                force: [10.0, 20.0, 30.0],
889                mass: 1.5,
890            },
891            Particle {
892                pos: [4.0, 5.0, 6.0],
893                vel: [0.4, 0.5, 0.6],
894                force: [40.0, 50.0, 60.0],
895                mass: 2.5,
896            },
897            Particle {
898                pos: [7.0, 8.0, 9.0],
899                vel: [0.7, 0.8, 0.9],
900                force: [70.0, 80.0, 90.0],
901                mass: 3.5,
902            },
903        ];
904
905        let soa = ParticleSoA::from_aos(&particles);
906        assert_eq!(soa.len(), 3);
907
908        let reconstructed = soa.to_aos();
909        assert_eq!(reconstructed, particles);
910    }
911
912    // -- Swap --
913
914    #[test]
915    fn soa_swap_preserves_data() {
916        let particles = vec![
917            Particle {
918                pos: [1.0, 2.0, 3.0],
919                vel: [0.1, 0.2, 0.3],
920                force: [10.0, 20.0, 30.0],
921                mass: 1.0,
922            },
923            Particle {
924                pos: [4.0, 5.0, 6.0],
925                vel: [0.4, 0.5, 0.6],
926                force: [40.0, 50.0, 60.0],
927                mass: 2.0,
928            },
929        ];
930        let mut soa = ParticleSoA::from_aos(&particles);
931        assert!(soa.swap(0, 1).is_ok());
932
933        // After swap, particle 0 should have particle 1's old data and vice versa.
934        assert_eq!(soa.get_position(0), Ok([4.0, 5.0, 6.0]));
935        assert_eq!(soa.get_position(1), Ok([1.0, 2.0, 3.0]));
936        assert_eq!(soa.get_velocity(0), Ok([0.4, 0.5, 0.6]));
937        assert_eq!(soa.get_mass(0), Ok(2.0));
938        assert_eq!(soa.get_mass(1), Ok(1.0));
939    }
940
941    #[test]
942    fn soa_swap_identical_indices_errors() {
943        let mut soa = ParticleSoA::new(0);
944        soa.push([0.0; 3], [0.0; 3], [0.0; 3], 1.0);
945        assert!(soa.swap(0, 0).is_err());
946    }
947
948    #[test]
949    fn soa_swap_out_of_bounds_errors() {
950        let mut soa = ParticleSoA::new(0);
951        soa.push([0.0; 3], [0.0; 3], [0.0; 3], 1.0);
952        assert!(soa.swap(0, 5).is_err());
953        assert!(soa.swap(5, 0).is_err());
954    }
955
956    // -- Morton sort --
957
958    #[test]
959    fn soa_sort_by_morton_preserves_particle_data() {
960        // Build particles at different spatial locations.
961        let particles = vec![
962            Particle {
963                pos: [100.0, 100.0, 100.0],
964                vel: [0.0; 3],
965                force: [0.0; 3],
966                mass: 1.0,
967            },
968            Particle {
969                pos: [0.0, 0.0, 0.0],
970                vel: [1.0; 3],
971                force: [2.0; 3],
972                mass: 2.0,
973            },
974            Particle {
975                pos: [50.0, 50.0, 50.0],
976                vel: [3.0; 3],
977                force: [4.0; 3],
978                mass: 3.0,
979            },
980        ];
981
982        let mut soa = ParticleSoA::from_aos(&particles);
983        assert!(soa.sort_by_morton_code(1.0).is_ok());
984
985        // All original particles should still be present (as a set).
986        let sorted_aos = soa.to_aos();
987        assert_eq!(sorted_aos.len(), 3);
988
989        // Check that each original particle appears exactly once.
990        for p in &particles {
991            let count = sorted_aos.iter().filter(|s| *s == p).count();
992            assert_eq!(count, 1, "particle {p:?} should appear exactly once");
993        }
994
995        // Check ordering: particle at (0,0,0) should come first.
996        assert_eq!(sorted_aos[0].pos, [0.0, 0.0, 0.0]);
997    }
998
999    #[test]
1000    fn soa_sort_by_morton_rejects_invalid_spacing() {
1001        let mut soa = ParticleSoA::new(0);
1002        soa.push([0.0; 3], [0.0; 3], [0.0; 3], 1.0);
1003        assert!(soa.sort_by_morton_code(0.0).is_err());
1004        assert!(soa.sort_by_morton_code(-1.0).is_err());
1005    }
1006
1007    #[test]
1008    fn soa_sort_by_morton_empty_and_single() {
1009        let mut soa = ParticleSoA::new(0);
1010        assert!(soa.sort_by_morton_code(1.0).is_ok());
1011
1012        soa.push([5.0, 5.0, 5.0], [0.0; 3], [0.0; 3], 1.0);
1013        assert!(soa.sort_by_morton_code(1.0).is_ok());
1014        assert_eq!(soa.get_position(0), Ok([5.0, 5.0, 5.0]));
1015    }
1016
1017    // -- Capacity management --
1018
1019    #[test]
1020    fn soa_capacity_grows() {
1021        let mut soa = ParticleSoA::new(2);
1022        assert!(soa.capacity() >= 2);
1023
1024        for i in 0..100 {
1025            soa.push([i as f64; 3], [0.0; 3], [0.0; 3], 1.0);
1026        }
1027        assert_eq!(soa.len(), 100);
1028        assert!(soa.capacity() >= 100);
1029    }
1030
1031    #[test]
1032    fn soa_reserve() {
1033        let mut soa = ParticleSoA::new(0);
1034        soa.reserve(1000);
1035        assert!(soa.capacity() >= 1000);
1036        assert_eq!(soa.len(), 0);
1037    }
1038
1039    // -- Pop and clear --
1040
1041    #[test]
1042    fn soa_pop() {
1043        let mut soa = ParticleSoA::new(0);
1044        soa.push([1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], 10.0);
1045        let p = soa.pop();
1046        assert!(p.is_ok());
1047        let p = p.expect("pop should succeed");
1048        assert_eq!(p.pos, [1.0, 2.0, 3.0]);
1049        assert_eq!(p.vel, [4.0, 5.0, 6.0]);
1050        assert_eq!(p.force, [7.0, 8.0, 9.0]);
1051        assert_eq!(p.mass, 10.0);
1052        assert!(soa.is_empty());
1053
1054        // Pop on empty should error
1055        assert!(soa.pop().is_err());
1056    }
1057
1058    #[test]
1059    fn soa_clear() {
1060        let mut soa = ParticleSoA::new(0);
1061        for i in 0..50 {
1062            soa.push([i as f64; 3], [0.0; 3], [0.0; 3], 1.0);
1063        }
1064        assert_eq!(soa.len(), 50);
1065        soa.clear();
1066        assert!(soa.is_empty());
1067        assert_eq!(soa.len(), 0);
1068    }
1069
1070    // -- Bounding box --
1071
1072    #[test]
1073    fn soa_bounding_box() {
1074        let mut soa = ParticleSoA::new(0);
1075        soa.push([1.0, -2.0, 3.0], [0.0; 3], [0.0; 3], 1.0);
1076        soa.push([4.0, 5.0, -6.0], [0.0; 3], [0.0; 3], 1.0);
1077        soa.push([-7.0, 8.0, 9.0], [0.0; 3], [0.0; 3], 1.0);
1078
1079        let (min, max) = soa.bounding_box().expect("should succeed");
1080        assert_eq!(min, [-7.0, -2.0, -6.0]);
1081        assert_eq!(max, [4.0, 8.0, 9.0]);
1082    }
1083
1084    #[test]
1085    fn soa_bounding_box_empty_errors() {
1086        let soa = ParticleSoA::new(0);
1087        assert!(soa.bounding_box().is_err());
1088    }
1089
1090    // -- Kinetic energy --
1091
1092    #[test]
1093    fn soa_kinetic_energy() {
1094        let mut soa = ParticleSoA::new(0);
1095        // mass=2, vel=(3,0,0) => KE = 0.5*2*9 = 9
1096        soa.push([0.0; 3], [3.0, 0.0, 0.0], [0.0; 3], 2.0);
1097        // mass=1, vel=(0,4,0) => KE = 0.5*1*16 = 8
1098        soa.push([0.0; 3], [0.0, 4.0, 0.0], [0.0; 3], 1.0);
1099        let ke = soa.kinetic_energy();
1100        assert!((ke - 17.0).abs() < 1e-12);
1101    }
1102
1103    // -- Centre of mass --
1104
1105    #[test]
1106    fn soa_centre_of_mass() {
1107        let mut soa = ParticleSoA::new(0);
1108        soa.push([0.0, 0.0, 0.0], [0.0; 3], [0.0; 3], 1.0);
1109        soa.push([10.0, 0.0, 0.0], [0.0; 3], [0.0; 3], 1.0);
1110        let com = soa.centre_of_mass().expect("should succeed");
1111        assert!((com[0] - 5.0).abs() < 1e-12);
1112        assert!((com[1]).abs() < 1e-12);
1113        assert!((com[2]).abs() < 1e-12);
1114    }
1115
1116    #[test]
1117    fn soa_centre_of_mass_empty_errors() {
1118        let soa = ParticleSoA::new(0);
1119        assert!(soa.centre_of_mass().is_err());
1120    }
1121
1122    // -- AlignedVec --
1123
1124    #[test]
1125    fn aligned_vec_basic() {
1126        let av: AlignedVec<f64> = AlignedVec::new(100);
1127        assert_eq!(av.len(), 100);
1128        assert!(!av.is_empty());
1129        assert_eq!(av.as_slice()[0], 0.0);
1130
1131        let av2: AlignedVec<f64> = AlignedVec::from_vec(vec![1.0, 2.0, 3.0]);
1132        assert_eq!(av2.len(), 3);
1133        assert_eq!(av2.as_slice(), &[1.0, 2.0, 3.0]);
1134    }
1135
1136    #[test]
1137    fn aligned_vec_mut() {
1138        let mut av: AlignedVec<f64> = AlignedVec::new(5);
1139        av.as_mut_slice()[0] = 42.0;
1140        assert_eq!(av.as_slice()[0], 42.0);
1141    }
1142
1143    #[test]
1144    fn aligned_vec_empty() {
1145        let av: AlignedVec<f64> = AlignedVec::new(0);
1146        assert!(av.is_empty());
1147        assert_eq!(av.len(), 0);
1148    }
1149
1150    // -- Push / get consistency --
1151
1152    #[test]
1153    fn soa_push_get_consistency() {
1154        let mut soa = ParticleSoA::new(0);
1155        let n = 200;
1156        for i in 0..n {
1157            let v = i as f64;
1158            soa.push(
1159                [v, v + 1.0, v + 2.0],
1160                [v * 0.1, v * 0.2, v * 0.3],
1161                [v * 10.0, v * 20.0, v * 30.0],
1162                v + 100.0,
1163            );
1164        }
1165        assert_eq!(soa.len(), n);
1166
1167        for i in 0..n {
1168            let v = i as f64;
1169            assert_eq!(soa.get_position(i), Ok([v, v + 1.0, v + 2.0]));
1170            assert_eq!(soa.get_velocity(i), Ok([v * 0.1, v * 0.2, v * 0.3]));
1171            assert_eq!(soa.get_force(i), Ok([v * 10.0, v * 20.0, v * 30.0]));
1172            assert_eq!(soa.get_mass(i), Ok(v + 100.0));
1173        }
1174    }
1175
1176    // -- Prefetch is a no-op but should not panic --
1177
1178    #[test]
1179    fn soa_prefetch_range_no_panic() {
1180        let mut soa = ParticleSoA::new(0);
1181        for i in 0..10 {
1182            soa.push([i as f64; 3], [0.0; 3], [0.0; 3], 1.0);
1183        }
1184        soa.prefetch_range(0, 10);
1185        soa.prefetch_range(5, 5);
1186        soa.prefetch_range(0, 0);
1187    }
1188
1189    // -- Morton sort with many particles --
1190
1191    #[test]
1192    fn soa_sort_by_morton_larger_set() {
1193        let mut soa = ParticleSoA::new(0);
1194        // Add particles in reverse spatial order.
1195        for i in (0..50).rev() {
1196            let v = i as f64 * 2.0;
1197            soa.push([v, v, v], [0.0; 3], [0.0; 3], 1.0);
1198        }
1199        assert!(soa.sort_by_morton_code(1.0).is_ok());
1200        assert_eq!(soa.len(), 50);
1201
1202        // After sorting, positions should be in ascending Morton order.
1203        let mut prev_code = 0u64;
1204        for i in 0..soa.len() {
1205            let pos = soa.get_position(i).expect("valid index");
1206            let code = position_to_morton(pos, 1.0).expect("valid");
1207            assert!(code >= prev_code, "Morton order violated at index {i}");
1208            prev_code = code;
1209        }
1210    }
1211}