// sparkl2d_kernels/gpu_grid.rs

use crate::DevicePointer;
use na::vector;
use sparkl_core::math::{Point, Real, Vector};

use crate::cuda::{ActiveBlockHeader, AtomicAdd, GridHashMap, HaloState};
#[cfg(not(feature = "std"))]
use na::ComplexField;

// A grid block is four cells wide along each axis.
#[cfg(feature = "dim2")]
pub const NUM_CELL_PER_BLOCK: u64 = 4 * 4;
#[cfg(feature = "dim3")]
pub const NUM_CELL_PER_BLOCK: u64 = 4 * 4 * 4;
// Number of blocks a single particle can transfer data into: two per axis.
#[cfg(feature = "dim2")]
pub const NUM_ASSOC_BLOCKS: usize = 4;
#[cfg(feature = "dim3")]
pub const NUM_ASSOC_BLOCKS: usize = 8;

// Shifts enumerating the 3×3 (2D) or 3×3×3 (3D) node neighborhood of a cell:
// every component lies in {0, 1, 2}.
#[cfg(feature = "dim2")]
pub const NBH_SHIFTS: [Vector<usize>; 9] = [
    vector![2, 2],
    vector![2, 0],
    vector![2, 1],
    vector![0, 2],
    vector![0, 0],
    vector![0, 1],
    vector![1, 2],
    vector![1, 0],
    vector![1, 1],
];

#[cfg(feature = "dim3")]
pub const NBH_SHIFTS: [Vector<usize>; 27] = [
    vector![2, 2, 2],
    vector![2, 0, 2],
    vector![2, 1, 2],
    vector![0, 2, 2],
    vector![0, 0, 2],
    vector![0, 1, 2],
    vector![1, 2, 2],
    vector![1, 0, 2],
    vector![1, 1, 2],
    vector![2, 2, 0],
    vector![2, 0, 0],
    vector![2, 1, 0],
    vector![0, 2, 0],
    vector![0, 0, 0],
    vector![0, 1, 0],
    vector![1, 2, 0],
    vector![1, 0, 0],
    vector![1, 1, 0],
    vector![2, 2, 1],
    vector![2, 0, 1],
    vector![2, 1, 1],
    vector![0, 2, 1],
    vector![0, 0, 1],
    vector![0, 1, 1],
    vector![1, 2, 1],
    vector![1, 0, 1],
    vector![1, 1, 1],
];

// Same shifts as `NBH_SHIFTS`, but linearized as `x + y * 8` (2D) or
// `x + y * 8 + z * 64` (3D), i.e. row-major indices into an 8-node-wide
// shared-memory tile.
#[cfg(feature = "dim3")]
pub const NBH_SHIFTS_SHARED: [usize; 27] = [
    146, 130, 138, 144, 128, 136, 145, 129, 137, 18, 2, 10, 16, 0, 8, 17, 1, 9, 82, 66, 74, 80, 64,
    72, 81, 65, 73,
];
#[cfg(feature = "dim2")]
pub const NBH_SHIFTS_SHARED: [usize; 9] = [18, 2, 10, 16, 0, 8, 17, 1, 9];
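
// Host-side sanity check for the constants above: each packed index should be
// the `x + y * 8 (+ z * 64)` linearization of the matching `NBH_SHIFTS` entry.
#[cfg(all(test, not(target_os = "cuda")))]
mod nbh_shift_tests {
    use super::*;

    #[test]
    fn shared_shifts_are_linearized_nbh_shifts() {
        for (packed, shift) in NBH_SHIFTS_SHARED.iter().zip(NBH_SHIFTS.iter()) {
            #[cfg(feature = "dim2")]
            let expected = shift.x + shift.y * 8;
            #[cfg(feature = "dim3")]
            let expected = shift.x + shift.y * 8 + shift.z * 64;
            assert_eq!(*packed, expected);
        }
    }
}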
69
#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, bytemuck::Zeroable)]
#[repr(transparent)]
pub struct BlockVirtualId(pub u64);

impl BlockVirtualId {
    #[cfg(feature = "dim2")]
    pub const PACK_ORIGIN: u64 = 0x0000_8000_0000;
    #[cfg(feature = "dim3")]
    pub const PACK_ORIGIN: u64 = 0x0010_0000;

    #[cfg(feature = "dim2")]
    const MASK: u64 = 0x0000_0000_ffff_ffff;
    #[cfg(feature = "dim3")]
    const MASK: u64 = 0b0001_1111_1111_1111_1111_1111; // The 21 low bits.

    pub fn unpack_pos_on_signed_grid(self) -> Vector<i64> {
        self.unpack().cast::<i64>() - Vector::repeat(Self::PACK_ORIGIN as i64)
    }

    #[cfg(feature = "dim2")]
    pub fn pack(idx: Vector<usize>) -> BlockVirtualId {
        // In 2D, we keep the first 32 bits of each coordinate (using a total of 64 bits).
        BlockVirtualId((idx.x as u64 & Self::MASK) | ((idx.y as u64 & Self::MASK) << 32))
    }

    #[cfg(feature = "dim3")]
    pub fn pack(idx: Vector<usize>) -> BlockVirtualId {
        // In 3D, we can only keep the first 21 bits of each coordinate (using a total of 63 bits).
        BlockVirtualId(
            (idx.x as u64 & Self::MASK)
                | ((idx.y as u64 & Self::MASK) << 21)
                | ((idx.z as u64 & Self::MASK) << 42),
        )
    }

    #[cfg(feature = "dim2")]
    pub fn unpack(self) -> Vector<usize> {
        vector![self.0 & Self::MASK, (self.0 >> 32) & Self::MASK].cast::<usize>()
    }

    #[cfg(feature = "dim3")]
    pub fn unpack(self) -> Vector<usize> {
        vector![
            self.0 & Self::MASK,
            (self.0 >> 21) & Self::MASK,
            (self.0 >> 42) & Self::MASK
        ]
        .cast::<usize>()
    }
}

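// Host-side sanity check for the packing scheme above: `pack` followed by
// `unpack` should round-trip any coordinate whose components fit in `MASK`,
// and the signed unpacking simply recenters around `PACK_ORIGIN`.
#[cfg(all(test, not(target_os = "cuda")))]
mod block_virtual_id_tests {
    use super::*;

    #[test]
    fn pack_unpack_round_trip() {
        let origin = BlockVirtualId::PACK_ORIGIN as usize;
        #[cfg(feature = "dim2")]
        let idx = vector![origin + 3, origin - 7];
        #[cfg(feature = "dim3")]
        let idx = vector![origin + 3, origin - 7, origin + 11];
        assert_eq!(BlockVirtualId::pack(idx).unpack(), idx);
        assert_eq!(
            BlockVirtualId::pack(idx).unpack_pos_on_signed_grid(),
            idx.cast::<i64>() - Vector::repeat(origin as i64)
        );
    }
}
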
#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, bytemuck::Zeroable)]
#[repr(transparent)]
pub struct BlockHeaderId(pub u32);

impl BlockHeaderId {
    /// The physical id of the first node of the block with this header id.
    pub fn to_physical(self) -> BlockPhysicalId {
        BlockPhysicalId(self.0 as u64 * NUM_CELL_PER_BLOCK)
    }
}

#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, bytemuck::Zeroable)]
#[repr(transparent)]
pub struct BlockPhysicalId(pub u64);

#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, bytemuck::Zeroable)]
#[repr(transparent)]
pub struct NodePhysicalId(pub u64);

impl BlockPhysicalId {
    // All the components of the shift must be in `0..4`.
    #[cfg(feature = "dim2")]
    pub fn node_id_unchecked(self, shift: Vector<usize>) -> NodePhysicalId {
        NodePhysicalId(self.0 + shift.x as u64 + shift.y as u64 * 4)
    }

    // All the components of the shift must be in `0..4`.
    #[cfg(feature = "dim3")]
    pub fn node_id_unchecked(self, shift_in_block: Vector<usize>) -> NodePhysicalId {
        NodePhysicalId(
            self.0
                + shift_in_block.x as u64
                + shift_in_block.y as u64 * 4
                + shift_in_block.z as u64 * 4 * 4,
        )
    }

    pub fn node(self, i: u64) -> NodePhysicalId {
        NodePhysicalId(self.0 + i)
    }
}

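// Host-side sanity check: a node's physical id should be the block's physical
// offset plus the row-major index of the shift inside a 4-node-wide block.
#[cfg(all(test, not(target_os = "cuda")))]
mod node_physical_id_tests {
    use super::*;

    #[test]
    fn node_id_is_row_major_within_block() {
        let block = BlockHeaderId(1).to_physical();
        assert_eq!(block.0, NUM_CELL_PER_BLOCK);
        #[cfg(feature = "dim2")]
        assert_eq!(block.node_id_unchecked(vector![1, 2]).0, 16 + 1 + 2 * 4);
        #[cfg(feature = "dim3")]
        assert_eq!(
            block.node_id_unchecked(vector![1, 2, 3]).0,
            64 + 1 + 2 * 4 + 3 * 16
        );
    }
}
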
#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Clone, Copy, bytemuck::Zeroable)]
#[repr(C)]
pub struct DispatchBlock2ActiveBlock {
    pub active_block_id: BlockHeaderId,
    pub first_particle: u32,
}

impl Default for DispatchBlock2ActiveBlock {
    fn default() -> Self {
        Self {
            active_block_id: BlockHeaderId(0),
            first_particle: 0,
        }
    }
}

#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Clone, Copy)]
#[repr(C)]
pub struct GpuGrid {
    cell_width: Real,
    // Double buffering: the current buffer is read-only, the next buffer is read/write.
    buffer: DevicePointer<GpuGridNode>,
    active_blocks: DevicePointer<ActiveBlockHeader>,
    num_active_blocks: DevicePointer<u32>,
    pub packed_key_to_active_block: GridHashMap,
    pub dispatch_block_to_active_block: DevicePointer<DispatchBlock2ActiveBlock>,
    pub dispatch_halo_block_to_active_block: DevicePointer<DispatchBlock2ActiveBlock>,
    capacity: u64,
}

impl GpuGrid {
    /// Creates a new GPU grid.
    ///
    /// # Safety
    ///
    /// `buffer` must be a valid pointer to a buffer able to hold exactly `capacity` grid nodes.
    pub unsafe fn new(
        cell_width: Real,
        capacity: u64,
        buffer: DevicePointer<GpuGridNode>,
        active_blocks: DevicePointer<ActiveBlockHeader>,
        num_active_blocks: DevicePointer<u32>,
        dispatch_block_to_active_block: DevicePointer<DispatchBlock2ActiveBlock>,
        dispatch_halo_block_to_active_block: DevicePointer<DispatchBlock2ActiveBlock>,
        packed_key_to_active_block: GridHashMap,
    ) -> Self {
        Self {
            cell_width,
            capacity,
            buffer,
            active_blocks,
            num_active_blocks,
            dispatch_block_to_active_block,
            dispatch_halo_block_to_active_block,
            packed_key_to_active_block,
        }
    }

    pub fn capacity(&self) -> usize {
        self.capacity as usize
    }

    pub fn cell_width(&self) -> Real {
        self.cell_width
    }

    pub unsafe fn dispatch_block_to_active_block_unchecked(
        &self,
        dispatch_block_id: usize,
    ) -> DispatchBlock2ActiveBlock {
        *self
            .dispatch_block_to_active_block
            .as_ptr()
            .add(dispatch_block_id)
    }

    pub unsafe fn dispatch_block_to_active_block_unchecked_mut(
        &mut self,
        dispatch_block_id: usize,
    ) -> &mut DispatchBlock2ActiveBlock {
        &mut (*self
            .dispatch_block_to_active_block
            .as_mut_ptr()
            .add(dispatch_block_id))
    }

    pub unsafe fn active_block_unchecked(&self, block_id: BlockHeaderId) -> &ActiveBlockHeader {
        &*self.active_blocks.as_ptr().add(block_id.0 as usize)
    }

    pub unsafe fn active_block_unchecked_mut(
        &mut self,
        block_id: BlockHeaderId,
    ) -> &mut ActiveBlockHeader {
        &mut *self.active_blocks.as_mut_ptr().add(block_id.0 as usize)
    }

    pub fn num_active_blocks(&self) -> u32 {
        unsafe { *self.num_active_blocks.as_ptr() }
    }

    pub fn num_active_blocks_mut(&mut self) -> &mut u32 {
        unsafe { &mut *self.num_active_blocks.as_mut_ptr() }
    }

    pub fn block_associated_to_point(&self, pt: &Point<Real>) -> BlockVirtualId {
        const OFF_BY_TWO: i64 = 2;
        let coord = (pt / self.cell_width).map(|e| e.round() as i64);
        // Shift by two cells, recenter by `PACK_ORIGIN * 4` cells so the value
        // stays non-negative, then divide by 4 (the block width in cells).
        BlockVirtualId::pack(
            coord
                .coords
                .map(|x| (x - OFF_BY_TWO + BlockVirtualId::PACK_ORIGIN as i64 * 4) as usize / 4),
        )
    }

    pub fn blocks_associated_to_point(
        &self,
        pt: &Point<Real>,
    ) -> [BlockVirtualId; NUM_ASSOC_BLOCKS] {
        const OFF_BY_TWO: i64 = 2;
        let coord = (pt / self.cell_width).map(|e| e.round() as i64);
        let main_block = coord
            .coords
            .map(|x| (x - OFF_BY_TWO + BlockVirtualId::PACK_ORIGIN as i64 * 4) as usize / 4);
        Self::blocks_associated_to_block(main_block)
    }

    #[cfg(feature = "dim2")]
    pub fn blocks_associated_to_block(block: Vector<usize>) -> [BlockVirtualId; 4] {
        // TODO: we could improve this by:
        // - Using pre-computed packed shifts, instead of packing the shifted key.
        // - Selecting only the neighbor blocks actually affected by the particle.
        [
            BlockVirtualId::pack(block + vector![0, 0]),
            BlockVirtualId::pack(block + vector![0, 1]),
            BlockVirtualId::pack(block + vector![1, 0]),
            BlockVirtualId::pack(block + vector![1, 1]),
        ]
    }

    #[cfg(feature = "dim3")]
    pub fn blocks_associated_to_block(block: Vector<usize>) -> [BlockVirtualId; 8] {
        // TODO: we could improve this by:
        // - Using pre-computed packed shifts, instead of packing the shifted key.
        // - Selecting only the neighbor blocks actually affected by the particle.
        [
            BlockVirtualId::pack(block + vector![0, 0, 0]),
            BlockVirtualId::pack(block + vector![0, 0, 1]),
            BlockVirtualId::pack(block + vector![0, 1, 0]),
            BlockVirtualId::pack(block + vector![0, 1, 1]),
            BlockVirtualId::pack(block + vector![1, 0, 0]),
            BlockVirtualId::pack(block + vector![1, 0, 1]),
            BlockVirtualId::pack(block + vector![1, 1, 0]),
            BlockVirtualId::pack(block + vector![1, 1, 1]),
        ]
    }

    #[cfg(feature = "dim2")]
    pub fn blocks_transferring_into_block(block: Vector<usize>) -> [BlockVirtualId; 4] {
        // TODO: we could improve this by:
        // - Using pre-computed packed shifts, instead of packing the shifted key.
        // - Selecting only the neighbor blocks actually affected by the particle.
        [
            BlockVirtualId::pack(block - vector![0, 0]),
            BlockVirtualId::pack(block - vector![0, 1]),
            BlockVirtualId::pack(block - vector![1, 0]),
            BlockVirtualId::pack(block - vector![1, 1]),
        ]
    }

    #[cfg(feature = "dim3")]
    pub fn blocks_transferring_into_block(block: Vector<usize>) -> [BlockVirtualId; 8] {
        // TODO: we could improve this by:
        // - Using pre-computed packed shifts, instead of packing the shifted key.
        // - Selecting only the neighbor blocks actually affected by the particle.
        [
            BlockVirtualId::pack(block - vector![0, 0, 0]),
            BlockVirtualId::pack(block - vector![0, 0, 1]),
            BlockVirtualId::pack(block - vector![0, 1, 0]),
            BlockVirtualId::pack(block - vector![0, 1, 1]),
            BlockVirtualId::pack(block - vector![1, 0, 0]),
            BlockVirtualId::pack(block - vector![1, 0, 1]),
            BlockVirtualId::pack(block - vector![1, 1, 0]),
            BlockVirtualId::pack(block - vector![1, 1, 1]),
        ]
    }

    pub fn mark_block_as_active(&mut self, virtual_id: BlockVirtualId) {
        let mut hashmap = self.packed_key_to_active_block;
        hashmap.insert_nonexistant_with(virtual_id, || {
            // This is the first time we see this block.
            let block_header_id = unsafe { self.num_active_blocks_mut().global_atomic_add(1) };
            let active_block =
                unsafe { self.active_block_unchecked_mut(BlockHeaderId(block_header_id)) };
            *active_block = ActiveBlockHeader {
                virtual_id,
                first_particle: 0,
                num_particles: 0,
                halo_state: HaloState::EMPTY,
            };
            BlockHeaderId(block_header_id)
        })
    }

    pub unsafe fn get_packed_block_mut(
        &mut self,
        id: BlockVirtualId,
    ) -> Option<&mut ActiveBlockHeader> {
        let active_block_id = self.packed_key_to_active_block.get(id)?;
        Some(
            &mut *self
                .active_blocks
                .as_mut_ptr()
                .add(active_block_id.0 as usize),
        )
    }

    pub unsafe fn get_header_block_id(&self, id: BlockVirtualId) -> Option<BlockHeaderId> {
        self.packed_key_to_active_block.get(id)
    }

    pub unsafe fn get_node_unchecked(&self, id: NodePhysicalId) -> &GpuGridNode {
        &*self.buffer.as_ptr().add(id.0 as usize)
    }

    pub fn get_node(&self, id: NodePhysicalId) -> Option<&GpuGridNode> {
        if id.0 >= self.capacity {
            None
        } else {
            unsafe { Some(&*self.buffer.as_ptr().add(id.0 as usize)) }
        }
    }

    pub fn get_node_mut(&mut self, id: NodePhysicalId) -> Option<&mut GpuGridNode> {
        if id.0 >= self.capacity {
            None
        } else {
            unsafe { Some(&mut *self.buffer.as_mut_ptr().add(id.0 as usize)) }
        }
    }
}

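// Host-side sanity check: `blocks_associated_to_block` and
// `blocks_transferring_into_block` should mirror each other: whenever block
// `b` transfers into block `b + s`, block `b + s` lists `b` among its sources.
#[cfg(all(test, not(target_os = "cuda")))]
mod block_neighborhood_tests {
    use super::*;

    #[test]
    fn associated_and_transferring_are_mirrored() {
        let base = Vector::repeat(100usize);
        for assoc in GpuGrid::blocks_associated_to_block(base).iter() {
            let target = assoc.unpack();
            assert!(GpuGrid::blocks_transferring_into_block(target)
                .contains(&BlockVirtualId::pack(base)));
        }
    }
}
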
#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum GpuGridProjectionStatus {
    NotComputed,
    Inside(usize),
    Outside(usize),
    TooFar,
}

impl GpuGridProjectionStatus {
    pub fn is_inside(self) -> bool {
        matches!(self, Self::Inside(_))
    }

    pub fn is_outside(self) -> bool {
        matches!(self, Self::Outside(_))
    }

    pub fn flip(self) -> Self {
        match self {
            Self::Inside(i) => Self::Outside(i),
            Self::Outside(i) => Self::Inside(i),
            _ => self,
        }
    }
}

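// Host-side sanity check: `flip` should swap the inside/outside sides while
// preserving the stored index, and leave the other variants untouched.
#[cfg(all(test, not(target_os = "cuda")))]
mod projection_status_tests {
    use super::*;

    #[test]
    fn flip_swaps_sides_and_keeps_index() {
        let status = GpuGridProjectionStatus::Inside(7);
        assert!(status.is_inside());
        assert_eq!(status.flip(), GpuGridProjectionStatus::Outside(7));
        assert_eq!(status.flip().flip(), status);
        assert_eq!(
            GpuGridProjectionStatus::TooFar.flip(),
            GpuGridProjectionStatus::TooFar
        );
    }
}
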
#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Copy, Clone, Debug)]
#[repr(C)]
pub struct GpuGridNode {
    pub mass: Real,
    // That’s where the particles transfer their momentum.
    // This is then replaced by the velocity during the grid update.
    pub momentum_velocity: Vector<Real>,
    // Same as `momentum_velocity`, but for the ψ channel: the particles
    // transfer their ψ-momentum here, and it is replaced by the ψ-velocity
    // during the grid update.
    pub psi_momentum_velocity: Real,
    pub psi_mass: Real,
    pub prev_mass: Real,
    pub projection_status: GpuGridProjectionStatus,
    pub collision_normal: Vector<Real>,
    pub projection_scaled_dir: Vector<Real>,
}

impl Default for GpuGridNode {
    fn default() -> Self {
        Self {
            mass: 0.0,
            momentum_velocity: Vector::zeros(),
            psi_momentum_velocity: 0.0,
            psi_mass: 0.0,
            prev_mass: 0.0,
            projection_status: GpuGridProjectionStatus::NotComputed,
            collision_normal: Vector::zeros(),
            projection_scaled_dir: Vector::zeros(),
        }
    }
}

#[cfg_attr(not(target_os = "cuda"), derive(cust::DeviceCopy))]
#[derive(Copy, Clone)]
#[repr(C)]
pub struct HaloBlockData {
    pub virtual_id: BlockVirtualId,
    pub cells: [GpuGridNode; NUM_CELL_PER_BLOCK as usize],
}

impl Default for HaloBlockData {
    fn default() -> Self {
        Self {
            virtual_id: BlockVirtualId(0),
            cells: [GpuGridNode::default(); NUM_CELL_PER_BLOCK as usize],
        }
    }
}