ringkernel_wavesim/simulation/
fdtd_dsl.rs1#[cfg(feature = "cuda-codegen")]
10use ringkernel_cuda_codegen::{transpile_stencil_kernel, Grid, StencilConfig};
11
12#[cfg(feature = "cuda-codegen")]
14pub use ringkernel_cuda_codegen::GridPos;
15
16#[cfg(not(feature = "cuda-codegen"))]
21#[derive(Debug, Clone, Copy)]
22pub struct GridPos {
23 index: usize,
25 buffer_width: usize,
27}
28
29#[cfg(not(feature = "cuda-codegen"))]
30impl GridPos {
31 pub fn new(index: usize, buffer_width: usize) -> Self {
37 Self {
38 index,
39 buffer_width,
40 }
41 }
42
43 pub fn from_local(lx: usize, ly: usize, tile_size: usize, halo: usize) -> Self {
51 let buffer_width = tile_size + 2 * halo;
52 let index = (ly + halo) * buffer_width + (lx + halo);
53 Self {
54 index,
55 buffer_width,
56 }
57 }
58
59 #[inline]
61 pub fn idx(&self) -> usize {
62 self.index
63 }
64
65 #[inline]
67 pub fn north<T: Copy>(&self, buf: &[T]) -> T {
68 buf[self.index - self.buffer_width]
69 }
70
71 #[inline]
73 pub fn south<T: Copy>(&self, buf: &[T]) -> T {
74 buf[self.index + self.buffer_width]
75 }
76
77 #[inline]
79 pub fn east<T: Copy>(&self, buf: &[T]) -> T {
80 buf[self.index + 1]
81 }
82
83 #[inline]
85 pub fn west<T: Copy>(&self, buf: &[T]) -> T {
86 buf[self.index - 1]
87 }
88
89 #[inline]
91 pub fn at<T: Copy>(&self, buf: &[T], dx: i32, dy: i32) -> T {
92 let offset = dy as isize * self.buffer_width as isize + dx as isize;
93 buf[(self.index as isize + offset) as usize]
94 }
95
96 #[inline]
100 pub fn up<T: Copy>(&self, buf: &[T]) -> T {
101 self.north(buf)
102 }
103
104 #[inline]
108 pub fn down<T: Copy>(&self, buf: &[T]) -> T {
109 self.south(buf)
110 }
111}
112
113pub fn fdtd_wave_step_cpu(
118 pressure: &[f32],
119 pressure_prev: &mut [f32],
120 c2: f32,
121 damping: f32,
122 idx: usize,
123 buffer_width: usize,
124) {
125 let p = pressure[idx];
126 let p_prev = pressure_prev[idx];
127
128 let laplacian = pressure[idx - buffer_width] + pressure[idx + buffer_width] + pressure[idx + 1] + pressure[idx - 1] - 4.0 * p;
134
135 let p_new = 2.0 * p - p_prev + c2 * laplacian;
137
138 pressure_prev[idx] = p_new * damping;
140}
141
142#[cfg(not(feature = "cuda-codegen"))]
158pub fn fdtd_wave_step_dsl(
159 pressure: &[f32],
160 pressure_prev: &mut [f32],
161 c2: f32,
162 damping: f32,
163 pos: GridPos,
164) {
165 let p = pressure[pos.idx()];
166 let p_prev = pressure_prev[pos.idx()];
167 let laplacian =
168 pos.north(pressure) + pos.south(pressure) + pos.east(pressure) + pos.west(pressure)
169 - 4.0 * p;
170 let p_new = 2.0 * p - p_prev + c2 * laplacian;
171 pressure_prev[pos.idx()] = p_new * damping;
172}
173
174#[cfg(not(feature = "cuda-codegen"))]
178pub fn fdtd_tile_step_dsl(
179 pressure: &[f32],
180 pressure_prev: &mut [f32],
181 c2: f32,
182 damping: f32,
183 tile_size: usize,
184 halo: usize,
185) {
186 for ly in 0..tile_size {
187 for lx in 0..tile_size {
188 let pos = GridPos::from_local(lx, ly, tile_size, halo);
189 fdtd_wave_step_dsl(pressure, pressure_prev, c2, damping, pos);
190 }
191 }
192}
193
194#[cfg(feature = "cuda-codegen")]
199pub fn generate_fdtd_cuda() -> String {
200 use syn::parse_quote;
201
202 let kernel_fn: syn::ItemFn = parse_quote! {
204 fn fdtd_wave_step(
205 pressure: &[f32],
206 pressure_prev: &mut [f32],
207 c2: f32,
208 damping: f32,
209 pos: GridPos,
210 ) {
211 let p = pressure[pos.idx()];
212 let p_prev = pressure_prev[pos.idx()];
213 let laplacian = pos.north(pressure)
214 + pos.south(pressure)
215 + pos.east(pressure)
216 + pos.west(pressure)
217 - 4.0 * p;
218 let p_new = 2.0 * p - p_prev + c2 * laplacian;
219 pressure_prev[pos.idx()] = p_new * damping;
220 }
221 };
222
223 let config = StencilConfig::new("fdtd_wave_step")
225 .with_grid(Grid::Grid2D)
226 .with_tile_size(16, 16)
227 .with_halo(1);
228
229 match transpile_stencil_kernel(&kernel_fn, &config) {
231 Ok(cuda) => cuda,
232 Err(e) => format!("// Transpilation error: {}", e),
233 }
234}
235
236#[cfg(not(feature = "cuda-codegen"))]
237pub fn generate_fdtd_cuda() -> String {
238 "// CUDA codegen not enabled".to_string()
239}
240
241pub fn handwritten_fdtd_cuda() -> &'static str {
243 include_str!("../shaders/fdtd_tile.cu")
244}
245
246#[cfg(test)]
247mod tests {
248 use super::*;
249
250 #[test]
251 fn test_cpu_fdtd_step() {
252 let buffer_width = 6;
254 let mut pressure = vec![0.0f32; 36];
255 let mut pressure_prev = vec![0.0f32; 36];
256
257 pressure[14] = 1.0; fdtd_wave_step_cpu(&pressure, &mut pressure_prev, 0.25, 0.99, 14, buffer_width);
262
263 assert!((pressure_prev[14] - 0.99).abs() < 0.01);
267 }
268
269 #[test]
270 #[cfg(feature = "cuda-codegen")]
271 fn test_generated_cuda_source() {
272 let source = generate_fdtd_cuda();
273
274 assert!(
276 source.contains("extern \"C\" __global__"),
277 "Should have CUDA kernel declaration"
278 );
279 assert!(source.contains("threadIdx.x"), "Should use thread index X");
280 assert!(source.contains("threadIdx.y"), "Should use thread index Y");
281 assert!(
282 source.contains("buffer_width = 18"),
283 "Should have correct buffer width (16 + 2*1)"
284 );
285
286 println!("Generated CUDA:\n{}", source);
287 }
288
289 #[test]
290 #[cfg(feature = "cuda-codegen")]
291 fn test_cuda_source_structure() {
292 let source = generate_fdtd_cuda();
293
294 assert!(
296 source.contains("if (lx >= 16 || ly >= 16) return;"),
297 "Should have bounds check"
298 );
299 assert!(
300 source.contains("float p ="),
301 "Should have pressure variable"
302 );
303
304 assert!(
306 source.contains("- 18") || source.contains("- buffer_width"),
307 "Should access north neighbor"
308 );
309 assert!(
310 source.contains("+ 18") || source.contains("+ buffer_width"),
311 "Should access south neighbor"
312 );
313 }
314
315 #[test]
316 #[cfg(not(feature = "cuda-codegen"))]
317 fn test_grid_pos_neighbor_access() {
318 let buffer: Vec<f32> = (0..36).map(|i| i as f32).collect();
328 let pos = GridPos::new(14, 6); assert_eq!(pos.idx(), 14);
331 assert_eq!(pos.north(&buffer), 8.0); assert_eq!(pos.south(&buffer), 20.0); assert_eq!(pos.east(&buffer), 15.0); assert_eq!(pos.west(&buffer), 13.0); }
336
337 #[test]
338 #[cfg(not(feature = "cuda-codegen"))]
339 fn test_grid_pos_from_local() {
340 let pos = GridPos::from_local(0, 0, 16, 1);
342 assert_eq!(pos.idx(), 19);
344
345 let pos = GridPos::from_local(15, 15, 16, 1);
346 assert_eq!(pos.idx(), 304);
348 }
349
350 #[test]
351 #[cfg(not(feature = "cuda-codegen"))]
352 fn test_grid_pos_at_offset() {
353 let buffer: Vec<f32> = (0..36).map(|i| i as f32).collect();
354 let pos = GridPos::new(14, 6);
355
356 assert_eq!(pos.at(&buffer, 0, 0), buffer[14]);
358
359 assert_eq!(pos.at(&buffer, 1, 0), buffer[15]);
361
362 assert_eq!(pos.at(&buffer, -1, 0), buffer[13]);
364
365 assert_eq!(pos.at(&buffer, 0, -1), buffer[8]);
367
368 assert_eq!(pos.at(&buffer, 0, 1), buffer[20]);
370
371 assert_eq!(pos.at(&buffer, 1, -1), buffer[9]);
373 }
374
375 #[test]
376 #[cfg(not(feature = "cuda-codegen"))]
377 fn test_dsl_matches_cpu() {
378 let buffer_width = 6;
380 let tile_size = 4;
381 let halo = 1;
382
383 let mut pressure1 = vec![0.0f32; 36];
384 let mut pressure1_prev = vec![0.0f32; 36];
385 let mut pressure2 = vec![0.0f32; 36];
386 let mut pressure2_prev = vec![0.0f32; 36];
387
388 pressure1[14] = 1.0;
390 pressure2[14] = 1.0;
391
392 let c2 = 0.25f32;
393 let damping = 0.99f32;
394
395 for ly in 0..tile_size {
397 for lx in 0..tile_size {
398 let idx = (ly + halo) * buffer_width + (lx + halo);
399 fdtd_wave_step_cpu(
400 &pressure1,
401 &mut pressure1_prev,
402 c2,
403 damping,
404 idx,
405 buffer_width,
406 );
407 }
408 }
409
410 fdtd_tile_step_dsl(
412 &pressure2,
413 &mut pressure2_prev,
414 c2,
415 damping,
416 tile_size,
417 halo,
418 );
419
420 for i in 0..36 {
422 assert!(
423 (pressure1_prev[i] - pressure2_prev[i]).abs() < 1e-6,
424 "Mismatch at index {}: CPU={}, DSL={}",
425 i,
426 pressure1_prev[i],
427 pressure2_prev[i]
428 );
429 }
430 }
431}