Skip to main content

oxigdal_core/
simd_buffer.rs

1//! SIMD-aligned buffer management for high-performance raster operations
2//!
3//! This module provides buffers with guaranteed alignment for efficient SIMD operations.
4//! It ensures that data is properly aligned for AVX-512 (64 bytes), AVX2 (32 bytes),
5//! or SSE2/NEON (16 bytes) instructions.
6//!
7//! # Features
8//!
9//! - **Configurable Alignment**: Support for 16, 32, and 64-byte alignment
10//! - **Zero-Copy Views**: Create strided views without copying data
11//! - **Tiled Access**: Cache-friendly tiled iteration for large rasters
12//! - **Type-Safe**: Generic over element types with proper trait bounds
13
14// Unsafe code is necessary for aligned memory allocation and SIMD operations
15#![allow(unsafe_code)]
16
17//! # Example
18//!
19//! ```rust
20//! use oxigdal_core::simd_buffer::AlignedBuffer;
21//! use oxigdal_core::error::Result;
22//!
23//! # fn main() -> Result<()> {
24//! // Create a 64-byte aligned buffer for f32 data
25//! let mut buffer = AlignedBuffer::<f32>::new(1000, 64)?;
26//!
27//! // Fill with data
28//! for (i, val) in buffer.as_mut_slice().iter_mut().enumerate() {
29//!     *val = i as f32;
30//! }
31//!
32//! // Access as slice
33//! let sum: f32 = buffer.as_slice().iter().sum();
34//! assert_eq!(sum, 499500.0);
35//! # Ok(())
36//! # }
37//! ```
38//!
39//! # Cache-Friendly Tiling
40//!
41//! For large rasters, tiled iteration improves cache locality:
42//!
43//! ```rust
44//! use oxigdal_core::simd_buffer::TiledBuffer;
45//! use oxigdal_core::error::Result;
46//!
47//! # fn main() -> Result<()> {
48//! let buffer: TiledBuffer<f32> = TiledBuffer::new(1024, 1024, 64, 64)?;
49//!
50//! for tile in buffer.tiles() {
51//!     // Process each 64x64 tile independently
52//!     // Better cache locality and SIMD-friendly
53//! }
54//! # Ok(())
55//! # }
56//! ```
57
58use std::alloc::{self, Layout};
59use std::ptr::NonNull;
60use std::slice;
61
62use crate::error::{OxiGdalError, Result};
63
64/// A buffer with guaranteed SIMD-friendly alignment
65///
66/// This buffer ensures that data is aligned to the specified boundary,
67/// which is critical for efficient SIMD operations. It uses Rust's
68/// global allocator with custom alignment.
69pub struct AlignedBuffer<T> {
70    /// Pointer to the aligned data
71    ptr: NonNull<T>,
72    /// Number of elements
73    len: usize,
74    /// Alignment in bytes
75    align: usize,
76    /// Layout for deallocation
77    layout: Layout,
78}
79
80impl<T> AlignedBuffer<T> {
81    /// Create a new aligned buffer with the specified capacity and alignment
82    ///
83    /// # Arguments
84    ///
85    /// * `capacity` - Number of elements to allocate
86    /// * `align` - Alignment in bytes (must be a power of 2)
87    ///
88    /// # Errors
89    ///
90    /// Returns an error if:
91    /// - Alignment is not a power of 2
92    /// - Alignment is less than the natural alignment of T
93    /// - Memory allocation fails
94    pub fn new(capacity: usize, align: usize) -> Result<Self> {
95        if !align.is_power_of_two() {
96            return Err(OxiGdalError::InvalidParameter {
97                parameter: "align",
98                message: "Alignment must be a power of 2".to_string(),
99            });
100        }
101
102        if align < std::mem::align_of::<T>() {
103            return Err(OxiGdalError::InvalidParameter {
104                parameter: "align",
105                message: format!(
106                    "Alignment {} is less than natural alignment of {}",
107                    align,
108                    std::mem::align_of::<T>()
109                ),
110            });
111        }
112
113        if capacity == 0 {
114            return Err(OxiGdalError::InvalidParameter {
115                parameter: "capacity",
116                message: "Capacity must be greater than 0".to_string(),
117            });
118        }
119
120        let size = capacity
121            .checked_mul(std::mem::size_of::<T>())
122            .ok_or_else(|| OxiGdalError::InvalidParameter {
123                parameter: "capacity",
124                message: "Capacity overflow".to_string(),
125            })?;
126
127        let layout = Layout::from_size_align(size, align).map_err(|e| OxiGdalError::Internal {
128            message: format!("Invalid layout: {e}"),
129        })?;
130
131        // Safety: We've validated the layout above
132        let ptr = unsafe { alloc::alloc(layout) };
133
134        let ptr = NonNull::new(ptr)
135            .ok_or_else(|| OxiGdalError::Internal {
136                message: "Failed to allocate aligned buffer".to_string(),
137            })?
138            .cast::<T>();
139
140        Ok(Self {
141            ptr,
142            len: capacity,
143            align,
144            layout,
145        })
146    }
147
148    /// Create a new aligned buffer filled with zeros
149    ///
150    /// # Arguments
151    ///
152    /// * `capacity` - Number of elements to allocate
153    /// * `align` - Alignment in bytes (must be a power of 2)
154    ///
155    /// # Errors
156    ///
157    /// Returns an error if allocation fails
158    pub fn zeros(capacity: usize, align: usize) -> Result<Self>
159    where
160        T: Default + Copy,
161    {
162        let buffer = Self::new(capacity, align)?;
163
164        // Safety: The buffer is properly allocated and we have exclusive access
165        unsafe {
166            std::ptr::write_bytes(buffer.ptr.as_ptr(), 0, capacity);
167        }
168
169        Ok(buffer)
170    }
171
172    /// Get the number of elements in the buffer
173    #[must_use]
174    pub const fn len(&self) -> usize {
175        self.len
176    }
177
178    /// Check if the buffer is empty
179    #[must_use]
180    pub const fn is_empty(&self) -> bool {
181        self.len == 0
182    }
183
184    /// Get the alignment of the buffer
185    #[must_use]
186    pub const fn alignment(&self) -> usize {
187        self.align
188    }
189
190    /// Get a raw pointer to the buffer
191    #[must_use]
192    pub fn as_ptr(&self) -> *const T {
193        self.ptr.as_ptr()
194    }
195
196    /// Get a mutable raw pointer to the buffer
197    #[must_use]
198    pub fn as_mut_ptr(&mut self) -> *mut T {
199        self.ptr.as_ptr()
200    }
201
202    /// Get the buffer as a slice
203    #[must_use]
204    pub fn as_slice(&self) -> &[T] {
205        // Safety: The buffer is properly allocated with `len` elements
206        unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
207    }
208
209    /// Get the buffer as a mutable slice
210    #[must_use]
211    pub fn as_mut_slice(&mut self) -> &mut [T] {
212        // Safety: The buffer is properly allocated with `len` elements
213        unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) }
214    }
215
216    /// Copy data from a slice into the buffer
217    ///
218    /// # Errors
219    ///
220    /// Returns an error if the slice length doesn't match the buffer capacity
221    pub fn copy_from_slice(&mut self, src: &[T]) -> Result<()>
222    where
223        T: Copy,
224    {
225        if src.len() != self.len {
226            return Err(OxiGdalError::InvalidParameter {
227                parameter: "src",
228                message: format!(
229                    "Source length {} doesn't match buffer capacity {}",
230                    src.len(),
231                    self.len
232                ),
233            });
234        }
235
236        self.as_mut_slice().copy_from_slice(src);
237        Ok(())
238    }
239
240    /// Create a strided view of the buffer
241    ///
242    /// This is useful for accessing every nth element without copying data.
243    ///
244    /// # Arguments
245    ///
246    /// * `stride` - Step size between elements
247    ///
248    /// # Errors
249    ///
250    /// Returns an error if stride is 0
251    pub fn strided_view(&self, stride: usize) -> Result<StridedView<'_, T>> {
252        if stride == 0 {
253            return Err(OxiGdalError::InvalidParameter {
254                parameter: "stride",
255                message: "Stride must be greater than 0".to_string(),
256            });
257        }
258
259        Ok(StridedView {
260            buffer: self.as_slice(),
261            stride,
262        })
263    }
264}
265
266impl<T> Drop for AlignedBuffer<T> {
267    fn drop(&mut self) {
268        // Safety: The pointer was allocated with this layout
269        unsafe {
270            alloc::dealloc(self.ptr.as_ptr().cast::<u8>(), self.layout);
271        }
272    }
273}
274
275// Safety: AlignedBuffer can be sent to another thread if T can be sent
276unsafe impl<T: Send> Send for AlignedBuffer<T> {}
277
278// Safety: AlignedBuffer can be shared between threads if T can be shared
279unsafe impl<T: Sync> Sync for AlignedBuffer<T> {}
280
281/// A strided view into a buffer for accessing every nth element
282pub struct StridedView<'a, T> {
283    buffer: &'a [T],
284    stride: usize,
285}
286
287impl<T> StridedView<'_, T> {
288    /// Get the number of elements in the strided view
289    #[must_use]
290    pub fn len(&self) -> usize {
291        self.buffer.len().div_ceil(self.stride)
292    }
293
294    /// Check if the view is empty
295    #[must_use]
296    pub fn is_empty(&self) -> bool {
297        self.buffer.is_empty()
298    }
299
300    /// Get an element at the specified index
301    #[must_use]
302    pub fn get(&self, index: usize) -> Option<&T> {
303        let offset = index * self.stride;
304        self.buffer.get(offset)
305    }
306
307    /// Create an iterator over the strided elements
308    #[must_use]
309    pub fn iter(&self) -> StridedIterator<'_, T> {
310        StridedIterator {
311            buffer: self.buffer,
312            stride: self.stride,
313            index: 0,
314        }
315    }
316}
317
/// Iterator that walks a slice in steps of `stride` elements
pub struct StridedIterator<'a, T> {
    /// Underlying contiguous elements
    buffer: &'a [T],
    /// Distance, in elements, between two consecutive items
    stride: usize,
    /// Logical position of the next item to yield
    index: usize,
}

impl<'a, T> Iterator for StridedIterator<'a, T> {
    type Item = &'a T;

    /// Yields the element at `index * stride`, or `None` once that offset leaves the
    /// slice (or can no longer be represented in a `usize`).
    fn next(&mut self) -> Option<Self::Item> {
        // An overflowing offset is necessarily out of bounds, so iteration ends.
        let offset = self.index.checked_mul(self.stride)?;
        let item = self.buffer.get(offset)?;
        self.index += 1;
        Some(item)
    }

    /// Reports the exact number of remaining items so that `collect` can preallocate.
    fn size_hint(&self) -> (usize, Option<usize>) {
        if self.stride == 0 {
            // Degenerate stride: the offset never advances, so no useful bound exists.
            return (0, None);
        }
        let total = self.buffer.len().div_ceil(self.stride);
        let remaining = total.saturating_sub(self.index);
        (remaining, Some(remaining))
    }
}
338
339/// A tiled buffer for cache-friendly access patterns
340///
341/// Large rasters can be divided into tiles for better cache locality.
342/// This is especially important for SIMD operations on multi-megabyte datasets.
343pub struct TiledBuffer<T> {
344    buffer: AlignedBuffer<T>,
345    width: usize,
346    height: usize,
347    tile_width: usize,
348    tile_height: usize,
349}
350
351impl<T: Default + Copy> TiledBuffer<T> {
352    /// Create a new tiled buffer
353    ///
354    /// # Arguments
355    ///
356    /// * `width` - Total width in elements
357    /// * `height` - Total height in elements
358    /// * `tile_width` - Tile width
359    /// * `tile_height` - Tile height
360    ///
361    /// # Errors
362    ///
363    /// Returns an error if allocation fails or dimensions are invalid
364    pub fn new(width: usize, height: usize, tile_width: usize, tile_height: usize) -> Result<Self> {
365        if tile_width == 0 || tile_height == 0 {
366            return Err(OxiGdalError::InvalidParameter {
367                parameter: "tile_size",
368                message: "Tile dimensions must be greater than 0".to_string(),
369            });
370        }
371
372        let capacity = width
373            .checked_mul(height)
374            .ok_or_else(|| OxiGdalError::Internal {
375                message: "Buffer size overflow".to_string(),
376            })?;
377
378        let buffer = AlignedBuffer::zeros(capacity, 64)?;
379
380        Ok(Self {
381            buffer,
382            width,
383            height,
384            tile_width,
385            tile_height,
386        })
387    }
388
389    /// Get the total width
390    #[must_use]
391    pub const fn width(&self) -> usize {
392        self.width
393    }
394
395    /// Get the total height
396    #[must_use]
397    pub const fn height(&self) -> usize {
398        self.height
399    }
400
401    /// Get an iterator over tiles
402    #[must_use]
403    pub fn tiles(&self) -> TileIterator<'_, T> {
404        TileIterator {
405            buffer: &self.buffer,
406            width: self.width,
407            height: self.height,
408            tile_width: self.tile_width,
409            tile_height: self.tile_height,
410            current_x: 0,
411            current_y: 0,
412        }
413    }
414
415    /// Get the underlying buffer
416    #[must_use]
417    pub const fn buffer(&self) -> &AlignedBuffer<T> {
418        &self.buffer
419    }
420}
421
422/// Iterator over tiles in a tiled buffer
423pub struct TileIterator<'a, T> {
424    #[allow(dead_code)]
425    buffer: &'a AlignedBuffer<T>,
426    width: usize,
427    height: usize,
428    tile_width: usize,
429    tile_height: usize,
430    current_x: usize,
431    current_y: usize,
432}
433
434/// A tile from a tiled buffer
435pub struct Tile {
436    /// X offset in the parent buffer
437    pub x: usize,
438    /// Y offset in the parent buffer
439    pub y: usize,
440    /// Tile width
441    pub width: usize,
442    /// Tile height
443    pub height: usize,
444}
445
446impl<T> Iterator for TileIterator<'_, T> {
447    type Item = Tile;
448
449    fn next(&mut self) -> Option<Self::Item> {
450        if self.current_y >= self.height {
451            return None;
452        }
453
454        let tile = Tile {
455            x: self.current_x,
456            y: self.current_y,
457            width: self.tile_width.min(self.width - self.current_x),
458            height: self.tile_height.min(self.height - self.current_y),
459        };
460
461        // Move to next tile
462        self.current_x += self.tile_width;
463        if self.current_x >= self.width {
464            self.current_x = 0;
465            self.current_y += self.tile_height;
466        }
467
468        Some(tile)
469    }
470}
471
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created buffer reports its size and alignment and starts on a
    /// 64-byte boundary.
    #[test]
    fn test_aligned_buffer_creation() {
        let buf = AlignedBuffer::<f32>::new(100, 64)
            .expect("Failed to create aligned buffer with valid parameters");

        assert!(!buf.is_empty());
        assert_eq!(buf.len(), 100);
        assert_eq!(buf.alignment(), 64);

        // The start address must be a multiple of the requested alignment.
        let address = buf.as_ptr() as usize;
        assert_eq!(address % 64, 0);
    }

    /// `zeros` yields a buffer in which every element is zero.
    #[test]
    fn test_aligned_buffer_zeros() {
        let buf = AlignedBuffer::<f32>::zeros(100, 64)
            .expect("Failed to create zero-initialized aligned buffer");

        assert!(buf.as_slice().iter().all(|&value| value == 0.0));
    }

    /// Data copied in with `copy_from_slice` can be read back unchanged.
    #[test]
    fn test_aligned_buffer_copy() {
        let expected: Vec<f32> = (0..10).map(|i| i as f32).collect();
        let mut buf = AlignedBuffer::<f32>::new(10, 64).expect("Failed to create aligned buffer");

        buf.copy_from_slice(&expected)
            .expect("Failed to copy data to aligned buffer");

        assert_eq!(buf.as_slice(), expected.as_slice());
    }

    /// A stride of two over ten elements exposes the five even-indexed values.
    #[test]
    fn test_strided_view() {
        let source: Vec<f32> = (0..10).map(|i| i as f32).collect();
        let mut buf = AlignedBuffer::<f32>::new(10, 64).expect("Failed to create aligned buffer");
        buf.copy_from_slice(&source)
            .expect("Failed to copy data to buffer");

        let view = buf.strided_view(2).expect("Failed to create strided view");
        assert_eq!(view.len(), 5);

        let picked: Vec<f32> = view.iter().copied().collect();
        assert_eq!(picked, vec![0.0, 2.0, 4.0, 6.0, 8.0]);
    }

    /// A 100x100 raster with 32x32 tiles is split into a 4x4 grid of tiles.
    #[test]
    fn test_tiled_buffer() {
        let tiled =
            TiledBuffer::<f32>::new(100, 100, 32, 32).expect("Failed to create tiled buffer");

        assert_eq!(tiled.width(), 100);
        assert_eq!(tiled.height(), 100);
        assert_eq!(tiled.tiles().count(), 16);
    }

    /// Interior tiles have the full size, while the bottom-right tile is clipped.
    #[test]
    fn test_tile_dimensions() {
        let tiled =
            TiledBuffer::<f32>::new(100, 100, 32, 32).expect("Failed to create tiled buffer");
        let tiles: Vec<Tile> = tiled.tiles().collect();

        // First tile: full size at the origin.
        let first = &tiles[0];
        assert_eq!((first.x, first.y), (0, 0));
        assert_eq!((first.width, first.height), (32, 32));

        // Last tile: only 100 - 96 = 4 elements remain in each direction.
        let last = &tiles[15];
        assert_eq!((last.x, last.y), (96, 96));
        assert_eq!((last.width, last.height), (4, 4));
    }

    /// Alignments that are not a power of two, or below the natural alignment of the
    /// element type, are rejected.
    #[test]
    fn test_invalid_alignment() {
        let not_power_of_two = AlignedBuffer::<f32>::new(100, 63);
        assert!(not_power_of_two.is_err());

        let below_natural = AlignedBuffer::<f32>::new(100, 1);
        assert!(below_natural.is_err());
    }

    /// A capacity of zero is rejected.
    #[test]
    fn test_zero_capacity() {
        assert!(AlignedBuffer::<f32>::new(0, 64).is_err());
    }
}