#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ptr::NonNull;
use std::alloc::{alloc, dealloc, Layout};
use std::fmt;
/// Errors reported while setting up a streaming (non-temporal) write buffer.
///
/// Marked `#[non_exhaustive]` so further variants can be added without
/// breaking downstream `match` expressions.
#[non_exhaustive]
#[derive(Debug)]
pub enum StreamingError {
    /// The backing buffer could not be allocated.
    AllocationFailed {
        /// Number of bytes that was requested.
        size: usize,
        /// Alignment, in bytes, that was requested.
        alignment: usize,
    },
}
impl fmt::Display for StreamingError {
    /// Writes a human-readable description of the error, including the size
    /// and alignment carried by the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The enum has a single variant today, so this pattern is irrefutable;
        // adding a variant turns this line into a compile error, which is the
        // reminder to extend the message.
        let Self::AllocationFailed { size, alignment } = self;
        write!(
            f,
            "failed to statically map 128-bit registry block arrays directly sized {} aligned to {}",
            size, alignment
        )
    }
}
// Empty impl: no trait methods are overridden, so the provided defaults apply.
// The `Debug` and `Display` impls the trait requires are supplied separately.
impl std::error::Error for StreamingError {}
/// Owner of a heap buffer addressed in 128-bit (`__m128i`) units.
///
/// The buffer is released in `Drop` using the stored `layout`.
pub struct NonTemporalWriter {
    /// Pointer to the first 128-bit lane of the owned buffer.
    ///
    /// NOTE(review): this field is public, yet `Drop` deallocates whatever it
    /// points to using `layout`; overwriting it with a pointer that did not
    /// come from the matching allocation would make `Drop` unsound.
    pub memory_base: NonNull<__m128i>,
    /// Layout the buffer was allocated with; needed again for deallocation.
    layout: Layout,
}
impl NonTemporalWriter {
    /// Allocates a 16-byte-aligned buffer holding at least `capacity_bytes` bytes.
    ///
    /// The allocation size is rounded up to a whole number of 16-byte lanes so
    /// that every `__m128i` slot reachable from `memory_base` lies completely
    /// inside the allocation. The memory is left uninitialized.
    ///
    /// # Errors
    ///
    /// Returns [`StreamingError::AllocationFailed`] when
    /// * `capacity_bytes` is zero (the global allocator must not be called
    ///   with a zero-sized layout),
    /// * the size/alignment pair does not form a valid [`Layout`], or
    /// * the allocator returns a null pointer.
    pub fn new(capacity_bytes: usize) -> Result<Self, StreamingError> {
        // Alignment required by 128-bit streaming stores.
        const LANE_ALIGN: usize = 16;

        let failure = || StreamingError::AllocationFailed {
            size: capacity_bytes,
            alignment: LANE_ALIGN,
        };

        // Calling `alloc` with a zero-sized layout is undefined behaviour, so
        // reject the request up front instead of forwarding it.
        if capacity_bytes == 0 {
            return Err(failure());
        }

        // `pad_to_align` rounds the size up to a multiple of `LANE_ALIGN`; it
        // cannot overflow because `from_size_align` already validated that.
        let layout = Layout::from_size_align(capacity_bytes, LANE_ALIGN)
            .map_err(|_| failure())?
            .pad_to_align();

        // SAFETY: `layout` has a non-zero size (checked above) and a valid,
        // power-of-two alignment (validated by `from_size_align`).
        let raw = unsafe { alloc(layout) };
        let memory_base = NonNull::new(raw.cast::<__m128i>()).ok_or_else(failure)?;

        Ok(Self { memory_base, layout })
    }

    /// Stores `payload_chunk` at `output_buffer` using a non-temporal store
    /// (`_mm_stream_si128`), which hints the CPU to bypass the cache hierarchy.
    ///
    /// The destination is taken verbatim from the caller; it is not checked
    /// against the buffer owned by `self`. Non-temporal stores are weakly
    /// ordered; call [`Self::flush_combining_buffers`] before other threads or
    /// devices are expected to observe the data.
    ///
    /// # Safety
    ///
    /// * `output_buffer` must be valid for a 16-byte write.
    /// * `output_buffer` must be aligned to 16 bytes.
    #[cfg(target_feature = "sse2")]
    #[inline(always)]
    pub unsafe fn write_bypassing_cache(
        &self,
        output_buffer: *mut __m128i,
        payload_chunk: __m128i,
    ) {
        // SAFETY: the caller guarantees `output_buffer` is valid and 16-byte
        // aligned; SSE2 availability is guaranteed by the `cfg` on this method.
        unsafe { _mm_stream_si128(output_buffer, payload_chunk) }
    }

    /// Issues a store fence (`_mm_sfence`) so that all earlier stores,
    /// including non-temporal ones, become globally visible before any later
    /// store.
    pub fn flush_combining_buffers() {
        // SAFETY: `_mm_sfence` takes no memory operands; it only requires SSE,
        // which is part of the x86_64 baseline this file's intrinsics import
        // is gated on.
        unsafe { _mm_sfence() }
    }
}
impl Drop for NonTemporalWriter {
    /// Returns the owned buffer to the global allocator.
    fn drop(&mut self) {
        let raw = self.memory_base.as_ptr().cast::<u8>();
        // SAFETY: `new` obtains `memory_base` from `alloc` with exactly
        // `self.layout`. This relies on the public `memory_base` field not
        // having been reassigned since construction.
        unsafe { dealloc(raw, self.layout) }
    }
}