polars_utils/
mem.rs

1use std::sync::LazyLock;
2
/// Reinterprets a shared slice as a mutable slice over the same memory.
///
/// The returned slice has the same starting address and the same length as `s`; no data is
/// copied.
///
/// # Safety
/// This may break aliasing rules, make sure you are the only owner.
///
/// Concretely, for as long as the returned `&mut [T]` is alive the caller must guarantee that
/// nothing else reads or writes the elements through `s` or any other reference to the same
/// memory, and that the memory behind `s` is actually writable.
// The whole purpose of this function is to hand out `&mut` from `&`, so the lint is expected.
#[allow(clippy::mut_from_ref)]
pub unsafe fn to_mutable_slice<T: Copy>(s: &[T]) -> &mut [T] {
    let ptr = s.as_ptr() as *mut T;
    let len = s.len();
    // SAFETY: `ptr` and `len` describe exactly the live slice `s`, so the range is valid and
    // properly aligned. Exclusive access is the caller's obligation (see `# Safety`).
    unsafe { std::slice::from_raw_parts_mut(ptr, len) }
}
11
12pub static PAGE_SIZE: LazyLock<usize> = LazyLock::new(|| {
13    #[cfg(target_family = "unix")]
14    unsafe {
15        libc::sysconf(libc::_SC_PAGESIZE) as usize
16    }
17    #[cfg(not(target_family = "unix"))]
18    {
19        4096
20    }
21});
22
23pub mod prefetch {
24    use super::PAGE_SIZE;
25
26    /// # Safety
27    ///
28    /// This should only be called with pointers to valid memory.
29    unsafe fn prefetch_l2_impl(ptr: *const u8) {
30        #[cfg(target_arch = "x86_64")]
31        {
32            use std::arch::x86_64::*;
33            unsafe { _mm_prefetch(ptr as *const _, _MM_HINT_T1) };
34        }
35
36        #[cfg(all(target_arch = "aarch64", feature = "nightly"))]
37        {
38            use std::arch::aarch64::*;
39            unsafe { _prefetch(ptr as *const _, _PREFETCH_READ, _PREFETCH_LOCALITY2) };
40        }
41    }
42
43    /// Attempt to prefetch the memory in the slice to the L2 cache.
44    pub fn prefetch_l2(slice: &[u8]) {
45        if slice.is_empty() {
46            return;
47        }
48
49        // @TODO: We can play a bit more with this prefetching. Maybe introduce a maximum number of
50        // prefetches as to not overwhelm the processor. The linear prefetcher should pick it up
51        // at a certain point.
52
53        for i in (0..slice.len()).step_by(*PAGE_SIZE) {
54            unsafe { prefetch_l2_impl(slice[i..].as_ptr()) };
55        }
56
57        unsafe { prefetch_l2_impl(slice[slice.len() - 1..].as_ptr()) }
58    }
59
60    /// `madvise()` with `MADV_SEQUENTIAL` on unix systems. This is a no-op on non-unix systems.
61    pub fn madvise_sequential(#[allow(unused)] slice: &[u8]) {
62        #[cfg(target_family = "unix")]
63        madvise(slice, libc::MADV_SEQUENTIAL);
64    }
65
66    /// `madvise()` with `MADV_WILLNEED` on unix systems. This is a no-op on non-unix systems.
67    pub fn madvise_willneed(#[allow(unused)] slice: &[u8]) {
68        #[cfg(target_family = "unix")]
69        madvise(slice, libc::MADV_WILLNEED);
70    }
71
72    /// `madvise()` with `MADV_POPULATE_READ` on linux systems. This a no-op on non-linux systems.
73    pub fn madvise_populate_read(#[allow(unused)] slice: &[u8]) {
74        #[cfg(target_os = "linux")]
75        madvise(slice, libc::MADV_POPULATE_READ);
76    }
77
78    /// Forcibly reads at least one byte each page.
79    pub fn force_populate_read(slice: &[u8]) {
80        for i in (0..slice.len()).step_by(*PAGE_SIZE) {
81            std::hint::black_box(slice[i]);
82        }
83
84        std::hint::black_box(slice.last().copied());
85    }
86
87    #[cfg(target_family = "unix")]
88    fn madvise(slice: &[u8], advice: libc::c_int) {
89        if slice.is_empty() {
90            return;
91        }
92        let ptr = slice.as_ptr();
93
94        let align = ptr as usize % *PAGE_SIZE;
95        let ptr = ptr.wrapping_sub(align);
96        let len = slice.len() + align;
97
98        if unsafe { libc::madvise(ptr as *mut libc::c_void, len, advice) } != 0 {
99            let err = std::io::Error::last_os_error();
100            if let std::io::ErrorKind::InvalidInput = err.kind() {
101                panic!("{}", err);
102            }
103        }
104    }
105
106    pub fn no_prefetch(_: &[u8]) {}
107
108    /// Get the configured memory prefetch function.
109    pub fn get_memory_prefetch_func(verbose: bool) -> fn(&[u8]) -> () {
110        let memory_prefetch_func = match std::env::var("POLARS_MEMORY_PREFETCH").ok().as_deref() {
111            None => {
112                // madvise_willneed performed the best on both MacOS on Apple Silicon and Ubuntu on x86-64,
113                // using PDS-H query 3 SF=10 after clearing file cache as a benchmark.
114                #[cfg(target_family = "unix")]
115                {
116                    madvise_willneed
117                }
118                #[cfg(not(target_family = "unix"))]
119                {
120                    no_prefetch
121                }
122            },
123            Some("no_prefetch") => no_prefetch,
124            Some("prefetch_l2") => prefetch_l2,
125            Some("madvise_sequential") => {
126                #[cfg(target_family = "unix")]
127                {
128                    madvise_sequential
129                }
130                #[cfg(not(target_family = "unix"))]
131                {
132                    panic!(
133                        "POLARS_MEMORY_PREFETCH=madvise_sequential is not supported by this system"
134                    );
135                }
136            },
137            Some("madvise_willneed") => {
138                #[cfg(target_family = "unix")]
139                {
140                    madvise_willneed
141                }
142                #[cfg(not(target_family = "unix"))]
143                {
144                    panic!(
145                        "POLARS_MEMORY_PREFETCH=madvise_willneed is not supported by this system"
146                    );
147                }
148            },
149            Some("madvise_populate_read") => {
150                #[cfg(target_os = "linux")]
151                {
152                    madvise_populate_read
153                }
154                #[cfg(not(target_os = "linux"))]
155                {
156                    panic!(
157                        "POLARS_MEMORY_PREFETCH=madvise_populate_read is not supported by this system"
158                    );
159                }
160            },
161            Some("force_populate_read") => force_populate_read,
162            Some(v) => panic!("invalid value for POLARS_MEMORY_PREFETCH: {}", v),
163        };
164
165        if verbose {
166            let func_name = match memory_prefetch_func as usize {
167                v if v == no_prefetch as usize => "no_prefetch",
168                v if v == prefetch_l2 as usize => "prefetch_l2",
169                v if v == madvise_sequential as usize => "madvise_sequential",
170                v if v == madvise_willneed as usize => "madvise_willneed",
171                v if v == madvise_populate_read as usize => "madvise_populate_read",
172                v if v == force_populate_read as usize => "force_populate_read",
173                _ => unreachable!(),
174            };
175
176            eprintln!("memory prefetch function: {}", func_name);
177        }
178
179        memory_prefetch_func
180    }
181}