Skip to main content

clf/
lib.rs

1/*!
2Flush the data cache line.
3
Use this crate for benchmarks whose results must not depend on what is already in the CPU cache.
5
6# Supports
7
8- x86_64, aarch64 (native implementation)
- other targets such as mips64el and powerpc64le (fall back to `__builtin_clear_cache`)
- minimum supported rustc version: 1.70.0 (due to `core::arch` and `asm!`)
11
12# Examples
13Easy to use:
14
15```rust
16let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
17clf::cache_line_flush_with_slice(&a);
18```
19
20or
21
22```rust
23let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
24let begin_ptr = a.as_ptr();
25let end_ptr = unsafe { begin_ptr.add(a.len()) };
26unsafe { clf::cache_line_flush_with_ptr(begin_ptr, end_ptr) };
27```
28
29# References
30
31[CPU cache](https://en.wikipedia.org/wiki/CPU_cache)
32
33# Benchmarking
34
35To measure the effectiveness of the cache flushing, you can run the included benchmarks:
36
37```text
38make bench
39```
40
41This will compare the access time of "warm" data versus "flushed" data.
42
43*/
44
45#[cfg(target_arch = "x86_64")]
46use core::arch::x86_64::_mm_clflush;
47
// Foreign declarations resolved from the native library named `clf` at link time.
#[link(name = "clf")]
extern "C" {
    /// Fallback flush for the byte range `begin_ptr..end_ptr`.
    ///
    /// In this file it is only called from `cache_line_flush_with_ptr`, and only when
    /// the target is neither x86_64 nor aarch64.
    // NOTE(review): per the crate-level docs this presumably wraps
    // `__builtin_clear_cache`; confirm against the C source of the `clf` library.
    // `dead_code` is allowed because the x86_64 and aarch64 builds never call it.
    #[allow(dead_code)]
    fn clf_fallback_clear_cache(begin_ptr: *const u8, end_ptr: *const u8);
}
53
54///
55/// get the cpu cache line size.
56///
57fn get_cache_line_size() -> usize {
58    use core::sync::atomic::{AtomicUsize, Ordering};
59    static CACHE_LINE_SIZE: AtomicUsize = AtomicUsize::new(0);
60
61    let size = CACHE_LINE_SIZE.load(Ordering::Relaxed);
62    if size != 0 {
63        return size;
64    }
65
66    let detected_size = detect_cache_line_size();
67    CACHE_LINE_SIZE.store(detected_size, Ordering::Relaxed);
68    detected_size
69}
70
///
/// Detect the cache line size in bytes, without any caching.
///
/// On x86_64 the size is read from CPUID leaf 1 (EBX bits 15:8, the CLFLUSH line
/// size in 8-byte units). If that field is zero, or on any other architecture,
/// 64 is returned.
///
fn detect_cache_line_size() -> usize {
    // Most common line size on modern CPUs. Also used for AArch64, which usually has
    // 64-byte lines; `mrs ctr_el0` is avoided because it is restricted on some
    // platforms (like macOS).
    const FALLBACK_LINE_SIZE: usize = 64;

    #[cfg(target_arch = "x86_64")]
    {
        #[allow(unused_unsafe)]
        let leaf1 = unsafe { core::arch::x86_64::__cpuid(1) };
        // EBX[15:8] counts 8-byte units; zero means the field is not populated.
        let units = ((leaf1.ebx >> 8) & 0xff) as usize;
        if units != 0 {
            return units * 8;
        }
    }

    FALLBACK_LINE_SIZE
}
89
90///
91/// flush the data cache line, this parameters are pointers.
92///
93/// # Safety
94/// This function is unsafe because it dereferences raw pointers and
95/// performs low-level CPU cache operations.
96pub unsafe fn cache_line_flush_with_ptr(begin_ptr: *const u8, end_ptr: *const u8) {
97    #[cfg(target_arch = "x86_64")]
98    {
99        let size = get_cache_line_size();
100        let mut ptr = begin_ptr;
101        while ptr < end_ptr {
102            _mm_clflush(ptr);
103            ptr = ptr.add(size);
104        }
105    }
106
107    #[cfg(target_arch = "aarch64")]
108    {
109        let size = get_cache_line_size();
110        let mut ptr = begin_ptr as usize;
111        let end = end_ptr as usize;
112        while ptr < end {
113            core::arch::asm!("dc civac, {0}", in(reg) ptr, options(nostack, preserves_flags));
114            ptr += size;
115        }
116        core::arch::asm!("dsb ish", options(nostack, preserves_flags));
117    }
118
119    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
120    {
121        clf_fallback_clear_cache(begin_ptr, end_ptr);
122    }
123}
124
125///
126/// flush the data cache line, this parameter is a slice.
127///
128pub fn cache_line_flush_with_slice<T>(slice: &[T]) {
129    let begin_ptr = slice.as_ptr() as *const u8;
130    let end_ptr = unsafe { begin_ptr.add(core::mem::size_of_val(slice)) };
131    unsafe { cache_line_flush_with_ptr(begin_ptr, end_ptr) };
132}
133
#[cfg(test)]
mod tests {
    // A cache flush has no directly observable result, so these are smoke tests:
    // each one checks that the call returns and that the data reads back unchanged.

    /// Pointer API over a whole small array.
    #[test]
    fn it_works_1() {
        let a = [1u8, 2, 3, 4, 5, 6, 7, 8, 9];
        unsafe {
            super::cache_line_flush_with_ptr(a.as_ptr(), a.as_ptr().add(a.len()));
        }
        assert_eq!(a, [1, 2, 3, 4, 5, 6, 7, 8, 9]);
    }

    /// Slice API over a small vector.
    #[test]
    fn it_works_2() {
        let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
        super::cache_line_flush_with_slice(&a);
        assert_eq!(a, [1, 2, 3, 4, 5, 6, 7, 8, 9]);
    }

    /// Slice API over a buffer spanning many cache lines.
    #[test]
    fn large_slice() {
        let a = vec![0u8; 1024 * 1024]; // 1MB
        super::cache_line_flush_with_slice(&a);
        assert!(a.iter().all(|&b| b == 0));
    }

    /// An empty slice has a dangling data pointer; flushing it must be a no-op.
    #[test]
    fn empty_slice() {
        let a: Vec<u64> = Vec::new();
        super::cache_line_flush_with_slice(&a);
        assert!(a.is_empty());
    }

    /// A sub-slice whose start and end need not fall on cache-line boundaries.
    #[test]
    fn unaligned_sub_slice() {
        let a: Vec<u8> = (0..200).collect();
        super::cache_line_flush_with_slice(&a[3..131]);
        assert!(a.iter().copied().eq(0..200));
    }

    /// Elements wider than one byte: the flushed range is measured in bytes.
    #[test]
    fn multi_byte_elements() {
        let a: Vec<u64> = (0..64).collect();
        super::cache_line_flush_with_slice(&a);
        assert!(a.iter().copied().eq(0..64));
    }
}