//! Per-CPU data area implementation (`imp.rs`).
use core::ptr::addr_of;
use core::sync::atomic::{AtomicBool, AtomicPtr, Ordering};

use crate::InitError;
use percpu_macros::percpu_symbol_vma;
6
/// Tracks whether the per-CPU data areas are currently being initialized.
///
/// Set (via compare-exchange) by `init_inner()` for the duration of the copy,
/// then cleared to enable re-initialization. `percpu_area_base()` spins while
/// this flag is set.
static IS_INIT: AtomicBool = AtomicBool::new(false);
10
/// Cache-line size; per-CPU areas are sized and aligned in units of this.
const SIZE_64BIT: usize = 64;
/// Required alignment of the per-CPU area base address.
const PERCPU_AREA_ALIGN: usize = SIZE_64BIT;

/// Rounds `val` up to the next multiple of 64.
const fn align_up_64(val: usize) -> usize {
    let mask = SIZE_64BIT - 1;
    (val + mask) & !mask
}
17
/// The per-CPU data area base address.
///
/// Null until `init()` or `init_in_place()` stores the chosen base during
/// initialization. Read by `percpu_area_base_nolock()`, which panics while
/// this is still null.
static PERCPU_AREA_BASE: AtomicPtr<()> = AtomicPtr::new(core::ptr::null_mut());
21
extern "C" {
    // Bounds of the region reserved for all per-CPU data areas (defined by
    // the linker script).
    static _percpu_start: u8;
    static _percpu_end: u8;
    // WARNING: `_percpu_load_start`/`_percpu_load_end` (i.e. symbols in the
    // `.percpu` section) must be used with `percpu_symbol_vma!` macro to get
    // their VMA addresses. Casting them directly to `usize` may lead to
    // unexpected results, including:
    // - Rust assuming they are valid pointers and optimizing code based on that
    //   assumption (they are non-zero), causing unexpected runtime errors;
    // - Link-time errors because they are too far away from the program counter
    //   (when Rust uses PC-relative addressing).
    //
    // See https://github.com/arceos-org/percpu/issues/18 for more details.
    static _percpu_load_start: u8;
    static _percpu_load_end: u8;
}
38
39/// Returns the number of per-CPU data areas reserved in the `.percpu` section.
40///
41/// This is calculated based on the size of the `.percpu` section and the size
42/// of one per-CPU data area. The section size should be reserved in the linker
43/// script with enough space for all CPUs.
44pub fn percpu_area_num() -> usize {
45    (addr_of!(_percpu_end) as usize - addr_of!(_percpu_start) as usize)
46        / align_up_64(percpu_area_size())
47}
48
49/// Returns the per-CPU data area size for one CPU.
50///
51/// This is the size of the `.percpu` section content (all per-CPU static variables),
52/// rounded up to 64-byte alignment.
53pub fn percpu_area_size() -> usize {
54    percpu_symbol_vma!(_percpu_load_end) - percpu_symbol_vma!(_percpu_load_start)
55}
56
57/// Returns the expected layout for the per-CPU data area for the given number
58/// of CPUs.
59///
60/// # Arguments
61///
62/// - `cpu_count`: Number of CPUs.
63///
64/// # Returns
65///
66/// The expected layout for the per-CPU data area.
67pub fn percpu_area_layout_expected(cpu_count: usize) -> core::alloc::Layout {
68    let size = cpu_count * align_up_64(percpu_area_size());
69    core::alloc::Layout::from_size_align(size, PERCPU_AREA_ALIGN).unwrap()
70}
71
72fn percpu_area_base_nolock(cpu_id: usize) -> usize {
73    let base = PERCPU_AREA_BASE.load(Ordering::Relaxed);
74
75    if base.is_null() {
76        panic!("PerCPU area base address not set");
77    }
78
79    base as usize + cpu_id * align_up_64(percpu_area_size())
80}
81
82/// Returns the base address of the per-CPU data area for the given CPU.
83///
84/// # Panics
85///
86/// Panics if the per-CPU area base address has not been set (i.e., `init()`
87/// or `init_in_place()` has not been called).
88///
89/// # Concurrency
90///
91/// This function spins until initialization is complete if called during
92/// the initialization process.
93pub fn percpu_area_base(cpu_id: usize) -> usize {
94    while IS_INIT.load(Ordering::Acquire) {
95        core::hint::spin_loop();
96    }
97
98    percpu_area_base_nolock(cpu_id)
99}
100
/// Check if the `.percpu` section is loaded at VMA address 0 when feature "non-zero-vma" is disabled.
///
/// Compiles to a no-op when the "non-zero-vma" feature is enabled; otherwise
/// panics (via `assert_eq!`) if the section's load VMA is not 0.
fn validate_percpu_vma() {
    // `_percpu_load_start as *mut u8 as usize` cannot be used here because
    // rust will assume a `*mut u8` is a valid pointer and will not be 0,
    // causing unexpected `0 != 0` assertion failure.
    #[cfg(not(feature = "non-zero-vma"))]
    {
        assert_eq!(
            percpu_symbol_vma!(_percpu_load_start), 0,
            "The `.percpu` section must be loaded at VMA address 0 when feature \"non-zero-vma\" is disabled"
        )
    }
}
114
115/// Copies the per-CPU data from the source to the per-CPU data areas of the
116/// given CPUs.
117fn copy_percpu_region<T: Iterator<Item = usize>>(source: *const u8, dest_ids: T) {
118    let size = percpu_area_size();
119
120    for dest_id in dest_ids {
121        let dest_base = percpu_area_base_nolock(dest_id);
122        unsafe {
123            core::ptr::copy_nonoverlapping(source, dest_base as *mut u8, size);
124        }
125    }
126}
127
128fn validate_percpu_area_base(base: *mut u8) -> Result<(), InitError> {
129    if base.is_null() {
130        return Err(InitError::InvalidBase);
131    }
132    if (base as usize) % PERCPU_AREA_ALIGN != 0 {
133        return Err(InitError::UnalignedBase);
134    }
135    Ok(())
136}
137
/// Shared initialization logic behind `init()` and `init_in_place()`.
///
/// Validates `base`, claims the `IS_INIT` flag, records the base address, and
/// copies the per-CPU data image into each CPU's area.
///
/// # Arguments
///
/// - `base`: base address of the per-CPU data areas.
/// - `cpu_count`: number of per-CPU areas to initialize.
/// - `do_not_copy_to_primary`: skip CPU 0 (used by `init_in_place()`, where
///   CPU 0's data already lives at `base`).
///
/// # Returns
///
/// `Ok(cpu_count)` on success, `Ok(0)` if another initialization is already
/// in progress, or an [`InitError`] if `base` is null or unaligned.
fn init_inner(
    base: *mut u8,
    cpu_count: usize,
    do_not_copy_to_primary: bool,
) -> Result<usize, InitError> {
    validate_percpu_area_base(base)?;

    // Avoid re-initialization.
    // Claim the flag with a CAS: a losing concurrent caller returns `Ok(0)`
    // without touching any state. While the flag is held,
    // `percpu_area_base()` spins.
    if IS_INIT
        .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
        .is_err()
    {
        return Ok(0);
    }

    // Validate the VMA of the `.percpu` section.
    validate_percpu_vma();

    // Set the base address of the per-CPU data areas.
    // Relaxed is sufficient here: readers in `percpu_area_base()` first pass
    // an Acquire load of `IS_INIT`, which pairs with the Release store below.
    PERCPU_AREA_BASE.store(base as _, Ordering::Relaxed);

    // Copy the per-CPU data from the `.percpu` section to the per-CPU areas of
    // all CPUs.
    copy_percpu_region(
        addr_of!(_percpu_start),
        if do_not_copy_to_primary {
            1..cpu_count
        } else {
            0..cpu_count
        },
    );

    // Enable re-initialization.
    // Release ordering publishes the base-address store and the copied data.
    IS_INIT.store(false, Ordering::Release);

    Ok(cpu_count)
}
175
176/// Initialize per-CPU data areas using the `.percpu` section.
177///
178/// This function uses `_percpu_start` as the base address. The per-CPU data
179/// areas are statically allocated in the `.percpu` section by the linker script.
180/// The primary CPU's data is already in place, so only data for secondary CPUs
181/// (1 to cpu_count-1) is copied.
182///
183/// This function can be called repeatedly for re-initialization. However,
184/// re-initialization will overwrite existing per-CPU data, so per-CPU variables
185/// should be reset manually after re-initialization.
186///
187/// # Returns
188///
189/// The number of per-CPU areas initialized (i.e., `percpu_area_num()`) on
190/// success.
191///
192/// Returns [`InitError::InvalidBase`] if `_percpu_start` is null, or
193/// [`InitError::UnalignedBase`] if `_percpu_start` is not 64-byte aligned.
194pub fn init_in_place() -> Result<usize, InitError> {
195    let base = addr_of!(_percpu_start) as *mut u8;
196    init_inner(base, percpu_area_num(), true)
197}
198
199/// Initialize per-CPU data areas with user-provided memory.
200///
201/// The caller is responsible for allocating memory for the per-CPU data areas.
202/// Use [`percpu_area_layout_expected()`] to calculate the required memory size.
203/// The allocated memory should be aligned to at least 64 bytes (cache line size),
204/// and preferably to 4KiB (page size) for better performance.
205///
206/// This function copies the `.percpu` section content to the user-provided memory
207/// for all CPUs (0 to cpu_count-1).
208///
209/// This function can be called repeatedly for re-initialization. However,
210/// re-initialization will overwrite existing per-CPU data, so per-CPU variables
211/// should be reset manually after re-initialization.
212///
213/// # Arguments
214///
215/// - `base`: Base address of the user-allocated memory.
216/// - `cpu_count`: Number of CPUs.
217///
218/// # Returns
219///
220/// The number of per-CPU areas initialized (i.e., `cpu_count`) on success.
221///
222/// Returns [`InitError::InvalidBase`] if `base` is null, or
223/// [`InitError::UnalignedBase`] if `base` is not 64-byte aligned.
224///
225/// # Example
226///
227/// ```rust,no_run
228/// let cpu_count = 4;
229/// let layout = percpu::percpu_area_layout_expected(cpu_count);
230/// let base = unsafe { std::alloc::alloc(layout) as usize };
231/// percpu::init(base as *mut u8, cpu_count).unwrap();
232/// ```
233pub fn init(base: *mut u8, cpu_count: usize) -> Result<usize, InitError> {
234    init_inner(base, cpu_count, false)
235}
236
/// Reads the architecture-specific per-CPU data register.
///
/// Returns the value stored in the per-CPU register, which is the base address
/// of the current CPU's per-CPU data area.
///
/// # Architecture-specific registers
///
/// | Architecture | Register |
/// |--------------|----------|
/// | x86_64 | `GS_BASE` MSR |
/// | RISC-V | `gp` |
/// | AArch64 | `TPIDR_EL1` or `TPIDR_EL2` (with `arm-el2` feature) |
/// | LoongArch | `$r21` |
/// | ARM (32-bit) | `TPIDRPRW` (CP15 c13) |
pub fn read_percpu_reg() -> usize {
    let tp: usize;
    unsafe {
        cfg_if::cfg_if! {
            if #[cfg(target_arch = "x86_64")] {
                tp = if cfg!(target_os = "linux") {
                    // Read the mirror kept in the `SELF_PTR` per-CPU variable
                    // (written by `write_percpu_reg()`); presumably because
                    // userspace cannot read the GS_BASE MSR directly — see
                    // the `SELF_PTR` definition at the bottom of this file.
                    SELF_PTR.read_current_raw()
                } else if cfg!(target_os = "none") {
                    // Bare metal: read the MSR directly.
                    x86::msr::rdmsr(x86::msr::IA32_GS_BASE) as usize
                } else {
                    unimplemented!()
                };
            } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] {
                core::arch::asm!("mv {}, gp", out(reg) tp)
            } else if #[cfg(all(target_arch = "aarch64", not(feature = "arm-el2")))] {
                core::arch::asm!("mrs {}, TPIDR_EL1", out(reg) tp)
            } else if #[cfg(all(target_arch = "aarch64", feature = "arm-el2"))] {
                core::arch::asm!("mrs {}, TPIDR_EL2", out(reg) tp)
            } else if #[cfg(target_arch = "loongarch64")] {
                // Register Convention
                // https://docs.kernel.org/arch/loongarch/introduction.html#gprs
                core::arch::asm!("move {}, $r21", out(reg) tp)
            } else if #[cfg(target_arch = "arm")] {
                // TPIDRPRW via CP15 c13 (privileged thread/process ID register).
                core::arch::asm!("mrc p15, 0, {}, c13, c0, 4", out(reg) tp)
            }
        }
    }
    cfg_if::cfg_if! {
        if #[cfg(feature = "non-zero-vma")] {
            // `write_percpu_reg()` stores the base minus the section VMA, so
            // add the VMA back here to recover the true area base address.
            tp + percpu_symbol_vma!(_percpu_load_start)
        } else {
            tp
        }
    }
}
286
/// Writes the architecture-specific per-CPU data register.
///
/// Sets the per-CPU register to the given value, which should be the base address
/// of the current CPU's per-CPU data area.
///
/// # Safety
///
/// This function is unsafe because it directly writes to a low-level register.
/// Setting an invalid address may cause undefined behavior.
///
/// # Architecture-specific registers
///
/// See [`read_percpu_reg()`] for the list of registers used per architecture.
pub unsafe fn write_percpu_reg(tp: usize) {
    cfg_if::cfg_if! {
        if #[cfg(feature = "non-zero-vma")] {
            // Store the base minus the section VMA so that per-CPU accesses
            // (register + symbol VMA) land at the right address.
            // NOTE(review): this relies on the `let` inside `cfg_if!` shadowing
            // the `tp` parameter for the rest of the function — confirm the
            // macro expands at statement scope.
            let tp = tp - percpu_symbol_vma!(_percpu_load_start);
        }
    };

    unsafe {
        cfg_if::cfg_if! {
            if #[cfg(target_arch = "x86_64")] {
                if cfg!(target_os = "linux") {
                    // Userspace cannot write GS_BASE directly; use the
                    // arch_prctl(ARCH_SET_GS, tp) syscall instead.
                    const ARCH_SET_GS: u32 = 0x1001;
                    const SYS_ARCH_PRCTL: u32 = 158;
                    core::arch::asm!(
                        "syscall",
                        in("eax") SYS_ARCH_PRCTL,
                        in("edi") ARCH_SET_GS,
                        in("rsi") tp,
                    );
                } else if cfg!(target_os = "none") {
                    // Bare metal: write the MSR directly.
                    x86::msr::wrmsr(x86::msr::IA32_GS_BASE, tp as u64);
                } else {
                    unimplemented!()
                }
                // Mirror the value into the `SELF_PTR` per-CPU variable so it
                // can be read back via `gs`-relative access; this must happen
                // after GS_BASE is set, since the write itself is gs-relative.
                SELF_PTR.write_current_raw(tp);
            } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] {
                core::arch::asm!("mv gp, {}", in(reg) tp)
            } else if #[cfg(all(target_arch = "aarch64", not(feature = "arm-el2")))] {
                core::arch::asm!("msr TPIDR_EL1, {}", in(reg) tp)
            } else if #[cfg(all(target_arch = "aarch64", feature = "arm-el2"))] {
                core::arch::asm!("msr TPIDR_EL2, {}", in(reg) tp)
            } else if #[cfg(target_arch = "loongarch64")] {
                core::arch::asm!("move $r21, {}", in(reg) tp)
            } else if #[cfg(target_arch = "arm")] {
                // TPIDRPRW via CP15 c13 (privileged thread/process ID register).
                core::arch::asm!("mcr p15, 0, {}, c13, c0, 4", in(reg) tp)
            }
        }
    }
}
339
340/// Initializes the per-CPU data register for the current CPU.
341///
342/// This function sets the architecture-specific per-CPU register to point to
343/// the base address of the per-CPU data area for the given CPU ID.
344///
345/// This should be called on each CPU during boot, after `init()` or `init_in_place()`
346/// has been called.
347///
348/// # Arguments
349///
350/// - `cpu_id`: The CPU ID to use (0-based index).
351///
352/// # Panics
353///
354/// Panics if the per-CPU area base address has not been set.
355pub fn init_percpu_reg(cpu_id: usize) {
356    let tp = percpu_area_base(cpu_id);
357    unsafe { write_percpu_reg(tp) }
358}
359
360/// To use `percpu::__priv::NoPreemptGuard::new()` and `percpu::percpu_area_base()` in macro expansion.
361#[allow(unused_imports)]
362use crate as percpu;
363
/// On x86, we use `gs:SELF_PTR` to store the address of the per-CPU data area base.
///
/// Written by `write_percpu_reg()` after setting `GS_BASE`, and read back by
/// `read_percpu_reg()` on Linux (where the MSR cannot be read directly from
/// userspace).
#[cfg(target_arch = "x86_64")]
#[no_mangle]
#[percpu_macros::def_percpu]
static SELF_PTR: usize = 0;