Skip to main content

ax_percpu/
imp.rs

1use core::sync::atomic::{AtomicBool, Ordering};
2
3use ax_percpu_macros::percpu_symbol_vma;
4
/// Set to `true` by the first call to [`init`]; any later call sees the flag
/// already set and returns early without touching the per-CPU areas again.
static IS_INIT: AtomicBool = AtomicBool::new(false);
6
/// Rounds `val` up to the nearest multiple of 64.
///
/// The result is used as the stride between consecutive per-CPU areas, so
/// every area starts on a 64-byte boundary relative to the first one.
const fn align_up_64(val: usize) -> usize {
    const ALIGN: usize = 0x40;
    const LOW_BITS: usize = ALIGN - 1;
    // Step past the next boundary, then drop the low bits to land on it.
    let bumped = val + ALIGN - 1;
    bumped & !LOW_BITS
}
11
/// Base address of the per-CPU region on hosted (non-bare-metal) targets.
///
/// [`init`] stores a heap allocation here on Linux, and [`percpu_area_base`]
/// reads it back instead of using the address of `_percpu_start`.
#[cfg(not(target_os = "none"))]
static PERCPU_AREA_BASE: spin::once::Once<usize> = spin::once::Once::new();
14
extern "C" {
    // Start and end of the whole region reserved for all per-CPU areas. Only
    // the addresses of these symbols are used; they are never called.
    // (Presumably provided by the linker script — confirm.)
    fn _percpu_start();
    fn _percpu_end();
    // WARNING: `_percpu_load_start`/`_percpu_load_end` (i.e. symbols in the
    // `.percpu` section) must be used with `percpu_symbol_vma!` macro to get
    // their VMA addresses. Casting them directly to `usize` may lead to
    // unexpected results, including:
    // - Rust assuming they are valid pointers and optimizing code based on that
    //   assumption (they are non-zero), causing unexpected runtime errors;
    // - Link-time errors because they are too far away from the program counter
    //   (when Rust uses PC-relative addressing).
    // See https://github.com/arceos-org/percpu/issues/18 for more details.
    fn _percpu_load_start();
    fn _percpu_load_end();
}
30
31/// Returns the number of per-CPU data areas reserved.
32pub fn percpu_area_num() -> usize {
33    (_percpu_end as *const () as usize - _percpu_start as *const () as usize)
34        / align_up_64(percpu_area_size())
35}
36
37/// Returns the per-CPU data area size for one CPU.
38pub fn percpu_area_size() -> usize {
39    percpu_symbol_vma!(_percpu_load_end) - percpu_symbol_vma!(_percpu_load_start)
40}
41
42/// Returns the base address of the per-CPU data area on the given CPU.
43///
44/// if `cpu_id` is 0, it returns the base address of all per-CPU data areas.
45pub fn percpu_area_base(cpu_id: usize) -> usize {
46    cfg_if::cfg_if! {
47        if #[cfg(target_os = "none")] {
48            let base = _percpu_start as *const () as usize;
49        } else {
50            let base = *PERCPU_AREA_BASE.get().unwrap();
51        }
52    }
53    base + cpu_id * align_up_64(percpu_area_size())
54}
55
56/// Initialize all per-CPU data areas.
57///
58/// The number of areas is determined by the following formula:
59///
60/// ```text
61/// (percpu_section_size / align_up(percpu_area_size, 64)
62/// ```
63///
64/// Returns the number of areas initialized. If this function has been called
65/// before, it does nothing and returns 0.
66pub fn init() -> usize {
67    // avoid re-initialization.
68    if IS_INIT
69        .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
70        .is_err()
71    {
72        return 0;
73    }
74
75    #[cfg(not(feature = "non-zero-vma"))]
76    {
77        // `_percpu_load_start as *const () as usize` cannot be used here because
78        // rust will assume a `*const ()` is a valid pointer and will not be 0,
79        // causing unexpected `0 != 0` assertion failure.
80        assert_eq!(
81            percpu_symbol_vma!(_percpu_load_start),
82            0,
83            "The `.percpu` section must be loaded at VMA address 0 when feature \"non-zero-vma\" \
84             is disabled"
85        )
86    }
87
88    #[cfg(target_os = "linux")]
89    {
90        // we not load the ax-percpu section in ELF, allocate them here.
91        let total_size = _percpu_end as *const () as usize - _percpu_start as *const () as usize;
92        let layout = std::alloc::Layout::from_size_align(total_size, 0x1000).unwrap();
93        PERCPU_AREA_BASE.call_once(|| unsafe { std::alloc::alloc(layout) as usize });
94    }
95
96    let base = percpu_area_base(0);
97    let size = percpu_area_size();
98    let num = percpu_area_num();
99    for i in 1..num {
100        let secondary_base = percpu_area_base(i);
101        #[cfg(target_os = "none")]
102        assert!(secondary_base + size <= _percpu_end as *const () as usize);
103        // copy per-cpu data of the primary CPU to other CPUs.
104        unsafe {
105            core::ptr::copy_nonoverlapping(base as *const u8, secondary_base as *mut u8, size);
106        }
107    }
108    num
109}
110
/// Reads the architecture-specific per-CPU data register.
///
/// This register is used to hold the per-CPU data base on each CPU.
///
/// When feature `non-zero-vma` is enabled, the VMA of `_percpu_load_start` is
/// added to the raw register value before it is returned (the inverse of the
/// adjustment made in `write_percpu_reg`).
///
/// # Panics
///
/// On x86_64, panics with `unimplemented!()` when the target OS is neither
/// `linux` nor `none`.
pub fn read_percpu_reg() -> usize {
    let tp: usize;
    unsafe {
        cfg_if::cfg_if! {
            if #[cfg(target_arch = "x86_64")] {
                tp = if cfg!(target_os = "linux") {
                    // Read back the copy that `write_percpu_reg` stored in the
                    // per-CPU variable `SELF_PTR`.
                    SELF_PTR.read_current_raw()
                } else if cfg!(target_os = "none") {
                    // Bare metal: read the GS base MSR directly.
                    x86::msr::rdmsr(x86::msr::IA32_GS_BASE) as usize
                } else {
                    unimplemented!()
                };
            } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] {
                // RISC-V keeps the per-CPU base in `gp`.
                core::arch::asm!("mv {}, gp", out(reg) tp)
            } else if #[cfg(all(target_arch = "aarch64", not(feature = "arm-el2")))] {
                core::arch::asm!("mrs {}, TPIDR_EL1", out(reg) tp)
            } else if #[cfg(all(target_arch = "aarch64", feature = "arm-el2"))] {
                // With feature `arm-el2` the EL2 variant of the register is used.
                core::arch::asm!("mrs {}, TPIDR_EL2", out(reg) tp)
            } else if #[cfg(target_arch = "loongarch64")] {
                // Register Convention
                // https://docs.kernel.org/arch/loongarch/introduction.html#gprs
                core::arch::asm!("move {}, $r21", out(reg) tp)
            } else if #[cfg(target_arch = "arm")] {
                // CP15 register c13, c0, 3 (looks like TPIDRURO — confirm
                // against the ARM architecture manual).
                core::arch::asm!("mrc p15, 0, {}, c13, c0, 3", out(reg) tp)
            }
        }
    }
    cfg_if::cfg_if! {
        if #[cfg(feature = "non-zero-vma")] {
            tp + percpu_symbol_vma!(_percpu_load_start)
        } else {
            tp
        }
    }
}
149
150/// Writes the architecture-specific per-CPU data register.
151///
152/// This register is used to hold the per-CPU data base on each CPU.
153///
154/// # Safety
155///
156/// This function is unsafe because it writes the low-level register directly.
157pub unsafe fn write_percpu_reg(tp: usize) {
158    cfg_if::cfg_if! {
159        if #[cfg(feature = "non-zero-vma")] {
160            let tp = tp - percpu_symbol_vma!(_percpu_load_start);
161        }
162    };
163
164    unsafe {
165        cfg_if::cfg_if! {
166            if #[cfg(target_arch = "x86_64")] {
167                if cfg!(target_os = "linux") {
168                    const ARCH_SET_GS: u32 = 0x1001;
169                    const SYS_ARCH_PRCTL: u32 = 158;
170                    core::arch::asm!(
171                        "syscall",
172                        in("eax") SYS_ARCH_PRCTL,
173                        in("edi") ARCH_SET_GS,
174                        in("rsi") tp,
175                    );
176                } else if cfg!(target_os = "none") {
177                    x86::msr::wrmsr(x86::msr::IA32_GS_BASE, tp as u64);
178                } else {
179                    unimplemented!()
180                }
181                SELF_PTR.write_current_raw(tp);
182            } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] {
183                core::arch::asm!("mv gp, {}", in(reg) tp)
184            } else if #[cfg(all(target_arch = "aarch64", not(feature = "arm-el2")))] {
185                core::arch::asm!("msr TPIDR_EL1, {}", in(reg) tp)
186            } else if #[cfg(all(target_arch = "aarch64", feature = "arm-el2"))] {
187                core::arch::asm!("msr TPIDR_EL2, {}", in(reg) tp)
188            } else if #[cfg(target_arch = "loongarch64")] {
189                core::arch::asm!("move $r21, {}", in(reg) tp)
190            } else if #[cfg(target_arch = "arm")] {
191                core::arch::asm!("mcr p15, 0, {}, c13, c0, 3", in(reg) tp)
192            }
193        }
194    }
195}
196
197/// Initializes the per-CPU data register.
198///
199/// It is equivalent to `write_percpu_reg(percpu_area_base(cpu_id))`, which set
200/// the architecture-specific per-CPU data register to the base address of the
201/// corresponding per-CPU data area.
202///
203/// `cpu_id` indicates which per-CPU data area to use.
204pub fn init_percpu_reg(cpu_id: usize) {
205    let tp = percpu_area_base(cpu_id);
206    unsafe { write_percpu_reg(tp) }
207}
208
209/// To use `ax_percpu::__priv::NoPreemptGuard::new()` and `ax_percpu::percpu_area_base()` in macro expansion.
210#[allow(unused_imports)]
211use crate as ax_percpu;
212
/// On x86, we use `gs:SELF_PTR` to store the address of the per-CPU data area base.
///
/// `write_percpu_reg` stores the value it writes into this per-CPU variable,
/// and `read_percpu_reg` reads it back on Linux.
#[cfg(target_arch = "x86_64")]
#[no_mangle]
#[ax_percpu_macros::def_percpu]
static SELF_PTR: usize = 0;