ax-cpu 0.7.0

Privileged instruction and structure abstractions for various CPU architectures
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
use core::{arch::naked_asm, fmt};

use ax_memory_addr::VirtAddr;

/// Saved registers when a trap (interrupt or exception) occurs.
///
/// The struct is `#[repr(C)]`, so the fields are laid out in declaration
/// order: the general-purpose registers first, then the two words pushed by
/// `trap.S`, and finally the words pushed by the CPU.
// NOTE(review): the field order presumably has to match the push/pop sequence
// in `trap.S`, which is not visible here — confirm before reordering anything.
#[allow(missing_docs)]
#[repr(C)]
#[derive(Debug, Default, Clone, Copy)]
pub struct TrapFrame {
    // General-purpose registers.
    pub rax: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rbx: u64,
    pub rbp: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,

    // Pushed by `trap.S`
    pub vector: u64,
    pub error_code: u64,

    // Pushed by CPU
    pub rip: u64,
    pub cs: u64,
    pub rflags: u64,
    pub rsp: u64,
    pub ss: u64,
}

impl TrapFrame {
    /// Returns syscall argument 0, which is held in `rdi`.
    pub const fn arg0(&self) -> usize {
        self.rdi as usize
    }

    /// Stores syscall argument 0 into `rdi`.
    pub const fn set_arg0(&mut self, rdi: usize) {
        self.rdi = rdi as u64;
    }

    /// Returns syscall argument 1, which is held in `rsi`.
    pub const fn arg1(&self) -> usize {
        self.rsi as usize
    }

    /// Stores syscall argument 1 into `rsi`.
    pub const fn set_arg1(&mut self, rsi: usize) {
        self.rsi = rsi as u64;
    }

    /// Returns syscall argument 2, which is held in `rdx`.
    pub const fn arg2(&self) -> usize {
        self.rdx as usize
    }

    /// Stores syscall argument 2 into `rdx`.
    pub const fn set_arg2(&mut self, rdx: usize) {
        self.rdx = rdx as u64;
    }

    /// Returns syscall argument 3, which is held in `r10`.
    pub const fn arg3(&self) -> usize {
        self.r10 as usize
    }

    /// Stores syscall argument 3 into `r10`.
    pub const fn set_arg3(&mut self, r10: usize) {
        self.r10 = r10 as u64;
    }

    /// Returns syscall argument 4, which is held in `r8`.
    pub const fn arg4(&self) -> usize {
        self.r8 as usize
    }

    /// Stores syscall argument 4 into `r8`.
    pub const fn set_arg4(&mut self, r8: usize) {
        self.r8 = r8 as u64;
    }

    /// Returns syscall argument 5, which is held in `r9`.
    pub const fn arg5(&self) -> usize {
        self.r9 as usize
    }

    /// Stores syscall argument 5 into `r9`.
    pub const fn set_arg5(&mut self, r9: usize) {
        self.r9 = r9 as u64;
    }

    /// Returns the saved instruction pointer (`rip`).
    pub const fn ip(&self) -> usize {
        self.rip as usize
    }

    /// Overwrites the saved instruction pointer (`rip`).
    pub const fn set_ip(&mut self, rip: usize) {
        self.rip = rip as u64;
    }

    /// Returns the saved stack pointer (`rsp`).
    pub const fn sp(&self) -> usize {
        self.rsp as usize
    }

    /// Overwrites the saved stack pointer (`rsp`).
    pub const fn set_sp(&mut self, rsp: usize) {
        self.rsp = rsp as u64;
    }

    /// Returns the syscall number, which is held in `rax`.
    pub const fn sysno(&self) -> usize {
        self.rax as usize
    }

    /// Stores the syscall number into `rax`.
    pub const fn set_sysno(&mut self, rax: usize) {
        self.rax = rax as u64;
    }

    /// Returns the return-value register (`rax`, the same register that
    /// carries the syscall number).
    pub const fn retval(&self) -> usize {
        self.rax as usize
    }

    /// Stores the return value into `rax`.
    pub const fn set_retval(&mut self, rax: usize) {
        self.rax = rax as u64;
    }

    /// Captures a backtrace starting from the saved frame pointer (`rbp`) and
    /// instruction pointer (`rip`) of this trap frame.
    pub fn backtrace(&self) -> axbacktrace::Backtrace {
        let frame_pointer = self.rbp;
        let instruction_pointer = self.rip;
        axbacktrace::Backtrace::capture_trap(frame_pointer as _, instruction_pointer as _, 0)
    }
}

/// Stack image consumed by `context_switch` when it resumes a task.
///
/// The fields are listed from the lowest address upwards and mirror the `pop`
/// sequence in `context_switch`: `r15` is popped first, `rbp` last, and `rip`
/// is the return address consumed by the final `ret`.
///
/// `TaskContext::init` writes one of these onto a fresh kernel stack so that
/// the first switch into the task "returns" to its entry point.
#[repr(C)]
#[derive(Debug, Default)]
struct ContextSwitchFrame {
    r15: u64,
    r14: u64,
    r13: u64,
    r12: u64,
    rbx: u64,
    rbp: u64,
    // Return address popped by `ret` at the end of `context_switch`.
    rip: u64,
}

/// A 512-byte memory region for the FXSAVE/FXRSTOR instruction to save and
/// restore the x87 FPU, MMX, XMM, and MXCSR registers.
///
/// This is also the legacy region (offset 0..512) at the head of the
/// XSAVE/XRSTOR area, so it doubles as the start of [`XsaveArea`].
///
/// The field order and sizes follow the 64-bit FXSAVE memory image; the
/// 16-byte alignment is what FXSAVE/FXRSTOR require of their operand.
///
/// See <https://www.felixcloutier.com/x86/fxsave> for more details.
#[allow(missing_docs)]
#[repr(C, align(16))]
#[derive(Clone, Copy, Debug)]
pub struct FxsaveArea {
    /// x87 FPU control word.
    pub fcw: u16,
    /// x87 FPU status word.
    pub fsw: u16,
    /// x87 FPU tag word slot. Per the FXSAVE reference linked above, the
    /// hardware keeps an abridged 8-bit tag in the low byte and reserves the
    /// high byte.
    pub ftw: u16,
    /// x87 FPU opcode.
    pub fop: u16,
    /// x87 FPU instruction pointer.
    pub fip: u64,
    /// x87 FPU data pointer.
    pub fdp: u64,
    /// SSE control/status register.
    pub mxcsr: u32,
    /// Mask of the `mxcsr` bits that may be set.
    pub mxcsr_mask: u32,
    /// x87/MMX registers: 8 slots of 16 bytes each (128 bytes in total).
    pub st: [u64; 16],
    /// XMM registers: 16 registers of 16 bytes each (256 bytes in total).
    pub xmm: [u64; 32],
    /// Trailing 96 bytes that pad the image to 512 bytes.
    _padding: [u64; 12],
}

// Compile-time check that the struct is exactly the 512 bytes FXSAVE writes.
const _: () = assert!(core::mem::size_of::<FxsaveArea>() == 512);

/// Size of the per-task XSAVE/XRSTOR area, in bytes.
///
/// The boot path ([`enable_xsave_features`]) only ever enables the x87, SSE,
/// and AVX components in `XCR0` (it never enables AVX-512, MPX, or PKRU), so the
/// largest XSAVE layout we must hold is the 512-byte legacy region, the 64-byte
/// XSAVE header, and the 256-byte AVX (`YMM_Hi128`) component, i.e. 832 bytes
/// in total. 1024 bytes covers that with headroom and is a multiple of the
/// required 64-byte alignment.
///
/// Enabling any further `XCR0` component requires revisiting this size, since
/// XSAVE would then write more state than this budget accounts for.
///
/// [`enable_xsave_features`]: ../../../someboot/src/arch/x86_64/trap.rs
const XSAVE_AREA_SIZE: usize = 1024;

/// A 64-byte-aligned memory region for the XSAVE/XRSTOR instructions, which save
/// and restore the full `XCR0`-enabled extended state (x87, SSE/XMM, and the
/// upper 128 bits of the AVX `YMM` registers that FXSAVE/FXRSTOR drop).
///
/// The first 512 bytes share the legacy [`FxsaveArea`] layout, so the FXSAVE
/// fallback path (CPUs/VMs without XSAVE, e.g. the default `qemu64` model) reads
/// and writes the same region.
///
/// The total size is [`XSAVE_AREA_SIZE`] bytes, which is checked at compile
/// time right below the definition.
///
/// See <https://www.felixcloutier.com/x86/xsave> for more details.
#[repr(C, align(64))]
struct XsaveArea {
    /// Legacy region, identical in layout to the FXSAVE/FXRSTOR area.
    legacy: FxsaveArea,
    /// XSAVE header (`XSTATE_BV`, `XCOMP_BV`, reserved) plus the extended
    /// component area. A zeroed header marks every component as being in its
    /// initial state, which is the correct starting point for a fresh task.
    ///
    /// With the current [`XSAVE_AREA_SIZE`] this is 512 bytes: the 64-byte
    /// header followed by 448 bytes for extended components.
    rest: [u8; XSAVE_AREA_SIZE - 512],
}

// Compile-time check that `legacy` + `rest` add up to the advertised size
// (i.e. the 64-byte alignment introduced no unexpected padding).
const _: () = assert!(core::mem::size_of::<XsaveArea>() == XSAVE_AREA_SIZE);

/// Extended state of a task, such as FP/SIMD states.
///
/// On context switch the state is saved/restored with XSAVE/XRSTOR when the boot
/// path enabled `CR4.OSXSAVE` (so that the AVX `YMM` upper halves are preserved),
/// and falls back to FXSAVE/FXRSTOR otherwise.
///
/// The save/restore methods are only compiled in when the `fp-simd` feature is
/// enabled; the type itself and its `Debug` impl are always available.
pub struct ExtendedState {
    // Backing buffer handed to XSAVE/XRSTOR, or to FXSAVE/FXRSTOR, which only
    // touch its leading 512-byte legacy region.
    area: XsaveArea,
}

#[cfg(feature = "fp-simd")]
impl ExtendedState {
    /// `XCR0` state components that the fixed-size [`XsaveArea`] has room for:
    /// x87 (bit 0), SSE (bit 1) and AVX (bit 2).
    ///
    /// The save/restore mask is clamped to this set so that XSAVE can never
    /// write past the end of the area, even if some other code enabled further
    /// components (e.g. AVX-512) in `XCR0`. Supporting more components
    /// requires growing [`XSAVE_AREA_SIZE`] and extending this mask together.
    const XSAVE_SUPPORTED_MASK: u64 = 0b111;

    /// Provides access to the legacy FXSAVE region for compatibility with code
    /// that inspects the x87/SSE state directly.
    #[inline]
    pub fn fxsave_area(&self) -> &FxsaveArea {
        &self.area.legacy
    }

    /// Returns `true` when the boot path enabled XSAVE state management
    /// (`CR4.OSXSAVE`), which is the single source of truth for whether
    /// XSAVE/XRSTOR (and reading `XCR0` via `XGETBV`) are safe to use.
    #[inline]
    fn xsave_enabled() -> bool {
        // SAFETY: reading CR4 from ring 0 is always well-defined.
        let cr4 = unsafe { x86::controlregs::cr4() };
        cr4.contains(x86::controlregs::Cr4::CR4_ENABLE_OS_XSAVE)
    }

    /// The set of state components to save/restore: the `XCR0` mask the boot
    /// path programmed, restricted to [`Self::XSAVE_SUPPORTED_MASK`] so the
    /// result always fits in the area. Only valid to call when
    /// [`Self::xsave_enabled`].
    #[inline]
    fn xsave_mask() -> u64 {
        // SAFETY: `CR4.OSXSAVE` is set (checked by the caller), so XGETBV is
        // well-defined and will not #UD.
        let xcr0 = unsafe { x86::controlregs::xcr0() };
        xcr0.bits() & Self::XSAVE_SUPPORTED_MASK
    }

    /// Saves the current extended states from CPU to this structure.
    ///
    /// Uses XSAVE when `CR4.OSXSAVE` is set and FXSAVE otherwise.
    #[inline]
    pub fn save(&mut self) {
        let ptr = &mut self.area as *mut _ as *mut u8;
        if Self::xsave_enabled() {
            // SAFETY: `area` is 64-byte aligned, and the mask is clamped to
            // x87/SSE/AVX, whose standard-format image (legacy region, header
            // and AVX component) fits in `XSAVE_AREA_SIZE` bytes.
            unsafe { core::arch::x86_64::_xsave64(ptr, Self::xsave_mask()) }
        } else {
            // SAFETY: `area` starts with the 16-byte-aligned legacy FXSAVE region.
            unsafe { core::arch::x86_64::_fxsave64(ptr) }
        }
    }

    /// Restores the extended states from this structure to CPU.
    ///
    /// Uses XRSTOR when `CR4.OSXSAVE` is set and FXRSTOR otherwise, mirroring
    /// [`Self::save`].
    #[inline]
    pub fn restore(&self) {
        let ptr = &self.area as *const _ as *const u8;
        if Self::xsave_enabled() {
            // SAFETY: `area` was populated by `_xsave64` (or zero-initialized,
            // which XRSTOR reads as the components' initial state) with a header
            // consistent with the clamped XCR0 mask used here.
            unsafe { core::arch::x86_64::_xrstor64(ptr, Self::xsave_mask()) }
        } else {
            // SAFETY: `area` starts with the 16-byte-aligned legacy FXSAVE region.
            unsafe { core::arch::x86_64::_fxrstor64(ptr) }
        }
    }

    /// Returns the extended state with initialized values.
    ///
    /// The result corresponds to a freshly initialised FPU: default x87
    /// control word, an empty x87 register stack, and default `MXCSR`.
    pub const fn default() -> Self {
        // Zeroing the whole area gives XRSTOR an all-initial XSAVE header
        // (XSTATE_BV = 0) so the first restore loads each component's default
        // state; the legacy fields below seed the FXSAVE fallback path too.
        //
        // SAFETY: `XsaveArea` consists solely of integers and integer arrays,
        // for which the all-zero bit pattern is a valid value.
        let mut area: XsaveArea = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
        // x87 control word after FINIT: all exceptions masked, extended
        // precision, round to nearest.
        area.legacy.fcw = 0x37f;
        // `ftw` deliberately stays 0. The FXSAVE image stores the *abridged*
        // tag word (one bit per register, 1 = valid, 0 = empty), so zero is
        // the "all registers empty" state. The value 0xffff means "all empty"
        // only in the full 16-bit FSAVE encoding; here it would mark every x87
        // register as in use, so the first x87 load in a fresh task would hit
        // a stack overflow on the FXRSTOR path.
        // MXCSR reset value: all SIMD floating-point exceptions masked.
        area.legacy.mxcsr = 0x1f80;
        Self { area }
    }
}

impl fmt::Debug for ExtendedState {
    /// Formats only the legacy FXSAVE region; the XSAVE header and extended
    /// component bytes are omitted from the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("ExtendedState");
        builder.field("fxsave_area", &self.area.legacy);
        builder.finish()
    }
}

/// Saved hardware states of a task.
///
/// The context usually includes:
///
/// - Callee-saved registers
/// - Stack pointer register
/// - Thread pointer register (for kernel-space thread-local storage)
/// - FP/SIMD registers
///
/// On context switch, current task saves its context from CPU to memory,
/// and the next task restores its context from memory to CPU.
///
/// On x86_64, callee-saved registers are saved to the kernel stack by the
/// `PUSH` instruction. So that [`rsp`] is the `RSP` after callee-saved
/// registers are pushed, and [`kstack_top`] is the top of the kernel stack
/// (`RSP` before any push).
///
/// [`rsp`]: TaskContext::rsp
/// [`kstack_top`]: TaskContext::kstack_top
#[derive(Debug)]
pub struct TaskContext {
    /// The kernel stack top of the task.
    pub kstack_top: VirtAddr,
    /// `RSP` after all callee-saved registers are pushed.
    ///
    /// Written by `context_switch` when the task is switched out, and loaded
    /// back into `RSP` when it is switched in.
    pub rsp: u64,
    /// Thread pointer (FS segment base address)
    ///
    /// Only saved and restored on context switch when the `tls` feature is
    /// enabled.
    pub fs_base: usize,
    /// Extended states, i.e., FP/SIMD states.
    #[cfg(feature = "fp-simd")]
    pub ext_state: ExtendedState,
    /// The `CR3` register value, i.e., the page table root.
    #[cfg(feature = "uspace")]
    pub cr3: ax_memory_addr::PhysAddr,
}

impl TaskContext {
    /// Builds a placeholder context for a task that has not run yet.
    ///
    /// The stack and thread-pointer fields start out as zero. They receive
    /// real values from [`init`] (regular tasks) or from the first
    /// [`switch_to`] away from the task (initial tasks).
    ///
    /// [`init`]: TaskContext::init
    /// [`switch_to`]: TaskContext::switch_to
    pub fn new() -> Self {
        let kstack_top = va!(0);
        Self {
            kstack_top,
            rsp: 0,
            fs_base: 0,
            #[cfg(feature = "uspace")]
            cr3: crate::asm::read_kernel_page_table(),
            #[cfg(feature = "fp-simd")]
            ext_state: ExtendedState::default(),
        }
    }

    /// Prepares this context so that the first switch into the task starts
    /// executing at `entry` on the kernel stack ending at `kstack_top`, with
    /// `tls_area` as its thread pointer.
    pub fn init(&mut self, entry: usize, kstack_top: VirtAddr, tls_area: VirtAddr) {
        // Every callee-saved register starts as zero; only the return address
        // is meaningful for a fresh task.
        let initial_frame = ContextSwitchFrame {
            rip: entry as u64,
            ..Default::default()
        };
        let frame_slot = unsafe {
            // The x86_64 calling convention wants the stack 16-byte aligned
            // right before a `call`, i.e. (stack pointer + 8) must be 16-byte
            // aligned on function entry. Leaving one spare 8-byte slot below
            // the stack top achieves that once `ret` in `context_switch` has
            // popped the entry address.
            let below_top = (kstack_top.as_mut_ptr() as *mut u64).sub(1);
            let slot = (below_top as *mut ContextSwitchFrame).sub(1);
            slot.write(initial_frame);
            slot
        };
        self.rsp = frame_slot as u64;
        self.kstack_top = kstack_top;
        self.fs_base = tls_area.as_usize();
    }

    /// Records a new page table root for this context.
    ///
    /// The hardware register (`CR3` on x86) is not touched here; it is loaded
    /// from this field when [`Self::switch_to`] switches to this context.
    #[cfg(feature = "uspace")]
    pub fn set_page_table_root(&mut self, cr3: ax_memory_addr::PhysAddr) {
        self.cr3 = cr3;
    }

    /// Switches from the current task (`self`) to the task described by
    /// `next_ctx`.
    ///
    /// The CPU state of the current task is stored into `self` first, then the
    /// state held in `next_ctx` is loaded. The call returns only when some
    /// other task later switches back to `self`.
    pub fn switch_to(&mut self, next_ctx: &Self) {
        // FP/SIMD state: store ours, then load the next task's.
        #[cfg(feature = "fp-simd")]
        {
            self.ext_state.save();
            next_ctx.ext_state.restore();
        }
        // Thread pointer: remember the live value and install the next one.
        #[cfg(feature = "tls")]
        unsafe {
            self.fs_base = crate::asm::read_thread_pointer();
            crate::asm::write_thread_pointer(next_ctx.fs_base);
        }
        // Address space: reload the page table root only when it differs.
        #[cfg(feature = "uspace")]
        unsafe {
            if self.cr3 != next_ctx.cr3 {
                // writing to CR3 has flushed the TLB
                crate::asm::write_user_page_table(next_ctx.cr3);
            }
        }
        // Finally swap kernel stacks and callee-saved registers.
        let save_slot = &mut self.rsp;
        let load_slot = &next_ctx.rsp;
        unsafe { context_switch(save_slot, load_slot) }
    }
}

/// Swaps kernel stacks and callee-saved registers between two tasks.
///
/// The six callee-saved registers (`rbp`, `rbx`, `r12`–`r15`) are pushed onto
/// the current stack and the resulting `RSP` is stored through
/// `_current_stack`. `RSP` is then loaded from `_next_stack`, the same
/// registers are popped in reverse order, and `ret` jumps to the return
/// address found on the new stack. The stack image read on the way in has the
/// layout of `ContextSwitchFrame`.
///
/// The parameters are referenced only from the assembly, through `rdi` and
/// `rsi` (where the `extern "C"` calling convention places the first two
/// arguments here), hence the leading underscores.
///
/// # Safety
///
/// `*_next_stack` must be a stack pointer previously stored by this function,
/// or one prepared by `TaskContext::init`, so that the six pops and the final
/// `ret` read a valid frame.
#[unsafe(naked)]
unsafe extern "C" fn context_switch(_current_stack: &mut u64, _next_stack: &u64) {
    naked_asm!(
        "
        .code64
        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
        mov     [rdi], rsp

        mov     rsp, [rsi]
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbx
        pop     rbp
        ret",
    )
}