// tdx_guest/lib.rs
// SPDX-License-Identifier: BSD-3-Clause
// Copyright(c) 2023-2026 Intel Corporation.
#![cfg_attr(not(test), no_std)]
// NOTE(review): crate-wide dead_code allow hides genuinely unused items;
// consider narrowing it once the public API stabilizes.
#![allow(dead_code)]

extern crate alloc;

mod asm;
pub mod tdcall;
pub mod tdvmcall;
pub mod unaccepted_memory;
mod ve;

use core::sync::atomic::{
    AtomicBool, AtomicU64, AtomicU8,
    Ordering::{Acquire, Relaxed, Release},
};

use bitflags::bitflags;
use raw_cpuid::{native_cpuid::cpuid_count, CpuIdResult};
use tdcall::{InitError, TdCallError, TdgVpInfo};
use ve::{handle_io, handle_mmio};

pub use self::{
    tdcall::{accept_page, get_veinfo, TdgVeInfo, TdxVirtualExceptionType},
    tdvmcall::{cpuid, hlt, print, rdmsr, wrmsr, TDX_LOGGER},
};
29
/// Errors that can occur while configuring virtual CPU topology enumeration.
#[derive(Debug)]
pub enum TopologyError {
    /// The underlying TDCALL failed.
    TdCall(TdCallError),
    /// The VMM has not configured virtual topology enumeration for this TD.
    NotConfigured,
}
35
/// Errors that can occur while disabling #VE(PENDING) delivery (see [`disable_sept_ve`]).
#[derive(Debug)]
pub enum SeptVeError {
    /// The underlying TDCALL failed.
    TdCall(TdCallError),
    /// The TD configuration makes it impossible to suppress #VE on PENDING pages.
    Misconfiguration,
}
41
/// Errors returned by [`accept_memory`] and its helpers.
#[derive(Debug)]
pub enum AcceptError {
    /// The underlying `TDG.MEM.PAGE.ACCEPT` TDCALL failed.
    TdCall(TdCallError),
    /// A GPA was not aligned to the minimum (4K) page size.
    InvalidAlignment,
    // NOTE(review): the three variants below are not constructed in this
    // chunk — presumably used by the `unaccepted_memory` module; confirm.
    /// A GPA fell outside the valid/accepted range.
    OutOfBounds,
    /// A requested range overlaps one that is already tracked.
    Overlap,
    /// A range computation overflowed.
    ArithmeticOverflow,
}
50
/// A guest physical address (GPA) inside the TD.
pub type TdxGpa = usize;

/// Bitmask selecting the SHARED bit in a GPA. Derived from the GPAW reported
/// by `TDG.VP.INFO` in [`init_tdx`]; remains zero until initialization runs.
pub static SHARED_MASK: AtomicU64 = AtomicU64::new(0);
54
/// Accessors for the saved general-purpose register state of the vCPU that
/// took a #VE.
///
/// The kernel's concrete trap-frame type implements this trait so the
/// exception handlers in this crate (see [`handle_virtual_exception`]) can
/// read and patch guest registers without depending on its memory layout.
pub trait TdxTrapFrame {
    fn rax(&self) -> usize;
    fn set_rax(&mut self, rax: usize);
    fn rbx(&self) -> usize;
    fn set_rbx(&mut self, rbx: usize);
    fn rcx(&self) -> usize;
    fn set_rcx(&mut self, rcx: usize);
    fn rdx(&self) -> usize;
    fn set_rdx(&mut self, rdx: usize);
    fn rsi(&self) -> usize;
    fn set_rsi(&mut self, rsi: usize);
    fn rdi(&self) -> usize;
    fn set_rdi(&mut self, rdi: usize);
    fn rip(&self) -> usize;
    fn set_rip(&mut self, rip: usize);
    fn r8(&self) -> usize;
    fn set_r8(&mut self, r8: usize);
    fn r9(&self) -> usize;
    fn set_r9(&mut self, r9: usize);
    fn r10(&self) -> usize;
    fn set_r10(&mut self, r10: usize);
    fn r11(&self) -> usize;
    fn set_r11(&mut self, r11: usize);
    fn r12(&self) -> usize;
    fn set_r12(&mut self, r12: usize);
    fn r13(&self) -> usize;
    fn set_r13(&mut self, r13: usize);
    fn r14(&self) -> usize;
    fn set_r14(&mut self, r14: usize);
    fn r15(&self) -> usize;
    fn set_r15(&mut self, r15: usize);
    fn rbp(&self) -> usize;
    fn set_rbp(&mut self, rbp: usize);
}
89
90#[inline(always)]
91pub fn tdx_is_enabled() -> bool {
92    TDX_ENABLED.load(Relaxed)
93}
94
95/// Returns true if the system is identified as an Intel TDX guest during early boot.
96///
97/// This function is designed for use in environments like the EFI stub where
98/// complex initialization is not yet possible. It uses an internal atomic cache
99/// to ensure that the hardware CPUID check is performed only once.
100pub fn is_tdx_guest_early() -> bool {
101    match TdxEarlyState::from(TDX_EARLY_STATE.load(Acquire)) {
102        TdxEarlyState::Enabled => true,
103        TdxEarlyState::Disabled => false,
104        _ => {
105            let is_tdx = is_tdx_hardware_present();
106            let new_state = if is_tdx {
107                TdxEarlyState::Enabled
108            } else {
109                TdxEarlyState::Disabled
110            };
111
112            TDX_EARLY_STATE.store(new_state as u8, Release);
113            is_tdx
114        }
115    }
116}
117
118/// Performs full initialization of the Intel TDX guest environment.
119///
120/// This function validates the TDX hardware signature, invokes the `TDG.VP.INFO`
121/// TDCALL to retrieve Trust Domain environment information, and configures global
122/// state such as the shared memory mask.
123pub fn init_tdx() -> Result<TdgVpInfo, InitError> {
124    if tdx_is_enabled() {
125        return tdcall::get_tdinfo().map_err(InitError::TdxGetVpInfoError);
126    }
127
128    check_tdx_guest()?;
129
130    let info = tdcall::get_tdinfo().map_err(InitError::TdxGetVpInfoError)?;
131
132    let gpaw: u64 = info.gpaw.into();
133    let mask = 1u64 << (gpaw - 1);
134    SHARED_MASK.store(mask, Relaxed);
135
136    TDX_ENABLED.store(true, Relaxed);
137
138    Ok(info)
139}
140
/// Dispatches a TDX virtual exception (#VE) by its exit reason, emulates the
/// faulting operation via TDVMCALLs, then advances the guest RIP past the
/// faulting instruction.
///
/// NOTE(review): the MSR/CPUID/MMIO paths `unwrap()` their tdvmcall results,
/// so a hypervisor-rejected access panics the guest instead of injecting a
/// fault — confirm this is the intended policy.
pub fn handle_virtual_exception(trapframe: &mut dyn TdxTrapFrame, ve_info: &TdgVeInfo) {
    let mut instr_len = ve_info.exit_instruction_length;
    match ve_info.exit_reason.into() {
        TdxVirtualExceptionType::Hlt => {
            hlt();
        }
        TdxVirtualExceptionType::Io => {
            // Port I/O emulation; halt if the access cannot be handled.
            if !handle_io(trapframe, ve_info) {
                serial_println!("Handle tdx ioexit errors, ready to halt");
                hlt();
            }
        }
        TdxVirtualExceptionType::MsrRead => {
            // RDMSR semantics: the 64-bit value is split across EDX:EAX.
            let msr = unsafe { rdmsr(trapframe.rcx() as u32).unwrap() };
            trapframe.set_rax((msr as u32) as usize);
            trapframe.set_rdx(((msr >> 32) as u32) as usize);
        }
        TdxVirtualExceptionType::MsrWrite => {
            // WRMSR semantics: the value is taken from EDX:EAX.
            let data = trapframe.rax() as u64 | ((trapframe.rdx() as u64) << 32);
            unsafe { wrmsr(trapframe.rcx() as u32, data).unwrap() };
        }
        TdxVirtualExceptionType::CpuId => {
            let cpuid_info = cpuid(trapframe.rax() as u32, trapframe.rcx() as u32).unwrap();
            // CPUID only defines the low 32 bits of each register; keep the
            // upper halves of the guest registers intact.
            let mask = 0xFFFF_FFFF_0000_0000_usize;
            trapframe.set_rax((trapframe.rax() & mask) | cpuid_info.eax);
            trapframe.set_rbx((trapframe.rbx() & mask) | cpuid_info.ebx);
            trapframe.set_rcx((trapframe.rcx() & mask) | cpuid_info.ecx);
            trapframe.set_rdx((trapframe.rdx() & mask) | cpuid_info.edx);
        }
        TdxVirtualExceptionType::EptViolation => {
            // EPT violations are only expected on shared GPAs; one on private
            // memory indicates a broken S-EPT state, so stop here.
            if is_protected_gpa(ve_info.guest_physical_address as TdxGpa) {
                serial_println!("Unexpected EPT-violation on private memory");
                hlt();
            }
            // The MMIO emulator reports the decoded instruction length, which
            // overrides the length reported in the #VE info.
            instr_len = handle_mmio(trapframe, ve_info).unwrap() as u32;
        }
        TdxVirtualExceptionType::Other => {
            serial_println!("Unknown TDX virtual exception type");
            hlt();
        }
        // Exit reasons with no emulation: deliberately leave RIP untouched.
        _ => return,
    }
    // Skip the instruction that triggered the #VE.
    trapframe.set_rip(trapframe.rip() + instr_len as usize);
}
185
186pub fn reduce_unnecessary_ve() -> Result<(), TopologyError> {
187    if tdcall::write_td_metadata(
188        metadata::TD_CTLS,
189        metadata::TdCtls::REDUCE_VE.bits(),
190        metadata::TdCtls::REDUCE_VE.bits(),
191    )
192    .is_ok()
193    {
194        return Ok(());
195    }
196
197    enable_cpu_topology_enumeration()
198}
199
200/// Accepts a range of physical memory to be used as TDX private memory.
201///
202/// # Safety
203///
204/// The caller must ensure the following invariants are met:
205/// - **Address Validity**: The GPA range `[gpa_start, gpa_end)` must represent a valid range.
206/// - **State Invariant**: The target memory pages must be in the `Pending` state.
207///   Accepting pages that are already `Accepted` or in an uninitialized state will
208///   result in a TDX instruction error.
209/// - **Exclusive Access**: The caller must ensure no other CPU context is
210///   simultaneously attempting to accept or access this specific GPA range to
211///   prevent race conditions in the TDX Module's metadata.
212/// - **Alignment**: While the function checks basic 4K alignment, the caller must ensure
213///   the range corresponds to actual physical backing store provided by the VMM.
214pub unsafe fn accept_memory(gpa_start: u64, gpa_end: u64) -> Result<(), AcceptError> {
215    if gpa_start >= gpa_end {
216        return Ok(());
217    }
218
219    if (gpa_start & (PageLevel::L1_4K.bytes() - 1)) != 0 {
220        return Err(AcceptError::InvalidAlignment);
221    }
222
223    let mut current_addr = gpa_start;
224
225    while current_addr < gpa_end {
226        let len = gpa_end - current_addr;
227        let mut accepted = false;
228
229        for &level in &PageLevel::PRIORITIES {
230            match try_accept_one(current_addr, len, level)? {
231                TryAcceptResult::Accepted(size) => {
232                    current_addr += size;
233                    accepted = true;
234                    break;
235                }
236                TryAcceptResult::SizeMismatch | TryAcceptResult::SkipLevel => {
237                    // Try next (smaller) page level
238                    continue;
239                }
240            }
241        }
242
243        if !accepted {
244            // Fails if even L1_4K cannot be accepted
245            return Err(AcceptError::InvalidAlignment);
246        }
247    }
248    Ok(())
249}
250
251pub fn enable_cpu_topology_enumeration() -> Result<(), TopologyError> {
252    let configured = tdcall::read_td_metadata(metadata::TOPOLOGY_ENUM_CONFIGURED)?;
253
254    if configured == 0 {
255        return Err(TopologyError::NotConfigured);
256    }
257
258    tdcall::write_td_metadata(
259        metadata::TD_CTLS,
260        metadata::TdCtls::ENUM_TOPOLOGY.bits(),
261        metadata::TdCtls::ENUM_TOPOLOGY.bits(),
262    )?;
263
264    Ok(())
265}
266
/// Disables #VE(PENDING) delivery on guest accesses to PENDING pages, where
/// the TD configuration permits changing it at run time.
///
/// NOTE(review): in off-TD debug mode the #VE is deliberately left enabled —
/// presumably so faults remain observable; confirm against the intended
/// debug policy.
pub fn disable_sept_ve(td_attr: TdAttributes) -> Result<(), SeptVeError> {
    let debug = td_attr.contains(TdAttributes::DEBUG);

    let config = ConfigFlags::from_bits_truncate(tdcall::read_td_metadata(metadata::CONFIG_FLAGS)?);

    // Without FLEXIBLE_PENDING_VE the behavior is fixed at TD creation time:
    // it is acceptable only if SEPT_VE_DISABLE was set, or if debugging.
    if !config.contains(ConfigFlags::FLEXIBLE_PENDING_VE) {
        if td_attr.contains(TdAttributes::SEPT_VE_DISABLE) {
            return Ok(());
        }

        if !debug {
            // The #VE cannot be suppressed and we are not debugging: fatal.
            return Err(SeptVeError::Misconfiguration);
        }
        return Ok(());
    }

    let controls =
        metadata::TdCtls::from_bits_truncate(tdcall::read_td_metadata(metadata::TD_CTLS)?);

    // Already disabled at run time: nothing to do.
    if controls.contains(metadata::TdCtls::PENDING_VE_DISABLE) {
        return Ok(());
    }

    // Keep the #VE enabled in debug mode.
    if debug {
        return Ok(());
    }

    tdcall::write_td_metadata(
        metadata::TD_CTLS,
        metadata::TdCtls::PENDING_VE_DISABLE.bits(),
        metadata::TdCtls::PENDING_VE_DISABLE.bits(),
    )?;

    Ok(())
}
302
bitflags! {
    /// TdAttributes is defined as a 64b field that specifies various attested guest TD attributes.
    ///
    /// NOTE(review): presumably reported by `TDG.VP.INFO` at run time — confirm
    /// against the callers that construct this value.
    pub struct TdAttributes: u64 {
        /// Guest TD runs in off-TD debug mode. Its VCPU state and private memory are accessible by the host VMM.
        /// DEBUG may not be set if MIGRATABLE is set.
        const DEBUG = 1 << 0;
        /// The TD is subject to HGS+ operation. HGS+ monitors the TD operation as part of the whole system.
        /// This bit may be set, if supported by the TDX module, regardless of CPU support.
        const HGS_PLUS_PROF = 1 << 4;
        /// The TD is subject to system profiling using performance monitoring counters.
        /// Those counters are not context-switched on TD entry and exit; they monitor the TD operation as part of the whole system.
        /// This bit may be set, if supported by the TDX module, regardless of CPU support.
        const PERF_PROF = 1 << 5;
        /// The TD is subject to system profiling using core out-of-band telemetry.
        /// Core telemetry monitors the TD operation as part of the whole system.
        /// This bit may be set, if supported by the TDX module, regardless of CPU support.
        const PMT_PROF = 1 << 6;
        /// Indicates that the TDX module must use Instruction-Count based Single-Step Defense to protect against single-step attacks.
        /// ICSSD may not be set if PERFMON is set.
        /// This bit may only be set if the TDX module supports ICSSD.
        const ICSSD = 1 << 16;
        /// TD is allowed to use Linear Address Space Separation.
        /// This bit may only be set if both the TDX module and the CPU support LASS.
        const LASS = 1 << 27;
        /// Disable EPT violation conversion to #VE(PENDING) on guest TD access of PENDING pages.
        const SEPT_VE_DISABLE = 1 << 28;
        /// TD is migratable (using a Migration TD).
        /// MIGRATABLE may not be set if either DEBUG or PERFMON is set.
        /// MIGRATABLE may not be set if CONFIG_FLAGS.TDX_CONNECT is set.
        /// This bit may only be set if the TDX module supports TD Migration.
        const MIGRATABLE = 1 << 29;
        /// TD is allowed to use Supervisor Protection Keys.
        /// This bit may only be set if both the TDX module and the CPU support PKS.
        const PKS = 1 << 30;
        /// TD is allowed to use Key Locker.
        /// This bit may only be set if both the TDX module and the CPU support Key Locker.
        const KL = 1 << 31;
        /// The TD is a TDX Connect Provisioning Agent. This bit may only be set if both the TDX module and the CPU support TDX Connect.
        const TPA = 1 << 62;
        /// TD is allowed to use Perfmon and PERF_METRICS capabilities.
        /// PERFMON may not be set if either MIGRATABLE or ICSSD is set.
        /// This bit may only be set if the TDX Module supports Performance Monitoring virtualization.
        const PERFMON = 1 << 63;
    }
}
348
/// TD metadata field identifiers and run-time TD control flags, used with
/// `tdcall::read_td_metadata` / `tdcall::write_td_metadata`.
pub mod metadata {
    /// Non-attested TD configuration flags.
    pub const CONFIG_FLAGS: u64 = 0x1110000300000016;
    /// A bitmap of TD controls that may be modified during TD run time.
    pub const TD_CTLS: u64 = 0x1110000300000017;
    /// Enable guest notification of events.
    pub const NOTIFY_ENABLES: u64 = 0x9100000000000010;
    /// Indicates whether virtual topology enumeration has been successfully configured.
    pub const TOPOLOGY_ENUM_CONFIGURED: u64 = 0x9100000000000019;

    use crate::bitflags;
    bitflags! {
        /// TD Control flags (the writable bits of the `TD_CTLS` field).
        pub struct TdCtls: u64 {
            /// Controls the way guest TD access to a PENDING page is processed.
            const PENDING_VE_DISABLE = 1 << 0;
            /// Controls the enumeration of virtual platform topology.
            const ENUM_TOPOLOGY = 1 << 1;
            /// Controls the virtualization of CPUID(2).
            const VIRT_CPUID2 = 1 << 2;
            /// Allows the guest TD to control the way #VE is injected by the TDX module
            /// on guest TD execution of CPUID, RDMSR/WRMSR and other instructions.
            const REDUCE_VE = 1 << 3;
            /// Controls whether a migratable TD can request a sealing key using TDG.MR.KEY.GET.
            const FORCE_HW_KEYS = 1 << 4;
            /// Controls locking of TD-writable virtualization controls.
            const LOCK = 1 << 63;
        }
    }
}
379
380pub(crate) fn is_protected_gpa(gpa: TdxGpa) -> bool {
381    let mask = SHARED_MASK.load(Relaxed);
382    let gpa_u64 = u64::try_from(gpa).expect("TdxGpa must fit into u64 on x86_64");
383    (gpa_u64 & mask) == 0
384}
385
386fn check_tdx_guest() -> Result<(), InitError> {
387    let max_leaf = cpuid_count(0, 0).eax;
388    if max_leaf < TDX_CPUID_LEAF_ID {
389        return Err(InitError::TdxCpuLeafIdTooLow);
390    }
391    if !is_tdx_hardware_present() {
392        return Err(InitError::TdxVendorIdMismatch);
393    }
394
395    Ok(())
396}
397
398fn is_tdx_hardware_present() -> bool {
399    let res: CpuIdResult = cpuid_count(TDX_CPUID_LEAF_ID, 0);
400
401    let mut sig = [0u8; 12];
402    sig[0..4].copy_from_slice(&res.ebx.to_le_bytes());
403    sig[4..8].copy_from_slice(&res.edx.to_le_bytes());
404    sig[8..12].copy_from_slice(&res.ecx.to_le_bytes());
405
406    &sig == TDX_IDENT
407}
408
/// Cached tri-state result of the early TDX hardware probe
/// (see [`is_tdx_guest_early`]).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(u8)]
enum TdxEarlyState {
    /// The probe has not run yet.
    Uninitialized = 0,
    /// The probe found the TDX CPUID signature.
    Enabled = 1,
    /// The probe did not find the TDX CPUID signature.
    Disabled = 2,
}

impl TdxEarlyState {
    /// Decodes a raw cached byte; any unrecognized value is treated as
    /// `Uninitialized` so the probe simply runs again.
    const fn from_u8(raw: u8) -> Self {
        match raw {
            2 => Self::Disabled,
            1 => Self::Enabled,
            _ => Self::Uninitialized,
        }
    }
}

impl From<u8> for TdxEarlyState {
    fn from(raw: u8) -> Self {
        Self::from_u8(raw)
    }
}
432
/// Cached result of the early hardware probe (encodes a [`TdxEarlyState`]).
static TDX_EARLY_STATE: AtomicU8 = AtomicU8::new(TdxEarlyState::Uninitialized as u8);
/// Set to `true` once [`init_tdx`] has completed successfully.
static TDX_ENABLED: AtomicBool = AtomicBool::new(false);

/// Vendor signature returned in CPUID leaf 0x21 ("IntelTDX" padded with 4 spaces).
const TDX_IDENT: &[u8; 12] = b"IntelTDX    ";
/// CPUID leaf reserved for Intel TDX enumeration.
const TDX_CPUID_LEAF_ID: u32 = 0x21;
438
439/// Attempts to accept a single memory page at the specified level.
440fn try_accept_one(
441    start: u64,
442    len: u64,
443    page_level: PageLevel,
444) -> Result<TryAcceptResult, AcceptError> {
445    let size = page_level.bytes();
446
447    if (start & (size - 1)) != 0 || len < size {
448        return Ok(TryAcceptResult::SkipLevel);
449    }
450
451    match unsafe { accept_page(page_level as u64, start) } {
452        Ok(_) => Ok(TryAcceptResult::Accepted(size)),
453        Err(e) => match e {
454            // PageAlreadyAccepted: If 4K, it is fully accepted. If larger, earlier stages
455            // might have accepted only a subregion (e.g. 4K out of 2M), so we must split.
456            TdCallError::TdxPageAlreadyAccepted => {
457                if page_level == PageLevel::L1_4K {
458                    Ok(TryAcceptResult::Accepted(size))
459                } else {
460                    Ok(TryAcceptResult::SizeMismatch)
461                }
462            }
463            // PageSizeMismatch: VMM mapped it differently.
464            // OperandInvalid: Hardware doesn't support this size or address is rejected.
465            TdCallError::TdxPageSizeMismatch | TdCallError::TdxOperandInvalid => {
466                if page_level == PageLevel::L1_4K {
467                    // If the minimum architectural unit is rejected, it's a fatal error.
468                    Err(AcceptError::TdCall(e))
469                } else {
470                    // Fall back to a smaller page size.
471                    Ok(TryAcceptResult::SizeMismatch)
472                }
473            }
474            _ => Err(AcceptError::TdCall(e)),
475        },
476    }
477}
478
bitflags! {
    /// Non-attested TD configuration flags (the `CONFIG_FLAGS` metadata field),
    /// fixed by the VMM at TD creation.
    struct ConfigFlags: u64 {
        /// GPAW (Guest Physical Address Width) controls the position of the SHARED bit in GPA.
        /// It is copied to each TD VMCS and L2 VMCS GPAW execution control on TDH.VP.INIT and TDH.IMPORT.STATE.VP.
        const GPAW = 1 << 0;
        /// Controls the guest TD’s ability to change the PENDING page access behavior from its default value.
        const FLEXIBLE_PENDING_VE = 1 << 1;
        /// Controls whether RBP value can be modified by TDG.VP.VMCALL and TDH.VP.ENTER.
        const NO_RBP_MOD = 1 << 2;
        /// Controls virtualization of physical address width, as enumerated by CPUID(0x80000008).EAX[7:0].
        const MAXPA_VIRT = 1 << 3;
        /// Controls virtualization of guest physical address width, as enumerated by CPUID(0x80000008).EAX[23:16].
        const MAXGPA_VIRT = 1 << 4;
        /// Enables TDX Connect for the current TD.
        const TDX_CONNECT = 1 << 5;
        /// Enables TDG.MEM.PAGE.RELEASE for the current TD.
        const PAGE_RELEASE = 1 << 6;
    }
}
498
/// Page-mapping levels usable when accepting TD memory.
///
/// The discriminant is the level number passed to `accept_page`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
enum PageLevel {
    /// 4 KiB page (level 0).
    L1_4K = 0,
    /// 2 MiB page (level 1).
    L2_2M = 1,
    /// 1 GiB page (level 2).
    L3_1G = 2,
}

impl PageLevel {
    /// Acceptance order: largest page size first, falling back to smaller.
    pub const PRIORITIES: [Self; 3] = [Self::L3_1G, Self::L2_2M, Self::L1_4K];

    /// Size in bytes of a page mapped at this level.
    pub const fn bytes(self) -> u64 {
        match self {
            Self::L1_4K => 1 << 12,
            Self::L2_2M => 1 << 21,
            Self::L3_1G => 1 << 30,
        }
    }
}
514
/// Represents the result of a single page acceptance attempt
/// (returned by [`try_accept_one`]).
enum TryAcceptResult {
    /// Successfully accepted a page of the given size in bytes.
    Accepted(u64),
    /// Current address or remaining length is not aligned/sufficient for
    /// this level; the caller should try the next smaller level.
    SkipLevel,
    /// Hardware/VMM reports a size mismatch or lack of support for this
    /// level; the caller should split into smaller pages.
    SizeMismatch,
}
524
525impl From<TdCallError> for TopologyError {
526    fn from(err: TdCallError) -> Self {
527        TopologyError::TdCall(err)
528    }
529}
530
531impl From<TdCallError> for SeptVeError {
532    fn from(err: TdCallError) -> Self {
533        SeptVeError::TdCall(err)
534    }
535}
536
537impl From<TdCallError> for AcceptError {
538    fn from(err: TdCallError) -> Self {
539        AcceptError::TdCall(err)
540    }
541}