use anyhow::{Context, Result};
use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap};
use zerocopy::IntoBytes;
use super::topology::apic_id;
use crate::vmm::numa_mem::NumaMemoryLayout;
use crate::vmm::topology::Topology;
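// Firmware-style OSes scan the BIOS read-only region 0xE0000..0xFFFFF
// for the RSDP on 16-byte boundaries, so it is placed at the start of
// that window. A revision-2 (ACPI 2.0+) RSDP is 36 bytes.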
const RSDP_ADDR: u64 = 0x000E_0000;
const RSDP_SIZE: u64 = 36;
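/// Guest-physical address and size of every ACPI table written by
/// [`setup_acpi`].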
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
pub struct AcpiLayout {
pub dsdt_addr: u64,
pub dsdt_size: u64,
pub madt_addr: u64,
pub madt_size: u64,
pub fadt_addr: u64,
pub fadt_size: u64,
pub srat_addr: u64,
pub srat_size: u64,
pub slit_addr: u64,
pub slit_size: u64,
pub hmat_addr: u64,
pub hmat_size: u64,
pub rsdt_addr: u64,
pub rsdt_size: u64,
pub xsdt_addr: u64,
pub xsdt_size: u64,
pub rsdp_addr: u64,
pub rsdp_size: u64,
}
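// FADT fixed-feature flag bits: setting PWR_BUTTON (bit 4) and
// SLP_BUTTON (bit 5) tells the OS the power and sleep buttons are not
// fixed-feature hardware (control-method devices, or absent).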
const FADT_F_PWR_BUTTON: u32 = 1 << 4;
const FADT_F_SLP_BUTTON: u32 = 1 << 5;
const IOAPIC_ADDR: u32 = 0xFEC0_0000;
const IOAPIC_ID: u8 = 0;
const LAPIC_ADDR: u32 = 0xFEE0_0000;
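/// Common 36-byte header shared by every ACPI system description
/// table. The `checksum` byte at offset 9 is chosen so the entire
/// table sums to zero modulo 256.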
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct SdtHeader {
signature: [u8; 4],
length: u32,
revision: u8,
checksum: u8,
oem_id: [u8; 6],
oem_table_id: [u8; 8],
oem_revision: u32,
creator_id: [u8; 4],
creator_revision: u32,
}
impl SdtHeader {
fn new(sig: &[u8; 4], length: u32, revision: u8) -> Self {
Self {
signature: *sig,
length,
revision,
oem_id: *b"KTSTR\0",
oem_table_id: {
let mut id = [0u8; 8];
let prefix = b"KTSR";
id[..prefix.len()].copy_from_slice(prefix);
id[prefix.len()..prefix.len() + sig.len()].copy_from_slice(sig);
id
},
oem_revision: 1,
creator_id: *b"KTSR",
creator_revision: 1,
..Default::default()
}
}
}
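/// Root System Description Pointer, revision 2. `checksum` covers the
/// first 20 bytes (the ACPI 1.0 portion); `extended_checksum` covers
/// the full 36-byte structure.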
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct Rsdp {
signature: [u8; 8],
checksum: u8,
oem_id: [u8; 6],
revision: u8,
rsdt_address: u32,
length: u32,
xsdt_address: u64,
extended_checksum: u8,
_reserved: [u8; 3],
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct MadtHeader {
sdt: SdtHeader,
local_apic_address: u32,
flags: u32,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct MadtLocalApic {
entry_type: u8,
length: u8,
processor_id: u8,
apic_id: u8,
flags: u32,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct MadtX2Apic {
entry_type: u8,
length: u8,
_reserved: u16,
x2apic_id: u32,
flags: u32,
processor_uid: u32,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct MadtIoApic {
entry_type: u8,
length: u8,
io_apic_id: u8,
_reserved: u8,
io_apic_address: u32,
gsi_base: u32,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct MadtIso {
entry_type: u8,
length: u8,
bus: u8,
source: u8,
gsi: u32,
flags: u16,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct MadtLapicNmi {
entry_type: u8,
length: u8,
processor_id: u8,
flags: u16,
lint: u8,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct MadtX2ApicNmi {
entry_type: u8,
length: u8,
flags: u16,
processor_uid: u32,
lint: u8,
_reserved: [u8; 3],
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct SratCpuAffinity {
entry_type: u8,
length: u8,
_reserved0: u16,
proximity_domain: u32,
x2apic_id: u32,
flags: u32,
clock_domain: u32,
_reserved1: u32,
}
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct SratMemAffinity {
entry_type: u8,
length: u8,
proximity_domain: u32,
_reserved0: u16,
base_address: u64,
address_length: u64,
_reserved1: u32,
flags: u32,
_reserved2: u64,
}
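/// Returns the two's complement of the byte sum of `data`, i.e. the
/// value that makes the wrapping sum of `data` plus this byte zero.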
fn acpi_checksum(data: &[u8]) -> u8 {
let sum: u8 = data.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
sum.wrapping_neg()
}
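/// Recomputes an SDT's checksum byte (offset 9) over the whole table,
/// zeroing the checksum field first.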
fn set_sdt_checksum(buf: &mut [u8]) {
buf[9] = 0;
buf[9] = acpi_checksum(buf);
}
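/// Builds all ACPI tables and writes them into guest memory, packed
/// back-to-back starting right after the RSDP at 0xE0000. The RSDP
/// points at the RSDT/XSDT, which point at the FADT, MADT, SRAT, and
/// SLIT (plus HMAT when there is more than one NUMA node); the FADT in
/// turn points at the DSDT.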
pub fn setup_acpi(
mem: &GuestMemoryMmap,
topo: &Topology,
numa_layout: &NumaMemoryLayout,
) -> Result<AcpiLayout> {
let num_cpus = topo.total_cpus();
let emit_hmat = topo.numa_nodes > 1;
let dsdt_size: u64 = 36;
let madt_size = compute_madt_size(topo) as u64;
let fadt_size: u64 = 276;
let num_mem_regions = numa_layout.regions().len() as u32;
let srat_size: u64 =
(48 + std::mem::size_of::<SratCpuAffinity>() as u32 * num_cpus
+ std::mem::size_of::<SratMemAffinity>() as u32 * num_mem_regions) as u64;
let n = topo.numa_nodes as u64;
let slit_size: u64 = 36 + 8 + n * n;
let hmat_size: u64 = if emit_hmat {
compute_hmat_size(topo, numa_layout) as u64
} else {
0
};
let table_count: u64 = if emit_hmat { 5 } else { 4 };
let rsdt_size: u64 = 36 + table_count * 4;
let xsdt_size: u64 = 36 + table_count * 8;
let mut cursor = RSDP_ADDR + RSDP_SIZE;
let dsdt_addr = cursor;
cursor += dsdt_size;
let madt_addr = cursor;
cursor += madt_size;
let fadt_addr = cursor;
cursor += fadt_size;
let srat_addr = cursor;
cursor += srat_size;
let slit_addr = cursor;
cursor += slit_size;
let hmat_addr = cursor;
cursor += hmat_size;
let rsdt_addr = cursor;
cursor += rsdt_size;
let xsdt_addr = cursor;
let layout = AcpiLayout {
dsdt_addr,
dsdt_size,
madt_addr,
madt_size,
fadt_addr,
fadt_size,
srat_addr,
srat_size,
slit_addr,
slit_size,
hmat_addr,
hmat_size,
rsdt_addr,
rsdt_size,
xsdt_addr,
xsdt_size,
rsdp_addr: RSDP_ADDR,
rsdp_size: RSDP_SIZE,
};
write_dsdt(mem, dsdt_addr)?;
write_madt(mem, topo, madt_addr)?;
write_fadt(mem, &layout)?;
write_srat(mem, topo, numa_layout, srat_addr)?;
write_slit(mem, topo, slit_addr)?;
if emit_hmat {
write_hmat(mem, topo, numa_layout, hmat_addr)?;
}
write_rsdt(mem, &layout)?;
write_xsdt(mem, &layout)?;
write_rsdp(mem, &layout)?;
Ok(layout)
}
fn write_rsdp(mem: &GuestMemoryMmap, layout: &AcpiLayout) -> Result<()> {
let mut rsdp = Rsdp {
signature: *b"RSD PTR ",
oem_id: *b"KTSTR\0",
revision: 2,
rsdt_address: layout.rsdt_addr as u32,
length: 36,
xsdt_address: layout.xsdt_addr,
..Default::default()
};
rsdp.checksum = acpi_checksum(&rsdp.as_bytes()[..20]);
rsdp.extended_checksum = acpi_checksum(rsdp.as_bytes());
mem.write_slice(rsdp.as_bytes(), GuestAddress(RSDP_ADDR))
.context("write RSDP")?;
Ok(())
}
fn write_rsdt(mem: &GuestMemoryMmap, layout: &AcpiLayout) -> Result<()> {
let len = layout.rsdt_size as usize;
let mut buf = vec![0u8; len];
let hdr = SdtHeader::new(b"RSDT", len as u32, 1);
buf[..36].copy_from_slice(hdr.as_bytes());
let mut offset = 36;
for addr in rsdt_entries(layout) {
buf[offset..offset + 4].copy_from_slice(&(addr as u32).to_le_bytes());
offset += 4;
}
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(layout.rsdt_addr))
.context("write RSDT")?;
Ok(())
}
fn write_xsdt(mem: &GuestMemoryMmap, layout: &AcpiLayout) -> Result<()> {
let len = layout.xsdt_size as usize;
let mut buf = vec![0u8; len];
let hdr = SdtHeader::new(b"XSDT", len as u32, 1);
buf[..36].copy_from_slice(hdr.as_bytes());
let mut offset = 36;
for addr in rsdt_entries(layout) {
buf[offset..offset + 8].copy_from_slice(&addr.to_le_bytes());
offset += 8;
}
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(layout.xsdt_addr))
.context("write XSDT")?;
Ok(())
}
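/// Table pointers installed in both the RSDT and the XSDT, in order:
/// FADT, MADT, SRAT, SLIT, then HMAT when present.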
fn rsdt_entries(layout: &AcpiLayout) -> Vec<u64> {
let mut entries = vec![
layout.fadt_addr,
layout.madt_addr,
layout.srat_addr,
layout.slit_addr,
];
if layout.hmat_size > 0 {
entries.push(layout.hmat_addr);
}
entries
}
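/// Writes a header-only DSDT: 36 bytes of SDT header with no AML
/// body. It exists so the FADT has a valid table to point at.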
fn write_dsdt(mem: &GuestMemoryMmap, addr: u64) -> Result<()> {
let mut buf = vec![0u8; 36];
let hdr = SdtHeader::new(b"DSDT", 36, 2);
buf[..36].copy_from_slice(hdr.as_bytes());
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(addr))
.context("write DSDT")?;
Ok(())
}
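/// Writes the Fixed ACPI Description Table ("FACP"), 276 bytes per
/// ACPI 6.x. Offsets used below: 32-bit DSDT pointer at 40, Flags at
/// 112, FADT minor version at 131, 64-bit X_DSDT at 140.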
fn write_fadt(mem: &GuestMemoryMmap, layout: &AcpiLayout) -> Result<()> {
let mut buf = vec![0u8; 276];
let hdr = SdtHeader::new(b"FACP", 276, 6);
buf[..36].copy_from_slice(hdr.as_bytes());
buf[40..44].copy_from_slice(&(layout.dsdt_addr as u32).to_le_bytes());
buf[140..148].copy_from_slice(&layout.dsdt_addr.to_le_bytes());
let flags = FADT_F_PWR_BUTTON | FADT_F_SLP_BUTTON;
buf[112..116].copy_from_slice(&flags.to_le_bytes());
buf[131] = 5;
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(layout.fadt_addr))
.context("write FADT")?;
Ok(())
}
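/// Writes the System Resource Affinity Table: a 48-byte header (the
/// 36-byte SDT header, a 4-byte reserved field that must be 1, and 8
/// reserved bytes), then one x2APIC affinity entry (type 2) per CPU
/// and one memory affinity entry (type 1) per NUMA memory region.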
fn write_srat(
mem: &GuestMemoryMmap,
topo: &Topology,
numa_layout: &NumaMemoryLayout,
addr: u64,
) -> Result<()> {
let num_cpus = topo.total_cpus();
let num_mem_regions = numa_layout.regions().len() as u32;
let len = 48
+ std::mem::size_of::<SratCpuAffinity>() as u32 * num_cpus
+ std::mem::size_of::<SratMemAffinity>() as u32 * num_mem_regions;
let mut buf = vec![0u8; len as usize];
let hdr = SdtHeader::new(b"SRAT", len, 3);
buf[..36].copy_from_slice(hdr.as_bytes());
buf[36..40].copy_from_slice(&1u32.to_le_bytes());
let mut offset = 48;
for cpu_id in 0..num_cpus {
let (llc_id, _, _) = topo.decompose(cpu_id);
let node_id = topo.numa_node_of(llc_id);
let entry = SratCpuAffinity {
entry_type: 2,
length: std::mem::size_of::<SratCpuAffinity>() as u8,
proximity_domain: node_id,
x2apic_id: apic_id(topo, cpu_id),
flags: 1,
..Default::default()
};
let bytes = entry.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
}
let regions = numa_layout.regions();
for region in regions {
let length = region.size;
let entry = SratMemAffinity {
entry_type: 1,
length: std::mem::size_of::<SratMemAffinity>() as u8,
proximity_domain: region.node_id,
base_address: region.gpa_start,
address_length: length,
flags: 1,
..Default::default()
};
let bytes = entry.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
}
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(addr))
.context("write SRAT")?;
Ok(())
}
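/// Writes the System Locality Information Table: an 8-byte locality
/// count followed by an n*n byte matrix of relative distances,
/// normalized so that a locality's distance to itself is 10.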
fn write_slit(mem: &GuestMemoryMmap, topo: &Topology, addr: u64) -> Result<()> {
let n = topo.numa_nodes as u64;
let len = 36 + 8 + n * n;
let mut buf = vec![0u8; len as usize];
let hdr = SdtHeader::new(b"SLIT", len as u32, 1);
buf[..36].copy_from_slice(hdr.as_bytes());
buf[36..44].copy_from_slice(&n.to_le_bytes());
let matrix_start = 44;
for i in 0..n {
for j in 0..n {
buf[matrix_start + (i * n + j) as usize] = topo.distance(i as u32, j as u32);
}
}
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(addr))
.context("write SLIT")?;
Ok(())
}
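// HMAT SLLBI values are scaled: the reported figure is
// entry * entry_base_unit, with latency in picoseconds and bandwidth
// in MB/s. The entries below are fallbacks for nodes that don't
// specify their own numbers, e.g. DRAM latency 1 * 100_000 ps =
// 100 ns, CXL latency 3 * 100_000 ps = 300 ns, DRAM bandwidth
// 5 * 10_240 = 51_200 MB/s.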
const HMAT_LATENCY_BASE_PS: u64 = 100_000;
const HMAT_DRAM_LATENCY_ENTRY: u16 = 1;
const HMAT_CXL_LATENCY_ENTRY: u16 = 3;
const HMAT_BW_BASE_MBS: u64 = 10_240;
const HMAT_DRAM_BW_ENTRY: u16 = 5;
const HMAT_CXL_BW_ENTRY: u16 = 2;
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct HmatMpda {
hmat_type: u16,
_reserved0: u16,
length: u32,
flags: u16,
_reserved1: u16,
initiator_proximity_domain: u32,
memory_proximity_domain: u32,
_reserved2: u32,
_reserved3: u64,
_reserved4: u64,
}
const _: () = assert!(std::mem::size_of::<HmatMpda>() == 40);
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct HmatSllbiHeader {
hmat_type: u16,
_reserved0: u16,
length: u32,
flags: u8,
data_type: u8,
min_transfer_size: u8,
_reserved1: u8,
num_initiator_pds: u32,
num_target_pds: u32,
_reserved2: u32,
entry_base_unit: u64,
}
const _: () = assert!(std::mem::size_of::<HmatSllbiHeader>() == 32);
#[repr(C, packed)]
#[derive(Clone, Copy, Default, IntoBytes, zerocopy::Immutable, zerocopy::KnownLayout)]
struct HmatMsci {
hmat_type: u16,
_reserved0: u16,
length: u32,
memory_proximity_domain: u32,
_reserved1: u32,
cache_size: u64,
cache_attributes: u32,
address_mode: u16,
_num_smbios_handles: u16,
}
const _: () = assert!(std::mem::size_of::<HmatMsci>() == 32);
fn compute_hmat_size(topo: &Topology, numa_layout: &NumaMemoryLayout) -> u32 {
let num_initiators = topo.cpu_bearing_nodes();
let num_targets = numa_layout.regions().len() as u32;
let num_mpdas = num_targets;
let hmat_header = 40u32;
let mpda_size = std::mem::size_of::<HmatMpda>() as u32 * num_mpdas;
let sllbi_size = (std::mem::size_of::<HmatSllbiHeader>() as u32
+ 4 * num_initiators
+ 4 * num_targets
+ 2 * num_initiators * num_targets)
* 2;
let num_msci = topo
.nodes
.map(|nodes| nodes.iter().filter(|n| n.mem_side_cache.is_some()).count() as u32)
.unwrap_or(0);
let msci_size = std::mem::size_of::<HmatMsci>() as u32 * num_msci;
hmat_header + mpda_size + sllbi_size + msci_size
}
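/// Writes the Heterogeneous Memory Attribute Table: a 40-byte header
/// (SDT header plus 4 reserved bytes), one MPDA (type 0) per memory
/// target, two SLLBI structures (type 1; access latency, then access
/// bandwidth), and one MSCI (type 2) per node with a memory-side
/// cache.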
fn write_hmat(
mem: &GuestMemoryMmap,
topo: &Topology,
numa_layout: &NumaMemoryLayout,
addr: u64,
) -> Result<()> {
let len = compute_hmat_size(topo, numa_layout);
let mut buf = vec![0u8; len as usize];
let hdr = SdtHeader::new(b"HMAT", len, 2);
buf[..36].copy_from_slice(hdr.as_bytes());
let mut offset = 40usize;
let initiators: Vec<u32> = (0..topo.numa_nodes)
.filter(|&n| topo.llcs_in_node(n) > 0)
.collect();
let targets: Vec<u32> = numa_layout.regions().iter().map(|r| r.node_id).collect();
for &target_node in &targets {
// A CPU-bearing node is its own initiator; a memory-only (CXL-like)
// node gets the nearest CPU-bearing node by SLIT distance.
// `initiators` is never empty because every VM has at least one CPU.
let initiator = if topo.llcs_in_node(target_node) > 0 {
target_node
} else {
*initiators
.iter()
.min_by_key(|&&i| topo.distance(i, target_node))
.unwrap_or(&initiators[0])
};
let mpda = HmatMpda {
hmat_type: 0,
length: std::mem::size_of::<HmatMpda>() as u32,
flags: 3,
initiator_proximity_domain: initiator,
memory_proximity_domain: target_node,
..Default::default()
};
let bytes = mpda.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
}
let ni = initiators.len() as u32;
let nt = targets.len() as u32;
for (data_type, base_unit) in [(0u8, HMAT_LATENCY_BASE_PS), (3u8, HMAT_BW_BASE_MBS)] {
let sllbi_len =
std::mem::size_of::<HmatSllbiHeader>() as u32 + 4 * ni + 4 * nt + 2 * ni * nt;
let sllbi_hdr = HmatSllbiHeader {
hmat_type: 1,
length: sllbi_len,
flags: 0,
data_type,
min_transfer_size: 0,
num_initiator_pds: ni,
num_target_pds: nt,
entry_base_unit: base_unit,
..Default::default()
};
let bytes = sllbi_hdr.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
for &i in &initiators {
buf[offset..offset + 4].copy_from_slice(&i.to_le_bytes());
offset += 4;
}
for &t in &targets {
buf[offset..offset + 4].copy_from_slice(&t.to_le_bytes());
offset += 4;
}
for &init_node in &initiators {
for &tgt_node in &targets {
let is_cxl = topo
.nodes
.is_some_and(|nodes| nodes[tgt_node as usize].is_memory_only());
let entry = if data_type == 0 {
let base = topo
.nodes
.and_then(|nodes| nodes[tgt_node as usize].latency_ns)
.map(|ns| {
let ps = ns as u64 * 1000;
(ps / base_unit).max(1) as u16
})
.unwrap_or(if is_cxl {
HMAT_CXL_LATENCY_ENTRY
} else {
HMAT_DRAM_LATENCY_ENTRY
});
let dist = topo.distance(init_node, tgt_node) as u32;
((base as u32 * dist / 10) as u16).max(1)
} else {
let base = topo
.nodes
.and_then(|nodes| nodes[tgt_node as usize].bandwidth_mbs)
.map(|mbs| (mbs as u64 / base_unit).max(1) as u16)
.unwrap_or(if is_cxl {
HMAT_CXL_BW_ENTRY
} else {
HMAT_DRAM_BW_ENTRY
});
let dist = topo.distance(init_node, tgt_node) as u32;
((base as u32 * 10 / dist.max(1)) as u16).max(1)
};
buf[offset..offset + 2].copy_from_slice(&entry.to_le_bytes());
offset += 2;
}
}
}
if let Some(nodes) = topo.nodes {
for (i, node) in nodes.iter().enumerate() {
if let Some(cache) = &node.mem_side_cache {
// MSCI cache attributes bit-fields: total cache levels [3:0], this
// cache's level [7:4], associativity [11:8], write policy [15:12],
// cache line size [31:16].
let attrs: u32 = 1
| (1 << 4)
| ((cache.associativity as u32 & 0xF) << 8)
| ((cache.write_policy as u32 & 0xF) << 12)
| ((cache.line_size as u32) << 16);
let msci = HmatMsci {
hmat_type: 2,
length: std::mem::size_of::<HmatMsci>() as u32,
memory_proximity_domain: i as u32,
cache_size: cache.size,
cache_attributes: attrs,
..Default::default()
};
let bytes = msci.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
}
}
}
debug_assert_eq!(offset, len as usize);
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(addr))
.context("write HMAT")?;
Ok(())
}
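/// xAPIC MADT entries carry an 8-bit APIC ID and 0xFF is the xAPIC
/// broadcast value, so any CPU whose APIC ID is 255 or greater must
/// use an x2APIC (type 9) entry instead.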
fn use_x2apic_entry(apic_id: u32) -> bool {
apic_id >= 255
}
fn compute_madt_size(topo: &Topology) -> u32 {
let num_cpus = topo.total_cpus();
let mut cpu_entries_size: u32 = 0;
let mut has_x2apic = false;
let mut has_lapic = false;
for cpu_id in 0..num_cpus {
if use_x2apic_entry(apic_id(topo, cpu_id)) {
cpu_entries_size += std::mem::size_of::<MadtX2Apic>() as u32;
has_x2apic = true;
} else {
cpu_entries_size += std::mem::size_of::<MadtLocalApic>() as u32;
has_lapic = true;
}
}
let nmi_size: u32 = if has_lapic {
std::mem::size_of::<MadtLapicNmi>() as u32
} else {
0
} + if has_x2apic {
std::mem::size_of::<MadtX2ApicNmi>() as u32
} else {
0
};
std::mem::size_of::<MadtHeader>() as u32
+ cpu_entries_size
+ std::mem::size_of::<MadtIoApic>() as u32
+ std::mem::size_of::<MadtIso>() as u32
+ nmi_size
}
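/// Writes the Multiple APIC Description Table ("APIC"): one Local
/// APIC (type 0) or x2APIC (type 9) entry per CPU, one IO-APIC (type
/// 1), the IRQ0->GSI2 interrupt source override (type 2) for the PIT,
/// and LINT1 NMI entries (types 4 and 0x0A) matching whichever kinds
/// of CPU entries were emitted.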
fn write_madt(mem: &GuestMemoryMmap, topo: &Topology, addr: u64) -> Result<()> {
let num_cpus = topo.total_cpus();
let mut has_x2apic = false;
let mut has_lapic = false;
for cpu_id in 0..num_cpus {
if use_x2apic_entry(apic_id(topo, cpu_id)) {
has_x2apic = true;
} else {
has_lapic = true;
}
}
let len = compute_madt_size(topo);
let mut buf = vec![0u8; len as usize];
let hdr = MadtHeader {
sdt: SdtHeader::new(b"APIC", len, 3),
local_apic_address: LAPIC_ADDR,
flags: 1, // PCAT_COMPAT: the guest also has dual 8259 PICs.
};
buf[..std::mem::size_of::<MadtHeader>()].copy_from_slice(hdr.as_bytes());
let mut offset = std::mem::size_of::<MadtHeader>();
for cpu_id in 0..num_cpus {
let id = apic_id(topo, cpu_id);
if use_x2apic_entry(id) {
let entry = MadtX2Apic {
entry_type: 9,
length: std::mem::size_of::<MadtX2Apic>() as u8,
x2apic_id: id,
flags: 1,
processor_uid: cpu_id,
..Default::default()
};
let bytes = entry.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
} else {
let entry = MadtLocalApic {
entry_type: 0,
length: std::mem::size_of::<MadtLocalApic>() as u8,
processor_id: cpu_id as u8,
apic_id: id as u8,
flags: 1,
};
let bytes = entry.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
}
}
let ioapic = MadtIoApic {
entry_type: 1,
length: std::mem::size_of::<MadtIoApic>() as u8,
io_apic_id: IOAPIC_ID,
io_apic_address: IOAPIC_ADDR,
gsi_base: 0,
..Default::default()
};
let bytes = ioapic.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
let iso = MadtIso {
entry_type: 2,
length: std::mem::size_of::<MadtIso>() as u8,
bus: 0,
source: 0,
gsi: 2,
flags: 0,
};
let bytes = iso.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
if has_lapic {
let nmi = MadtLapicNmi {
entry_type: 4,
length: std::mem::size_of::<MadtLapicNmi>() as u8,
processor_id: 0xFF,
flags: 0,
lint: 1,
};
let bytes = nmi.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
}
if has_x2apic {
let nmi = MadtX2ApicNmi {
entry_type: 0x0A,
length: std::mem::size_of::<MadtX2ApicNmi>() as u8,
flags: 0,
processor_uid: 0xFFFF_FFFF,
lint: 1,
_reserved: [0; 3],
};
let bytes = nmi.as_bytes();
buf[offset..offset + bytes.len()].copy_from_slice(bytes);
offset += bytes.len();
}
debug_assert_eq!(offset, len as usize);
set_sdt_checksum(&mut buf);
mem.write_slice(&buf, GuestAddress(addr))
.context("write MADT")?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
fn test_mem(mb: u32) -> GuestMemoryMmap {
GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), (mb as usize) << 20)]).unwrap()
}
fn test_layout(topo: &Topology, mb: u32) -> NumaMemoryLayout {
NumaMemoryLayout::compute(topo, mb, 0).unwrap()
}
fn test_setup(mem: &GuestMemoryMmap, topo: &Topology, mb: u32) -> AcpiLayout {
let layout = test_layout(topo, mb);
setup_acpi(mem, topo, &layout).unwrap()
}
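/// Reads a whole table back from guest memory, sizing the read from
/// the 32-bit length field at offset 4 of the SDT header.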
fn read_table(mem: &GuestMemoryMmap, addr: u64) -> Vec<u8> {
let mut len_bytes = [0u8; 4];
mem.read_slice(&mut len_bytes, GuestAddress(addr + 4))
.unwrap();
let len = u32::from_le_bytes(len_bytes) as usize;
let mut buf = vec![0u8; len];
mem.read_slice(&mut buf, GuestAddress(addr)).unwrap();
buf
}
fn read_madt(mem: &GuestMemoryMmap, layout: &AcpiLayout) -> Vec<u8> {
read_table(mem, layout.madt_addr)
}
fn walk_madt_entries(madt: &[u8]) -> Vec<(u8, u8, &[u8])> {
let hdr_size = std::mem::size_of::<MadtHeader>();
let mut entries = Vec::new();
let mut offset = hdr_size;
while offset < madt.len() {
let entry_type = madt[offset];
let entry_len = madt[offset + 1];
entries.push((
entry_type,
entry_len,
&madt[offset..offset + entry_len as usize],
));
offset += entry_len as usize;
}
entries
}
const _: () = assert!(std::mem::size_of::<SdtHeader>() == 36);
const _: () = assert!(std::mem::size_of::<Rsdp>() == 36);
const _: () = assert!(std::mem::size_of::<MadtHeader>() == 44);
const _: () = assert!(std::mem::size_of::<MadtLocalApic>() == 8);
const _: () = assert!(std::mem::size_of::<MadtX2Apic>() == 16);
const _: () = assert!(std::mem::size_of::<MadtIoApic>() == 12);
const _: () = assert!(std::mem::size_of::<MadtIso>() == 10);
const _: () = assert!(std::mem::size_of::<MadtLapicNmi>() == 6);
const _: () = assert!(std::mem::size_of::<MadtX2ApicNmi>() == 12);
const _: () = assert!(std::mem::size_of::<SratCpuAffinity>() == 24);
const _: () = assert!(std::mem::size_of::<SratMemAffinity>() == 40);
#[test]
fn rsdp_signature_and_checksum() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut rsdp = [0u8; 20];
mem.read_slice(&mut rsdp, GuestAddress(l.rsdp_addr))
.unwrap();
assert_eq!(&rsdp[..8], b"RSD PTR ");
let sum: u8 = rsdp.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum, 0, "RSDP checksum must be zero");
}
#[test]
fn rsdt_signature_and_checksum() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let rsdt = read_table(&mem, l.rsdt_addr);
assert_eq!(&rsdt[..4], b"RSDT");
let sum: u8 = rsdt.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum, 0, "RSDT checksum must be zero");
}
#[test]
fn madt_signature_and_checksum() {
let mem = test_mem(16);
let topo = Topology {
llcs: 2,
cores_per_llc: 2,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
assert_eq!(&madt[..4], b"APIC");
let sum: u8 = madt.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum, 0, "MADT checksum must be zero");
}
#[test]
fn madt_has_correct_cpu_count() {
let mem = test_mem(16);
let topo = Topology {
llcs: 2,
cores_per_llc: 4,
threads_per_core: 2,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let cpu_count = entries
.iter()
.filter(|(t, _, _)| *t == 0 || *t == 9)
.count();
assert_eq!(cpu_count, 16);
}
#[test]
fn madt_apic_ids_match_topology() {
let mem = test_mem(16);
let topo = Topology {
llcs: 2,
cores_per_llc: 2,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let mut cpu_idx = 0u32;
for (entry_type, _, data) in &entries {
match *entry_type {
0 => {
assert_eq!(data[3] as u32, apic_id(&topo, cpu_idx));
cpu_idx += 1;
}
9 => {
assert_eq!(
u32::from_le_bytes(data[4..8].try_into().unwrap()),
apic_id(&topo, cpu_idx)
);
cpu_idx += 1;
}
_ => {}
}
}
}
#[test]
fn madt_has_ioapic() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let ioapic = entries.iter().find(|(t, _, _)| *t == 1);
assert!(ioapic.is_some());
let (_, _, data) = ioapic.unwrap();
assert_eq!(
u32::from_le_bytes(data[4..8].try_into().unwrap()),
IOAPIC_ADDR
);
}
#[test]
fn rsdp_points_to_rsdt() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut rsdp = [0u8; 20];
mem.read_slice(&mut rsdp, GuestAddress(l.rsdp_addr))
.unwrap();
assert_eq!(
u32::from_le_bytes(rsdp[16..20].try_into().unwrap()),
l.rsdt_addr as u32
);
}
#[test]
fn rsdt_table_pointers() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut entry = [0u8; 4];
mem.read_slice(&mut entry, GuestAddress(l.rsdt_addr + 36))
.unwrap();
assert_eq!(u32::from_le_bytes(entry), l.fadt_addr as u32);
mem.read_slice(&mut entry, GuestAddress(l.rsdt_addr + 40))
.unwrap();
assert_eq!(u32::from_le_bytes(entry), l.madt_addr as u32);
mem.read_slice(&mut entry, GuestAddress(l.rsdt_addr + 44))
.unwrap();
assert_eq!(u32::from_le_bytes(entry), l.srat_addr as u32);
mem.read_slice(&mut entry, GuestAddress(l.rsdt_addr + 48))
.unwrap();
assert_eq!(u32::from_le_bytes(entry), l.slit_addr as u32);
}
#[test]
fn madt_has_iso_irq0_gsi2() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let iso = entries.iter().find(|(t, _, _)| *t == 2).unwrap();
assert_eq!(iso.2[3], 0);
assert_eq!(u32::from_le_bytes(iso.2[4..8].try_into().unwrap()), 2);
}
#[test]
fn madt_has_nmi() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
assert!(entries.iter().any(|(t, _, _)| *t == 4 || *t == 0x0A));
}
#[test]
fn small_topology_uses_lapic_entries() {
let mem = test_mem(16);
let topo = Topology {
llcs: 2,
cores_per_llc: 4,
threads_per_core: 2,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
assert_eq!(entries.iter().filter(|(t, _, _)| *t == 9).count(), 0);
assert_eq!(entries.iter().filter(|(t, _, _)| *t == 0).count(), 16);
assert!(entries.iter().any(|(t, _, _)| *t == 4));
assert!(!entries.iter().any(|(t, _, _)| *t == 0x0A));
}
#[test]
fn large_topology_uses_mixed_entries() {
let mem = test_mem(16);
let topo = Topology {
llcs: 14,
cores_per_llc: 9,
threads_per_core: 2,
numa_nodes: 1,
nodes: None,
distances: None,
};
let mut has_low = false;
let mut has_high = false;
for cpu_id in 0..topo.total_cpus() {
let id = apic_id(&topo, cpu_id);
if id < 255 {
has_low = true;
} else {
has_high = true;
}
}
assert!(has_low && has_high);
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let lapic_count = entries.iter().filter(|(t, _, _)| *t == 0).count();
let x2apic_count = entries.iter().filter(|(t, _, _)| *t == 9).count();
assert!(lapic_count > 0);
assert!(x2apic_count > 0);
assert_eq!(lapic_count + x2apic_count, topo.total_cpus() as usize);
assert!(entries.iter().any(|(t, _, _)| *t == 4));
assert!(entries.iter().any(|(t, _, _)| *t == 0x0A));
}
#[test]
fn x2apic_nmi_fields_correct() {
let mem = test_mem(16);
let topo = Topology {
llcs: 14,
cores_per_llc: 9,
threads_per_core: 2,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let (_, len, data) = entries.iter().find(|(t, _, _)| *t == 0x0A).unwrap();
assert_eq!(*len, 12);
assert_eq!(u16::from_le_bytes(data[2..4].try_into().unwrap()), 0);
assert_eq!(
u32::from_le_bytes(data[4..8].try_into().unwrap()),
0xFFFF_FFFF
);
assert_eq!(data[8], 1);
}
#[test]
fn lapic_nmi_fields_correct() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let (_, len, data) = entries.iter().find(|(t, _, _)| *t == 4).unwrap();
assert_eq!(*len, 6);
assert_eq!(data[2], 0xFF);
assert_eq!(u16::from_le_bytes(data[3..5].try_into().unwrap()), 0);
assert_eq!(data[5], 1);
}
#[test]
fn madt_checksum_representative_topologies() {
let topos = [
(1, 1, 1),
(2, 1, 1),
(3, 3, 1),
(1, 1, 2),
(2, 4, 2),
(7, 5, 3),
(15, 16, 1),
(14, 9, 2),
(2, 128, 1),
(14, 18, 1),
];
for (llcs, cores, threads) in topos {
let mem = test_mem(16);
let topo = Topology {
llcs,
cores_per_llc: cores,
threads_per_core: threads,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let sum: u8 = madt.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(
sum, 0,
"MADT checksum failed for {llcs}l/{cores}c/{threads}t"
);
let entries = walk_madt_entries(&madt);
let cpu_count = entries
.iter()
.filter(|(t, _, _)| *t == 0 || *t == 9)
.count();
assert_eq!(cpu_count, topo.total_cpus() as usize);
assert!(entries.iter().any(|(t, _, _)| *t == 4 || *t == 0x0A));
}
}
#[test]
fn cpu_entry_type_matches_apic_id() {
for (llcs, cores, threads) in [(1, 4, 1), (2, 2, 2), (15, 8, 2), (14, 9, 2)] {
let mem = test_mem(16);
let topo = Topology {
llcs,
cores_per_llc: cores,
threads_per_core: threads,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
let mut cpu_idx = 0u32;
for (entry_type, _, data) in &entries {
match *entry_type {
0 => {
let id = data[3] as u32;
assert!(id < 255);
assert_eq!(id, apic_id(&topo, cpu_idx));
cpu_idx += 1;
}
9 => {
let id = u32::from_le_bytes(data[4..8].try_into().unwrap());
assert!(id >= 255);
assert_eq!(id, apic_id(&topo, cpu_idx));
cpu_idx += 1;
}
_ => {}
}
}
}
}
#[test]
fn madt_entry_lengths_valid() {
let mem = test_mem(16);
let topo = Topology {
llcs: 14,
cores_per_llc: 9,
threads_per_core: 2,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
for (entry_type, entry_len, _) in &entries {
let expected = match *entry_type {
0 => 8,
1 => 12,
2 => 10,
4 => 6,
9 => 16,
0x0A => 12,
t => panic!("unexpected MADT entry type {t}"),
};
assert_eq!(*entry_len, expected);
}
}
#[test]
fn madt_total_length_matches_entries() {
let mem = test_mem(16);
let topo = Topology {
llcs: 14,
cores_per_llc: 9,
threads_per_core: 2,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let declared_len = u32::from_le_bytes(madt[4..8].try_into().unwrap()) as usize;
assert_eq!(declared_len, madt.len());
let entries = walk_madt_entries(&madt);
let entries_size: usize = entries.iter().map(|(_, l, _)| *l as usize).sum();
assert_eq!(
std::mem::size_of::<MadtHeader>() + entries_size,
declared_len
);
}
#[test]
fn cpu_flags_enabled() {
let mem = test_mem(16);
let topo = Topology {
llcs: 2,
cores_per_llc: 2,
threads_per_core: 2,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let madt = read_madt(&mem, &l);
let entries = walk_madt_entries(&madt);
for (entry_type, _, data) in &entries {
match *entry_type {
0 => assert_eq!(u32::from_le_bytes(data[4..8].try_into().unwrap()) & 1, 1),
9 => assert_eq!(u32::from_le_bytes(data[8..12].try_into().unwrap()) & 1, 1),
_ => {}
}
}
}
#[test]
fn rsdp_rev2_structure() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut rsdp = [0u8; 36];
mem.read_slice(&mut rsdp, GuestAddress(l.rsdp_addr))
.unwrap();
assert_eq!(&rsdp[..8], b"RSD PTR ");
assert_eq!(rsdp[15], 2);
assert_eq!(
u32::from_le_bytes(rsdp[16..20].try_into().unwrap()),
l.rsdt_addr as u32
);
assert_eq!(u32::from_le_bytes(rsdp[20..24].try_into().unwrap()), 36);
assert_eq!(
u64::from_le_bytes(rsdp[24..32].try_into().unwrap()),
l.xsdt_addr
);
let sum20: u8 = rsdp[..20].iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum20, 0);
let sum36: u8 = rsdp.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum36, 0);
}
#[test]
fn xsdt_signature_and_checksum() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let xsdt = read_table(&mem, l.xsdt_addr);
assert_eq!(&xsdt[..4], b"XSDT");
assert_eq!(xsdt.len(), 68);
let sum: u8 = xsdt.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum, 0);
}
#[test]
fn xsdt_table_pointers() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut entry = [0u8; 8];
mem.read_slice(&mut entry, GuestAddress(l.xsdt_addr + 36))
.unwrap();
assert_eq!(u64::from_le_bytes(entry), l.fadt_addr);
mem.read_slice(&mut entry, GuestAddress(l.xsdt_addr + 44))
.unwrap();
assert_eq!(u64::from_le_bytes(entry), l.madt_addr);
mem.read_slice(&mut entry, GuestAddress(l.xsdt_addr + 52))
.unwrap();
assert_eq!(u64::from_le_bytes(entry), l.srat_addr);
mem.read_slice(&mut entry, GuestAddress(l.xsdt_addr + 60))
.unwrap();
assert_eq!(u64::from_le_bytes(entry), l.slit_addr);
}
#[test]
fn fadt_signature_and_checksum() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut fadt = [0u8; 276];
mem.read_slice(&mut fadt, GuestAddress(l.fadt_addr))
.unwrap();
assert_eq!(&fadt[..4], b"FACP");
assert_eq!(u32::from_le_bytes(fadt[4..8].try_into().unwrap()), 276);
assert_eq!(fadt[8], 6);
let sum: u8 = fadt.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum, 0);
}
#[test]
fn fadt_hw_reduced_flags() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut fadt = [0u8; 276];
mem.read_slice(&mut fadt, GuestAddress(l.fadt_addr))
.unwrap();
let flags = u32::from_le_bytes(fadt[112..116].try_into().unwrap());
assert_eq!(flags & (1 << 20), 0, "HW_REDUCED_ACPI must not be set");
assert_ne!(flags & FADT_F_PWR_BUTTON, 0);
assert_ne!(flags & FADT_F_SLP_BUTTON, 0);
}
#[test]
fn fadt_minor_version() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut fadt = [0u8; 276];
mem.read_slice(&mut fadt, GuestAddress(l.fadt_addr))
.unwrap();
assert_eq!(fadt[131], 5);
}
#[test]
fn fadt_dsdt_pointers() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut fadt = [0u8; 276];
mem.read_slice(&mut fadt, GuestAddress(l.fadt_addr))
.unwrap();
assert_eq!(
u32::from_le_bytes(fadt[40..44].try_into().unwrap()),
l.dsdt_addr as u32
);
assert_eq!(
u64::from_le_bytes(fadt[140..148].try_into().unwrap()),
l.dsdt_addr
);
}
#[test]
fn dsdt_signature_and_checksum() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut dsdt = [0u8; 36];
mem.read_slice(&mut dsdt, GuestAddress(l.dsdt_addr))
.unwrap();
assert_eq!(&dsdt[..4], b"DSDT");
assert_eq!(u32::from_le_bytes(dsdt[4..8].try_into().unwrap()), 36);
let sum: u8 = dsdt.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum, 0, "DSDT checksum must be zero");
}
#[test]
fn rsdp_points_to_xsdt() {
let mem = test_mem(16);
let topo = Topology {
llcs: 1,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let mut rsdp = [0u8; 36];
mem.read_slice(&mut rsdp, GuestAddress(l.rsdp_addr))
.unwrap();
assert_eq!(
u64::from_le_bytes(rsdp[24..32].try_into().unwrap()),
l.xsdt_addr
);
}
fn walk_srat_entries(srat: &[u8]) -> Vec<(u8, u8, &[u8])> {
// SRAT header is 48 bytes: 36-byte SDT header + 4-byte reserved
// field (must be 1) + 8 reserved bytes.
let hdr_size = 48;
let mut entries = Vec::new();
let mut offset = hdr_size;
while offset < srat.len() {
let entry_type = srat[offset];
let entry_len = srat[offset + 1] as usize;
entries.push((
entry_type,
entry_len as u8,
&srat[offset..offset + entry_len],
));
offset += entry_len;
}
entries
}
#[test]
fn srat_cpu_affinity_multi_numa() {
for (numa_nodes, llcs, cores, threads) in
[(2, 4, 2, 1), (2, 4, 2, 2), (4, 8, 1, 1), (3, 6, 2, 2)]
{
let mem = test_mem(16);
let topo = Topology {
llcs,
cores_per_llc: cores,
threads_per_core: threads,
numa_nodes,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let srat = read_table(&mem, l.srat_addr);
let entries = walk_srat_entries(&srat);
let mut cpu_idx = 0u32;
for (entry_type, _, data) in &entries {
if *entry_type == 2 {
let prox_domain = u32::from_le_bytes(data[4..8].try_into().unwrap());
let (llc_id, _, _) = topo.decompose(cpu_idx);
let expected_node = topo.numa_node_of(llc_id);
assert_eq!(
prox_domain, expected_node,
"cpu {cpu_idx}: proximity_domain {prox_domain} != expected {expected_node} \
(topo: {numa_nodes}n/{llcs}l/{cores}c/{threads}t)"
);
let x2apic = u32::from_le_bytes(data[8..12].try_into().unwrap());
assert_eq!(
x2apic,
apic_id(&topo, cpu_idx),
"cpu {cpu_idx}: x2apic_id mismatch"
);
cpu_idx += 1;
}
}
assert_eq!(cpu_idx, topo.total_cpus());
}
}
#[test]
fn srat_memory_split_multi_numa() {
for (numa_nodes, llcs) in [(2, 4), (3, 6), (4, 8)] {
let mem = test_mem(16);
let topo = Topology {
llcs,
cores_per_llc: 2,
threads_per_core: 1,
numa_nodes,
nodes: None,
distances: None,
};
let mem_bytes = 256u64 << 20;
let l = test_setup(&mem, &topo, 256);
let srat = read_table(&mem, l.srat_addr);
let entries = walk_srat_entries(&srat);
let mem_entries: Vec<_> = entries.iter().filter(|(t, _, _)| *t == 1).collect();
assert_eq!(mem_entries.len(), numa_nodes as usize);
let mut prev_end: u64 = 0;
let mut total: u64 = 0;
for (i, (_, _, data)) in mem_entries.iter().enumerate() {
let prox_domain = u32::from_le_bytes(data[2..6].try_into().unwrap());
assert_eq!(
prox_domain, i as u32,
"node {i}: proximity_domain {prox_domain} != {i} \
(topo: {numa_nodes}n/{llcs}l)"
);
let base = u64::from_le_bytes(data[8..16].try_into().unwrap());
let length = u64::from_le_bytes(data[16..24].try_into().unwrap());
assert_eq!(
base, prev_end,
"node {i}: base {base:#x} != prev_end {prev_end:#x} \
(topo: {numa_nodes}n/{llcs}l)"
);
assert!(length > 0, "node {i}: zero-length memory region");
prev_end = base + length;
total += length;
}
assert_eq!(
total, mem_bytes,
"total memory mismatch for {numa_nodes}n/{llcs}l"
);
}
}
#[test]
fn slit_distance_matrix_multi_numa() {
for (numa_nodes, llcs) in [(2, 2), (3, 3), (4, 4), (2, 4), (2, 6), (3, 9)] {
let mem = test_mem(16);
let topo = Topology {
llcs,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let slit = read_table(&mem, l.slit_addr);
assert_eq!(&slit[..4], b"SLIT", "SLIT signature mismatch");
let n = u64::from_le_bytes(slit[36..44].try_into().unwrap());
assert_eq!(n, numa_nodes as u64);
let matrix_start = 44;
for i in 0..n {
for j in 0..n {
let dist = slit[matrix_start + (i * n + j) as usize];
if i == j {
assert_eq!(dist, 10, "diagonal ({i},{j}) != 10");
} else {
assert_eq!(dist, 20, "off-diagonal ({i},{j}) != 20");
}
}
}
}
}
#[test]
fn srat_slit_checksum_multi_numa() {
for (numa_nodes, llcs, cores, threads) in
[(2, 2, 2, 1), (2, 4, 2, 2), (3, 3, 1, 1), (4, 8, 4, 2)]
{
let mem = test_mem(16);
let topo = Topology {
llcs,
cores_per_llc: cores,
threads_per_core: threads,
numa_nodes,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let srat = read_table(&mem, l.srat_addr);
let srat_sum: u8 = srat.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(
srat_sum, 0,
"SRAT checksum failed for {numa_nodes}n/{llcs}l/{cores}c/{threads}t"
);
let slit = read_table(&mem, l.slit_addr);
let slit_sum: u8 = slit.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(
slit_sum, 0,
"SLIT checksum failed for {numa_nodes}n/{llcs}l/{cores}c/{threads}t"
);
}
}
#[test]
fn srat_memory_split_remainder() {
let memory_mb = 257u32;
let mem = test_mem(memory_mb);
let topo = Topology {
llcs: 3,
cores_per_llc: 1,
threads_per_core: 1,
numa_nodes: 3,
nodes: None,
distances: None,
};
let mem_bytes = (memory_mb as u64) << 20;
let per_node_mb = memory_mb / 3;
let per_node = (per_node_mb as u64) << 20;
let last = (memory_mb - per_node_mb * 2) as u64;
let last_bytes = last << 20;
let l = test_setup(&mem, &topo, memory_mb);
let srat = read_table(&mem, l.srat_addr);
let entries = walk_srat_entries(&srat);
let mem_entries: Vec<_> = entries.iter().filter(|(t, _, _)| *t == 1).collect();
assert_eq!(mem_entries.len(), 3);
let mut total: u64 = 0;
for (i, (_, _, data)) in mem_entries.iter().enumerate() {
let length = u64::from_le_bytes(data[16..24].try_into().unwrap());
if i < 2 {
assert_eq!(
length, per_node,
"node {i}: expected {per_node}, got {length}"
);
} else {
assert_eq!(
length, last_bytes,
"last node: expected {last_bytes}, got {length}"
);
assert!(
length > per_node,
"last node should be larger due to remainder"
);
}
total += length;
}
assert_eq!(total, mem_bytes);
}
#[test]
fn srat_total_memory() {
let mem = test_mem(16);
let topo = Topology {
llcs: 2,
cores_per_llc: 2,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
let srat = read_table(&mem, l.srat_addr);
let entries = walk_srat_entries(&srat);
let total_mem: u64 = entries
.iter()
.filter(|(t, _, _)| *t == 1)
.map(|(_, _, data)| u64::from_le_bytes(data[16..24].try_into().unwrap()))
.sum();
let expected = 256u64 << 20;
assert_eq!(total_mem, expected);
}
use crate::vmm::topology::NumaNode;
static CXL_NODES: [NumaNode; 3] = [
NumaNode::new(2, 256),
NumaNode::new(2, 256),
NumaNode::new(0, 128),
];
#[test]
fn hmat_not_emitted_single_node() {
let mem = test_mem(16);
let topo = Topology {
llcs: 2,
cores_per_llc: 2,
threads_per_core: 1,
numa_nodes: 1,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
assert_eq!(l.hmat_size, 0, "HMAT must not be emitted for single-node");
}
#[test]
fn hmat_emitted_multi_numa_without_cxl() {
let mem = test_mem(16);
let topo = Topology {
llcs: 4,
cores_per_llc: 2,
threads_per_core: 1,
numa_nodes: 2,
nodes: None,
distances: None,
};
let l = test_setup(&mem, &topo, 256);
assert!(l.hmat_size > 0, "HMAT must be emitted for multi-NUMA");
}
#[test]
fn hmat_emitted_with_cxl() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
assert!(l.hmat_size > 0, "HMAT must be emitted with CXL nodes");
}
#[test]
fn hmat_checksum() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
let hmat = read_table(&mem, l.hmat_addr);
let sum: u8 = hmat.iter().fold(0u8, |acc, &b| acc.wrapping_add(b));
assert_eq!(sum, 0, "HMAT checksum must be zero");
}
#[test]
fn hmat_header_fields() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
let hmat = read_table(&mem, l.hmat_addr);
assert_eq!(&hmat[..4], b"HMAT");
assert_eq!(hmat[8], 2, "HMAT revision must be 2");
assert_eq!(
&hmat[36..40],
&[0, 0, 0, 0],
"4 reserved bytes after SDT header"
);
}
#[test]
fn hmat_mpda_count_and_flags() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
let hmat = read_table(&mem, l.hmat_addr);
let num_targets = layout.regions().len();
let mut offset = 40;
let mut mpda_count = 0;
while offset < hmat.len() {
let hmat_type = u16::from_le_bytes(hmat[offset..offset + 2].try_into().unwrap());
if hmat_type != 0 {
break;
}
let length = u32::from_le_bytes(hmat[offset + 4..offset + 8].try_into().unwrap());
assert_eq!(length, 40, "MPDA length must be 40");
let flags = u16::from_le_bytes(hmat[offset + 8..offset + 10].try_into().unwrap());
assert_eq!(
flags, 3,
"MPDA flags must be 3 (PROCESSOR_PD_VALID | MEMORY_PD_VALID)"
);
mpda_count += 1;
offset += length as usize;
}
assert_eq!(
mpda_count, num_targets,
"one MPDA per memory target (layout region)"
);
}
#[test]
fn hmat_mpda_cxl_initiator() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
let hmat = read_table(&mem, l.hmat_addr);
let mut offset = 40;
for region in layout.regions() {
let hmat_type = u16::from_le_bytes(hmat[offset..offset + 2].try_into().unwrap());
assert_eq!(hmat_type, 0);
let initiator = u32::from_le_bytes(hmat[offset + 12..offset + 16].try_into().unwrap());
let memory_pd = u32::from_le_bytes(hmat[offset + 16..offset + 20].try_into().unwrap());
assert_eq!(memory_pd, region.node_id);
if topo.llcs_in_node(region.node_id) > 0 {
assert_eq!(
initiator, region.node_id,
"CPU-bearing node {}: initiator must be self",
region.node_id
);
} else {
assert_ne!(
topo.llcs_in_node(initiator),
0,
"CXL node {}: initiator {} must be CPU-bearing",
region.node_id,
initiator
);
}
offset += 40;
}
}
#[test]
fn hmat_sllbi_latency_and_bandwidth() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
let hmat = read_table(&mem, l.hmat_addr);
let num_targets = layout.regions().len();
let mut offset = 40 + 40 * num_targets;
for expected_data_type in [0u8, 3u8] {
let hmat_type = u16::from_le_bytes(hmat[offset..offset + 2].try_into().unwrap());
assert_eq!(hmat_type, 1, "SLLBI type must be 1");
let length = u32::from_le_bytes(hmat[offset + 4..offset + 8].try_into().unwrap());
let data_type = hmat[offset + 9];
assert_eq!(data_type, expected_data_type);
let ni = u32::from_le_bytes(hmat[offset + 12..offset + 16].try_into().unwrap());
let nt = u32::from_le_bytes(hmat[offset + 16..offset + 20].try_into().unwrap());
assert_eq!(ni, topo.cpu_bearing_nodes());
assert_eq!(nt, num_targets as u32);
let base_unit = u64::from_le_bytes(hmat[offset + 24..offset + 32].try_into().unwrap());
if data_type == 0 {
assert_eq!(base_unit, 100_000, "latency base must be 100000 ps");
} else {
assert_eq!(base_unit, 10_240, "bandwidth base must be 10240 MB/s");
}
offset += length as usize;
}
}
#[test]
fn hmat_sllbi_cxl_entries_differ() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
let hmat = read_table(&mem, l.hmat_addr);
let num_targets = layout.regions().len();
let ni = topo.cpu_bearing_nodes() as usize;
let nt = num_targets;
let sllbi_offset = 40 + 40 * nt;
let matrix_offset = sllbi_offset + 32 + 4 * ni + 4 * nt;
let dram_entry =
u16::from_le_bytes(hmat[matrix_offset..matrix_offset + 2].try_into().unwrap());
let cxl_entry = u16::from_le_bytes(
hmat[matrix_offset + 2 * (nt - 1)..matrix_offset + 2 * nt]
.try_into()
.unwrap(),
);
assert_eq!(dram_entry, 1, "local DRAM latency entry must be 1");
assert_eq!(
cxl_entry, 6,
"remote CXL latency entry must be 6 (distance-scaled)"
);
}
#[test]
fn hmat_rsdt_xsdt_include_pointer() {
let topo = Topology::with_nodes(4, 1, &CXL_NODES);
let mem = test_mem(16);
let layout = NumaMemoryLayout::compute(&topo, 640, 0).unwrap();
let l = setup_acpi(&mem, &topo, &layout).unwrap();
let rsdt = read_table(&mem, l.rsdt_addr);
let rsdt_entries = (rsdt.len() - 36) / 4;
assert_eq!(rsdt_entries, 5, "RSDT must have 5 table pointers with HMAT");
let hmat_ptr = u32::from_le_bytes(rsdt[36 + 16..36 + 20].try_into().unwrap());
assert_eq!(hmat_ptr, l.hmat_addr as u32);
let xsdt = read_table(&mem, l.xsdt_addr);
let xsdt_entries = (xsdt.len() - 36) / 8;
assert_eq!(xsdt_entries, 5, "XSDT must have 5 table pointers with HMAT");
let hmat_ptr64 = u64::from_le_bytes(xsdt[36 + 32..36 + 40].try_into().unwrap());
assert_eq!(hmat_ptr64, l.hmat_addr);
}
#[test]
fn no_hmat_rsdt_has_4_entries() {
let mem = test_mem(16);
let topo = Topology::new(1, 2, 2, 1);
let l = test_setup(&mem, &topo, 256);
let rsdt = read_table(&mem, l.rsdt_addr);
let rsdt_entries = (rsdt.len() - 36) / 4;
assert_eq!(
rsdt_entries, 4,
"RSDT must have 4 table pointers without HMAT"
);
}
}