use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec;
use alloc::vec::Vec;
use core::ffi::c_void;
use core::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use spin::Mutex;
use crate::hw::cxl::{BlockTemperature, CxlBlockMeta, CxlTier, CxlTierStats};
// Raw syscall numbers for the NUMA memory-policy syscalls. Linux assigns
// different numbers per architecture (see the kernel's per-arch syscall
// tables); both supported targets are covered below.
#[cfg(target_arch = "x86_64")]
const SYS_MBIND: i64 = 237;
#[cfg(target_arch = "x86_64")]
const SYS_MOVE_PAGES: i64 = 279;
#[cfg(target_arch = "x86_64")]
const SYS_GET_MEMPOLICY: i64 = 239;
#[cfg(target_arch = "x86_64")]
const SYS_SET_MEMPOLICY: i64 = 238;
#[cfg(target_arch = "aarch64")]
const SYS_MBIND: i64 = 235;
#[cfg(target_arch = "aarch64")]
const SYS_MOVE_PAGES: i64 = 239;
#[cfg(target_arch = "aarch64")]
const SYS_GET_MEMPOLICY: i64 = 236;
#[cfg(target_arch = "aarch64")]
const SYS_SET_MEMPOLICY: i64 = 237;
// Memory-policy modes accepted by mbind(2)/set_mempolicy(2).
pub const MPOL_DEFAULT: i32 = 0;
pub const MPOL_BIND: i32 = 2;
pub const MPOL_INTERLEAVE: i32 = 3;
pub const MPOL_PREFERRED: i32 = 1;
pub const MPOL_LOCAL: i32 = 4;
// mbind(2)/move_pages(2) mode flags: MOVE migrates pages owned by the caller,
// MOVE_ALL migrates shared pages too (needs CAP_SYS_NICE), STRICT makes
// mbind(2) fail instead of silently leaving misplaced pages.
pub const MPOL_MF_MOVE: u32 = 1 << 1;
pub const MPOL_MF_MOVE_ALL: u32 = 1 << 2;
pub const MPOL_MF_STRICT: u32 = 1 << 0;
// mmap(2) protection and mapping flags (Linux values for the targets above).
pub const PROT_READ: i32 = 0x1;
pub const PROT_WRITE: i32 = 0x2;
pub const MAP_PRIVATE: i32 = 0x02;
pub const MAP_ANONYMOUS: i32 = 0x20;
// MAP_POPULATE pre-faults pages so the binding below applies immediately.
pub const MAP_POPULATE: i32 = 0x8000;
// Sentinel returned by mmap(2) on failure: (void *)-1.
pub const MAP_FAILED: *mut c_void = !0usize as *mut c_void;
#[cfg(all(target_os = "linux", feature = "std"))]
mod ffi {
    // Minimal libc surface used by this module, declared directly so no libc
    // crate dependency is needed. Signatures mirror mmap(2)/munmap(2) and the
    // variadic libc `syscall` wrapper (returns -1 with the error in errno).
    use super::*;
    unsafe extern "C" {
        pub fn mmap(
            addr: *mut c_void,
            length: usize,
            prot: i32,
            flags: i32,
            fd: i32,
            offset: i64,
        ) -> *mut c_void;
        pub fn munmap(addr: *mut c_void, length: usize) -> i32;
        pub fn syscall(number: i64, ...) -> i64;
    }
}
pub const MAX_NUMA_NODES: usize = 64;
/// Bitmask of NUMA node ids, one bit per node. Capped at 64 nodes so the
/// whole mask fits in a single `u64` word (matching `MAX_NUMA_NODES`).
#[derive(Debug, Clone, Copy, Default)]
#[repr(C)]
pub struct NumaNodeMask {
    mask: u64,
}
impl NumaNodeMask {
    /// Empty mask with no nodes selected.
    pub const fn new() -> Self {
        Self { mask: 0 }
    }
    /// Mask with exactly one node set; out-of-range ids produce an empty mask.
    pub const fn single(node: i32) -> Self {
        match node {
            0..=63 => Self { mask: 1u64 << node },
            _ => Self { mask: 0 },
        }
    }
    /// Adds `node` to the mask; out-of-range ids are ignored.
    pub fn set(&mut self, node: i32) {
        if node >= 0 && node < 64 {
            self.mask |= 1u64 << node;
        }
    }
    /// Removes `node` from the mask; out-of-range ids are ignored.
    pub fn clear(&mut self, node: i32) {
        if node >= 0 && node < 64 {
            self.mask &= !(1u64 << node);
        }
    }
    /// Whether `node` is in the mask; always false for out-of-range ids.
    pub fn is_set(&self, node: i32) -> bool {
        node >= 0 && node < 64 && (self.mask >> node) & 1 == 1
    }
    /// Raw mask word, as passed to the kernel.
    pub fn as_raw(&self) -> u64 {
        self.mask
    }
    /// Pointer to the mask word, for the `nodemask` syscall argument.
    pub fn as_ptr(&self) -> *const u64 {
        &self.mask
    }
    /// Number of nodes currently set.
    pub fn count(&self) -> u32 {
        self.mask.count_ones()
    }
}
/// Errors produced by the CXL/NUMA management layer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CxlError {
    /// No CXL devices were discovered on the system.
    NoDevices,
    /// A NUMA node id was out of range or unknown.
    InvalidNode,
    /// mmap or the simulated allocator failed.
    AllocationFailed,
    /// move_pages-based migration failed.
    MigrationFailed,
    /// The kernel denied the operation (EPERM).
    PermissionDenied,
    /// The target tier has no remaining capacity.
    TierFull,
    /// The referenced block id is not tracked.
    BlockNotFound,
    /// The block already resides at the boundary tier.
    AlreadyAtTier,
    /// A raw syscall failed; payload is the errno value.
    SyscallFailed(i32),
    /// A sysfs attribute could not be read.
    SysfsReadFailed,
    /// The referenced CXL device id is unknown.
    InvalidDevice,
}
impl CxlError {
    /// Static human-readable description of the error.
    pub fn description(&self) -> &'static str {
        use CxlError::*;
        match *self {
            NoDevices => "No CXL devices found",
            InvalidNode => "Invalid NUMA node",
            AllocationFailed => "Memory allocation failed",
            MigrationFailed => "Page migration failed",
            PermissionDenied => "Permission denied",
            TierFull => "Tier is full",
            BlockNotFound => "Block not found",
            AlreadyAtTier => "Already at target tier",
            SyscallFailed(_) => "Syscall failed",
            SysfsReadFailed => "Sysfs read failed",
            InvalidDevice => "Invalid device",
        }
    }
}
/// CXL device class. Discovery defaults to Type3 and downgrades based on
/// which sysfs marker directories exist.
/// NOTE(review): variant semantics presumably follow the CXL spec
/// (Type 3 = memory expander, Type 2 = accelerator with memory,
/// Type 1 = cache-coherent accelerator) — confirm against crate::hw::cxl.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CxlDeviceType {
    Type1,
    Type2,
    Type3,
}
/// A discovered CXL memory device and its performance characteristics.
#[derive(Debug, Clone)]
pub struct CxlDevice {
    pub device_id: u32,        // index assigned during sysfs enumeration
    pub name: String,          // sysfs entry name, e.g. "mem0"
    pub device_type: CxlDeviceType,
    pub numa_node: i32,        // backing NUMA node; -1 when unknown
    pub memory_size: u64,      // bytes; 0 when the sysfs attribute is missing
    pub bandwidth_gbps: u32,
    pub latency_ns: u32,       // drives tier classification (determine_tier_for_device)
    pub tier: CxlTier,
}
impl CxlDevice {
    /// New device with placeholder defaults: Type 3, unknown (zero) size,
    /// 64 GB/s bandwidth, 300 ns latency, near-CXL tier. Discovery overwrites
    /// type/size/tier after reading sysfs.
    pub fn new(device_id: u32, name: String, numa_node: i32) -> Self {
        Self {
            device_id,
            name,
            device_type: CxlDeviceType::Type3,
            numa_node,
            memory_size: 0,
            bandwidth_gbps: 64,
            latency_ns: 300,
            tier: CxlTier::CxlNear,
        }
    }
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Allocates `size` bytes of anonymous memory bound to `numa_node` via
/// mmap(2) + mbind(2) with `MPOL_BIND | MPOL_MF_STRICT`.
///
/// # Errors
/// `AllocationFailed` for a zero size or mmap failure, `InvalidNode` for an
/// out-of-range node, `SyscallFailed(errno)` when mbind(2) rejects the
/// binding (the mapping is released before returning).
pub fn allocate_on_node(size: usize, numa_node: i32) -> Result<*mut u8, CxlError> {
    if size == 0 {
        return Err(CxlError::AllocationFailed);
    }
    if numa_node < 0 || numa_node >= MAX_NUMA_NODES as i32 {
        return Err(CxlError::InvalidNode);
    }
    // MAP_POPULATE pre-faults the pages so the subsequent bind takes effect
    // on real memory, not just the policy.
    let ptr = unsafe {
        ffi::mmap(
            core::ptr::null_mut(),
            size,
            PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
            -1,
            0,
        )
    };
    if ptr == MAP_FAILED {
        return Err(CxlError::AllocationFailed);
    }
    let nodemask = NumaNodeMask::single(numa_node);
    // The kernel treats `maxnode` as one greater than the highest usable bit
    // index (get_nodes() decrements it before use), so pass
    // MAX_NUMA_NODES + 1; with exactly 64 the last node (63) would be
    // rejected with EINVAL. libnuma does the same (`bmp->size + 1`).
    let maxnode = MAX_NUMA_NODES + 1;
    let result = unsafe {
        ffi::syscall(
            SYS_MBIND,
            ptr,
            size,
            MPOL_BIND,
            nodemask.as_ptr(),
            maxnode,
            MPOL_MF_MOVE | MPOL_MF_STRICT,
        )
    };
    if result < 0 {
        // libc's syscall() returns -1 and stores the error code in errno; the
        // return value itself does not encode the errno. Read errno before
        // munmap, which may clobber it.
        let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(-1);
        unsafe { ffi::munmap(ptr, size) };
        return Err(CxlError::SyscallFailed(errno));
    }
    Ok(ptr as *mut u8)
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Unmaps memory previously obtained from `allocate_on_node`.
/// Null pointers and zero sizes are ignored, so double-guarded callers are safe.
///
/// # Safety
/// `ptr`/`size` must describe a live mapping returned by `allocate_on_node`,
/// and the memory must not be accessed after this call.
pub unsafe fn free_numa_memory(ptr: *mut u8, size: usize) {
    if !ptr.is_null() && size > 0 {
        unsafe { ffi::munmap(ptr as *mut c_void, size) };
    }
}
#[cfg(not(all(target_os = "linux", feature = "std")))]
/// Simulation fallback: produces a fake, never-dereferenced address encoding
/// the requested node in the high byte and the size in the low bits.
pub fn allocate_on_node(size: usize, numa_node: i32) -> Result<*mut u8, CxlError> {
    if size == 0 {
        return Err(CxlError::AllocationFailed);
    }
    let node_tag = (numa_node as u64) << 56;
    Ok((node_tag | size as u64) as *mut u8)
}
#[cfg(not(all(target_os = "linux", feature = "std")))]
/// Simulation fallback: fake addresses own no real mapping, so freeing is a no-op.
pub unsafe fn free_numa_memory(_ptr: *mut u8, _size: usize) {}
/// Per-page outcome of a `migrate_pages` call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MigrationStatus {
    Success,
    AlreadyOnNode,  // reported when the per-page status names a different node
    SharedPage,     // mapped from per-page status -14 (EFAULT in this codebase's scheme)
    Busy,           // mapped from per-page status -11 (EAGAIN)
    Failed(i32),    // any other negative per-page status; payload is the errno
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Migrates the given pages of the current process to `target_node` via
/// move_pages(2), returning one `MigrationStatus` per input page.
///
/// # Errors
/// `InvalidNode` for an out-of-range target, `PermissionDenied` on EPERM,
/// `SyscallFailed(errno)` for any other syscall-level failure. Per-page
/// failures are reported in the returned vector, not as an `Err`.
pub fn migrate_pages(
    pages: &[*mut c_void],
    target_node: i32,
) -> Result<Vec<MigrationStatus>, CxlError> {
    if pages.is_empty() {
        return Ok(Vec::new());
    }
    if target_node < 0 || target_node >= MAX_NUMA_NODES as i32 {
        return Err(CxlError::InvalidNode);
    }
    let count = pages.len();
    let nodes: Vec<i32> = vec![target_node; count];
    let mut status: Vec<i32> = vec![0; count];
    // pid 0 = current process.
    let result = unsafe {
        ffi::syscall(
            SYS_MOVE_PAGES,
            0i32,
            count,
            pages.as_ptr(),
            nodes.as_ptr(),
            status.as_mut_ptr(),
            MPOL_MF_MOVE,
        )
    };
    if result < 0 {
        // libc's syscall() signals failure with -1 and sets errno; the return
        // value is NOT -errno (negating it would always yield 1 and
        // misclassify every failure as EPERM).
        let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(-1);
        if errno == 1 {
            // EPERM
            return Err(CxlError::PermissionDenied);
        }
        return Err(CxlError::SyscallFailed(errno));
    }
    // Unlike the syscall return value, the kernel writes either the resident
    // node id or a negative errno into each per-page status slot.
    let statuses = status
        .iter()
        .map(|&s| {
            if s >= 0 {
                if s == target_node {
                    MigrationStatus::Success
                } else {
                    MigrationStatus::AlreadyOnNode
                }
            } else {
                match -s {
                    11 => MigrationStatus::Busy,        // EAGAIN
                    14 => MigrationStatus::SharedPage,  // EFAULT per this module's scheme
                    _ => MigrationStatus::Failed(-s),
                }
            }
        })
        .collect();
    Ok(statuses)
}
#[cfg(not(all(target_os = "linux", feature = "std")))]
/// Simulation fallback: every requested page "migrates" successfully.
pub fn migrate_pages(
    pages: &[*mut c_void],
    _target_node: i32,
) -> Result<Vec<MigrationStatus>, CxlError> {
    let statuses = pages.iter().map(|_| MigrationStatus::Success).collect();
    Ok(statuses)
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Returns the NUMA node on which the page containing `addr` currently
/// resides, via get_mempolicy(2).
///
/// # Errors
/// `SyscallFailed(errno)` when the kernel rejects the query (e.g. `addr` is
/// not part of a mapping).
pub fn get_memory_node(addr: *const c_void) -> Result<i32, CxlError> {
    // get_mempolicy(2) flag bits. MPOL_F_ADDR alone returns the *policy mode*
    // governing `addr`; combining it with MPOL_F_NODE makes the kernel write
    // the node id the page resides on into the first argument, which is what
    // this function's name promises.
    const MPOL_F_NODE: u32 = 1 << 0;
    const MPOL_F_ADDR: u32 = 1 << 1;
    let mut node: i32 = -1;
    let mut nodemask: u64 = 0;
    let result = unsafe {
        ffi::syscall(
            SYS_GET_MEMPOLICY,
            &mut node as *mut i32,
            &mut nodemask as *mut u64,
            MAX_NUMA_NODES,
            addr,
            MPOL_F_NODE | MPOL_F_ADDR,
        )
    };
    if result < 0 {
        // libc's syscall() reports failure as -1 + errno, not -errno.
        let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(-1);
        return Err(CxlError::SyscallFailed(errno));
    }
    Ok(node)
}
#[cfg(not(all(target_os = "linux", feature = "std")))]
/// Simulation fallback: all addresses are reported as residing on node 0.
pub fn get_memory_node(_addr: *const c_void) -> Result<i32, CxlError> {
    Ok(0)
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Scans /sys/bus/cxl/devices for memory devices ("memN" entries) and builds
/// one descriptor per device from its sysfs attributes. Returns an empty
/// vector when the CXL bus directory is absent or unreadable.
pub fn discover_cxl_topology() -> Vec<CxlDevice> {
    use std::fs;
    use std::path::Path;
    let root = Path::new("/sys/bus/cxl/devices");
    let Ok(entries) = fs::read_dir(root) else {
        return Vec::new();
    };
    let mut devices = Vec::new();
    // Note: `idx` counts every directory entry (including skipped ones) so
    // device ids match the scheme used by get_numa_node_for_cxl_device.
    for (idx, entry) in entries.flatten().enumerate() {
        let name = entry.file_name().to_string_lossy().to_string();
        if !name.starts_with("mem") {
            continue;
        }
        let dev_dir = entry.path();
        let numa_node = read_sysfs_int(&dev_dir.join("numa_node")).unwrap_or(-1);
        let size = read_sysfs_u64(&dev_dir.join("size")).unwrap_or(0);
        // Device class is inferred from marker directories, defaulting to Type1.
        let device_type = if dev_dir.join("type3").exists() {
            CxlDeviceType::Type3
        } else if dev_dir.join("type2").exists() {
            CxlDeviceType::Type2
        } else {
            CxlDeviceType::Type1
        };
        let mut device = CxlDevice::new(idx as u32, name, numa_node);
        device.device_type = device_type;
        device.memory_size = size;
        device.tier = determine_tier_for_device(&device);
        devices.push(device);
    }
    devices
}
#[cfg(not(all(target_os = "linux", feature = "std")))]
/// Simulation fallback: two fixed Type 3 memory expanders, mirroring the
/// node assignments assumed elsewhere (mem0 on node 2, mem1 on node 3).
pub fn discover_cxl_topology() -> Vec<CxlDevice> {
    vec![
        // Fast expander: near-CXL tier.
        CxlDevice {
            device_id: 0,
            name: String::from("mem0"),
            device_type: CxlDeviceType::Type3,
            numa_node: 2,
            memory_size: 128 * 1024 * 1024 * 1024, // 128 GiB
            bandwidth_gbps: 64,
            latency_ns: 300,
            tier: CxlTier::CxlNear,
        },
        // Larger, slower expander: far-CXL tier.
        CxlDevice {
            device_id: 1,
            name: String::from("mem1"),
            device_type: CxlDeviceType::Type3,
            numa_node: 3,
            memory_size: 256 * 1024 * 1024 * 1024, // 256 GiB
            bandwidth_gbps: 32,
            latency_ns: 1000,
            tier: CxlTier::CxlFar,
        },
    ]
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Reads a sysfs attribute file and parses its trimmed contents as an i32.
fn read_sysfs_int(path: &std::path::Path) -> Option<i32> {
    let raw = std::fs::read_to_string(path).ok()?;
    raw.trim().parse::<i32>().ok()
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Reads a sysfs attribute file and parses its trimmed contents as a u64.
fn read_sysfs_u64(path: &std::path::Path) -> Option<u64> {
    let raw = std::fs::read_to_string(path).ok()?;
    raw.trim().parse::<u64>().ok()
}
/// Maps a device's advertised access latency onto a placement tier.
/// Thresholds: <=200 ns counts as local DRAM, <=500 ns near CXL,
/// <=2000 ns far CXL, anything slower is storage-class.
fn determine_tier_for_device(device: &CxlDevice) -> CxlTier {
    let ns = device.latency_ns;
    if ns <= 200 {
        CxlTier::LocalDram
    } else if ns <= 500 {
        CxlTier::CxlNear
    } else if ns <= 2000 {
        CxlTier::CxlFar
    } else {
        CxlTier::Storage
    }
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Looks up the NUMA node backing CXL device `device_id`, using the same
/// directory-enumeration ids that `discover_cxl_topology` assigns.
pub fn get_numa_node_for_cxl_device(device_id: u32) -> Result<i32, CxlError> {
    use std::fs;
    use std::path::Path;
    let entries = fs::read_dir(Path::new("/sys/bus/cxl/devices"))
        .map_err(|_| CxlError::InvalidDevice)?;
    for (idx, entry) in entries.flatten().enumerate() {
        if idx as u32 != device_id {
            continue;
        }
        // The id matched positionally; it must also be a memory device.
        let name = entry.file_name().to_string_lossy().to_string();
        if !name.starts_with("mem") {
            continue;
        }
        if let Some(node) = read_sysfs_int(&entry.path().join("numa_node")) {
            return Ok(node);
        }
    }
    Err(CxlError::InvalidDevice)
}
#[cfg(not(all(target_os = "linux", feature = "std")))]
/// Simulation fallback mirroring the two fake devices from
/// `discover_cxl_topology`: mem0 -> node 2, mem1 -> node 3.
pub fn get_numa_node_for_cxl_device(device_id: u32) -> Result<i32, CxlError> {
    if device_id == 0 {
        Ok(2)
    } else if device_id == 1 {
        Ok(3)
    } else {
        Err(CxlError::InvalidDevice)
    }
}
/// A NUMA node as seen by the tiering layer.
#[derive(Debug, Clone)]
pub struct NumaNodeInfo {
    pub node_id: i32,
    pub total_memory: u64,       // bytes
    pub free_memory: u64,        // bytes
    pub cpus: Vec<u32>,          // CPU ids attached to this node; empty for CPU-less (CXL) nodes
    pub is_cxl: bool,            // true when a discovered CXL device backs this node
    pub tier: Option<CxlTier>,   // tier of the backing CXL device, if any
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Enumerates /sys/devices/system/node, cross-referencing each "nodeN" entry
/// against the discovered CXL devices to mark CXL-backed nodes and their tier.
/// Returns an empty vector when the sysfs node directory is absent.
pub fn discover_numa_topology() -> Vec<NumaNodeInfo> {
    use std::fs;
    use std::path::Path;
    let mut nodes = Vec::new();
    let numa_path = Path::new("/sys/devices/system/node");
    if !numa_path.exists() {
        return nodes;
    }
    // Discover CXL devices first so each node can be matched by NUMA id.
    let cxl_devices = discover_cxl_topology();
    if let Ok(entries) = fs::read_dir(numa_path) {
        for entry in entries.flatten() {
            let name = entry.file_name().to_string_lossy().to_string();
            // Only "nodeN" directories describe NUMA nodes; skip the rest
            // (e.g. "possible", "online").
            if !name.starts_with("node") {
                continue;
            }
            let node_id: i32 = name
                .strip_prefix("node")
                .and_then(|s| s.parse().ok())
                .unwrap_or(-1);
            if node_id < 0 {
                continue;
            }
            let node_path = entry.path();
            // Memory sizes default to 0 when meminfo is missing/unparsable.
            let meminfo_path = node_path.join("meminfo");
            let (total, free) = parse_meminfo(&meminfo_path).unwrap_or((0, 0));
            let cpulist_path = node_path.join("cpulist");
            let cpus = parse_cpulist(&cpulist_path).unwrap_or_default();
            // A node is considered CXL-backed if any discovered device
            // reports this node as its home.
            let cxl_device = cxl_devices.iter().find(|d| d.numa_node == node_id);
            let is_cxl = cxl_device.is_some();
            let tier = cxl_device.map(|d| d.tier);
            nodes.push(NumaNodeInfo {
                node_id,
                total_memory: total,
                free_memory: free,
                cpus,
                is_cxl,
                tier,
            });
        }
    }
    nodes
}
#[cfg(not(all(target_os = "linux", feature = "std")))]
/// Simulation fallback: two DRAM nodes with CPUs (0 and 1) plus two CPU-less
/// CXL nodes (2 near, 3 far), consistent with the fake devices from
/// `discover_cxl_topology`.
pub fn discover_numa_topology() -> Vec<NumaNodeInfo> {
    vec![
        // Socket 0 DRAM.
        NumaNodeInfo {
            node_id: 0,
            total_memory: 32 * 1024 * 1024 * 1024,
            free_memory: 16 * 1024 * 1024 * 1024,
            cpus: vec![0, 1, 2, 3, 4, 5, 6, 7],
            is_cxl: false,
            tier: Some(CxlTier::LocalDram),
        },
        // Socket 1 DRAM.
        NumaNodeInfo {
            node_id: 1,
            total_memory: 32 * 1024 * 1024 * 1024,
            free_memory: 16 * 1024 * 1024 * 1024,
            cpus: vec![8, 9, 10, 11, 12, 13, 14, 15],
            is_cxl: false,
            tier: Some(CxlTier::LocalDram),
        },
        // Near-CXL expander (mem0).
        NumaNodeInfo {
            node_id: 2,
            total_memory: 128 * 1024 * 1024 * 1024,
            free_memory: 120 * 1024 * 1024 * 1024,
            cpus: vec![],
            is_cxl: true,
            tier: Some(CxlTier::CxlNear),
        },
        // Far-CXL expander (mem1).
        NumaNodeInfo {
            node_id: 3,
            total_memory: 256 * 1024 * 1024 * 1024,
            free_memory: 250 * 1024 * 1024 * 1024,
            cpus: vec![],
            is_cxl: true,
            tier: Some(CxlTier::CxlFar),
        },
    ]
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Parses a per-node sysfs meminfo file, returning (total, free) in bytes.
/// Values in the file are expressed in kB; a matched-but-malformed line
/// aborts the whole parse with None.
fn parse_meminfo(path: &std::path::Path) -> Option<(u64, u64)> {
    let content = std::fs::read_to_string(path).ok()?;
    let (mut total_kb, mut free_kb) = (0u64, 0u64);
    for line in content.lines() {
        if line.contains("MemTotal:") {
            total_kb = parse_meminfo_value(line)?;
        } else if line.contains("MemFree:") {
            free_kb = parse_meminfo_value(line)?;
        }
    }
    Some((total_kb * 1024, free_kb * 1024))
}
#[cfg(all(target_os = "linux", feature = "std"))]
// Extracts the numeric value from a per-node meminfo line, e.g.
// "Node 0 MemTotal:  32768 kB" tokenizes to
// ["Node", "0", "MemTotal:", "32768", "kB"], so the value is the fourth
// whitespace-separated token (index 3).
// NOTE(review): index 3 only fits the per-NUMA-node meminfo layout, not
// /proc/meminfo (where the value is token 1) — confirm callers only ever
// pass node meminfo paths.
fn parse_meminfo_value(line: &str) -> Option<u64> {
    line.split_whitespace()
        .nth(3)?
        .parse()
        .ok()
}
#[cfg(all(target_os = "linux", feature = "std"))]
/// Parses a sysfs cpulist file (e.g. "0-3,8,10-11") into an expanded list of
/// CPU ids. Returns None on read failure or any unparsable token; empty
/// tokens (as produced by an empty file) are skipped.
fn parse_cpulist(path: &std::path::Path) -> Option<Vec<u32>> {
    let content = std::fs::read_to_string(path).ok()?;
    let mut cpus = Vec::new();
    for token in content.trim().split(',') {
        if token.contains('-') {
            // Inclusive range "lo-hi".
            let mut bounds = token.split('-');
            let lo: u32 = bounds.next()?.parse().ok()?;
            let hi: u32 = bounds.next()?.parse().ok()?;
            cpus.extend(lo..=hi);
        } else if !token.is_empty() {
            cpus.push(token.parse().ok()?);
        }
    }
    Some(cpus)
}
/// Maps each memory tier to the NUMA node that backs it.
/// A value of -1 means "no node assigned for this tier".
#[derive(Debug, Clone)]
pub struct TierMapping {
    pub local_dram_node: i32,
    pub cxl_near_node: i32,
    pub cxl_far_node: i32,
    pub hardware_available: bool,  // true once any CXL node claimed a tier slot
}
impl Default for TierMapping {
    /// Node 0 is assumed to exist and serves as local DRAM; both CXL slots
    /// start unassigned (-1) and no hardware is presumed.
    fn default() -> Self {
        Self {
            local_dram_node: 0,
            cxl_near_node: -1,
            cxl_far_node: -1,
            hardware_available: false,
        }
    }
}
impl TierMapping {
    /// Derives a tier -> NUMA-node mapping from a discovered topology: the
    /// first node that has CPUs and is not CXL-backed becomes local DRAM, and
    /// the first CXL node of each tier claims that tier's slot (later nodes
    /// of the same tier are ignored).
    pub fn from_topology(numa_nodes: &[NumaNodeInfo]) -> Self {
        let mut mapping = Self::default();
        let local = numa_nodes
            .iter()
            .find(|n| !n.cpus.is_empty() && !n.is_cxl);
        if let Some(local) = local {
            mapping.local_dram_node = local.node_id;
        }
        for node in numa_nodes {
            if !node.is_cxl {
                continue;
            }
            match node.tier {
                Some(CxlTier::CxlNear) if mapping.cxl_near_node < 0 => {
                    mapping.cxl_near_node = node.node_id;
                    mapping.hardware_available = true;
                }
                Some(CxlTier::CxlFar) if mapping.cxl_far_node < 0 => {
                    mapping.cxl_far_node = node.node_id;
                    mapping.hardware_available = true;
                }
                _ => {}
            }
        }
        mapping
    }
    /// Resolves a tier to its backing NUMA node. Returns None for unassigned
    /// CXL slots and always for Storage, which never maps to a node.
    pub fn get_node_for_tier(&self, tier: CxlTier) -> Option<i32> {
        match tier {
            CxlTier::LocalDram => Some(self.local_dram_node),
            CxlTier::CxlNear => (self.cxl_near_node >= 0).then_some(self.cxl_near_node),
            CxlTier::CxlFar => (self.cxl_far_node >= 0).then_some(self.cxl_far_node),
            CxlTier::Storage => None,
        }
    }
}
/// Internal bookkeeping for one allocated block.
#[derive(Debug, Clone)]
struct BlockAllocation {
    block_id: u64,
    addr: u64,        // real mapping address, or a synthetic tagged value when no node backs the tier
    size: usize,      // bytes
    tier: CxlTier,
    numa_node: i32,   // -1 when the block has no real NUMA backing
}
/// Central state for CXL tiered-memory management: tracked blocks, their
/// physical allocations, per-tier statistics and the discovered topology.
pub struct LinuxCxlManager {
    blocks: BTreeMap<u64, CxlBlockMeta>,           // hotness/tier metadata per block id
    allocations: BTreeMap<u64, BlockAllocation>,   // physical placement per block id
    tier_stats: BTreeMap<CxlTier, CxlTierStats>,
    tier_mapping: TierMapping,
    devices: Vec<CxlDevice>,
    numa_nodes: Vec<NumaNodeInfo>,
    timestamp: AtomicU64,          // logical clock, bumped on allocate/access
    total_promotions: AtomicU64,
    total_demotions: AtomicU64,
    hardware_available: AtomicBool,
}
impl Default for LinuxCxlManager {
    /// Equivalent to [`LinuxCxlManager::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl LinuxCxlManager {
pub fn new() -> Self {
let mut tier_stats = BTreeMap::new();
tier_stats.insert(CxlTier::LocalDram, CxlTierStats::default());
tier_stats.insert(CxlTier::CxlNear, CxlTierStats::default());
tier_stats.insert(CxlTier::CxlFar, CxlTierStats::default());
tier_stats.insert(CxlTier::Storage, CxlTierStats::default());
Self {
blocks: BTreeMap::new(),
allocations: BTreeMap::new(),
tier_stats,
tier_mapping: TierMapping::default(),
devices: Vec::new(),
numa_nodes: Vec::new(),
timestamp: AtomicU64::new(0),
total_promotions: AtomicU64::new(0),
total_demotions: AtomicU64::new(0),
hardware_available: AtomicBool::new(false),
}
}
pub fn init(&mut self) -> Result<(), CxlError> {
self.numa_nodes = discover_numa_topology();
self.devices = discover_cxl_topology();
self.tier_mapping = TierMapping::from_topology(&self.numa_nodes);
for node in &self.numa_nodes {
if let Some(tier) = node.tier {
if let Some(stats) = self.tier_stats.get_mut(&tier) {
stats.capacity += node.total_memory;
}
}
}
self.hardware_available
.store(self.tier_mapping.hardware_available, Ordering::SeqCst);
Ok(())
}
pub fn is_hardware_available(&self) -> bool {
self.hardware_available.load(Ordering::SeqCst)
}
pub fn devices(&self) -> &[CxlDevice] {
&self.devices
}
pub fn numa_nodes(&self) -> &[NumaNodeInfo] {
&self.numa_nodes
}
pub fn allocate_block(
&mut self,
block_id: u64,
size: usize,
tier: CxlTier,
) -> Result<u64, CxlError> {
let stats = self.tier_stats.get(&tier).ok_or(CxlError::InvalidNode)?;
if stats.used + size as u64 > stats.capacity {
return Err(CxlError::TierFull);
}
let numa_node = self.tier_mapping.get_node_for_tier(tier);
let addr = if let Some(node) = numa_node {
let ptr = allocate_on_node(size, node)?;
ptr as u64
} else {
block_id | (tier as u64) << 56
};
let allocation = BlockAllocation {
block_id,
addr,
size,
tier,
numa_node: numa_node.unwrap_or(-1),
};
self.allocations.insert(block_id, allocation);
let ts = self.timestamp.fetch_add(1, Ordering::SeqCst);
let block = CxlBlockMeta::new(block_id, tier, ts);
self.blocks.insert(block_id, block);
if let Some(stats) = self.tier_stats.get_mut(&tier) {
stats.used += size as u64;
stats.block_count += 1;
}
Ok(addr)
}
pub fn free_block(&mut self, block_id: u64) -> Result<(), CxlError> {
let allocation = self
.allocations
.remove(&block_id)
.ok_or(CxlError::BlockNotFound)?;
if allocation.numa_node >= 0 {
unsafe {
free_numa_memory(allocation.addr as *mut u8, allocation.size);
}
}
self.blocks.remove(&block_id);
if let Some(stats) = self.tier_stats.get_mut(&allocation.tier) {
stats.used = stats.used.saturating_sub(allocation.size as u64);
stats.block_count = stats.block_count.saturating_sub(1);
}
Ok(())
}
pub fn access_block(&mut self, block_id: u64) {
let ts = self.timestamp.fetch_add(1, Ordering::SeqCst);
if let Some(block) = self.blocks.get_mut(&block_id) {
block.record_access(ts);
}
}
pub fn promote_block(&mut self, block_id: u64) -> Result<CxlTier, CxlError> {
let allocation = self
.allocations
.get(&block_id)
.ok_or(CxlError::BlockNotFound)?;
let current_tier = allocation.tier;
let new_tier = match current_tier {
CxlTier::Storage => CxlTier::CxlFar,
CxlTier::CxlFar => CxlTier::CxlNear,
CxlTier::CxlNear => CxlTier::LocalDram,
CxlTier::LocalDram => return Err(CxlError::AlreadyAtTier),
};
let stats = self
.tier_stats
.get(&new_tier)
.ok_or(CxlError::InvalidNode)?;
if stats.used + allocation.size as u64 > stats.capacity {
return Err(CxlError::TierFull);
}
if let Some(target_node) = self.tier_mapping.get_node_for_tier(new_tier) {
if allocation.numa_node >= 0 && allocation.numa_node != target_node {
let page_size = 4096usize;
let num_pages = allocation.size.div_ceil(page_size);
let mut pages: Vec<*mut c_void> = Vec::with_capacity(num_pages);
for i in 0..num_pages {
pages.push((allocation.addr + (i * page_size) as u64) as *mut c_void);
}
migrate_pages(&pages, target_node)?;
}
}
let size = allocation.size;
if let Some(alloc) = self.allocations.get_mut(&block_id) {
alloc.tier = new_tier;
if let Some(node) = self.tier_mapping.get_node_for_tier(new_tier) {
alloc.numa_node = node;
}
}
if let Some(block) = self.blocks.get_mut(&block_id) {
block.current_tier = new_tier;
}
if let Some(old_stats) = self.tier_stats.get_mut(¤t_tier) {
old_stats.used = old_stats.used.saturating_sub(size as u64);
old_stats.block_count = old_stats.block_count.saturating_sub(1);
old_stats.promotions += 1;
}
if let Some(new_stats) = self.tier_stats.get_mut(&new_tier) {
new_stats.used += size as u64;
new_stats.block_count += 1;
}
self.total_promotions.fetch_add(1, Ordering::SeqCst);
Ok(new_tier)
}
pub fn demote_block(&mut self, block_id: u64) -> Result<CxlTier, CxlError> {
let allocation = self
.allocations
.get(&block_id)
.ok_or(CxlError::BlockNotFound)?;
let current_tier = allocation.tier;
let new_tier = match current_tier {
CxlTier::LocalDram => CxlTier::CxlNear,
CxlTier::CxlNear => CxlTier::CxlFar,
CxlTier::CxlFar => CxlTier::Storage,
CxlTier::Storage => return Err(CxlError::AlreadyAtTier),
};
if let Some(target_node) = self.tier_mapping.get_node_for_tier(new_tier) {
if allocation.numa_node >= 0 && allocation.numa_node != target_node {
let page_size = 4096usize;
let num_pages = allocation.size.div_ceil(page_size);
let mut pages: Vec<*mut c_void> = Vec::with_capacity(num_pages);
for i in 0..num_pages {
pages.push((allocation.addr + (i * page_size) as u64) as *mut c_void);
}
migrate_pages(&pages, target_node)?;
}
}
let size = allocation.size;
if let Some(alloc) = self.allocations.get_mut(&block_id) {
alloc.tier = new_tier;
if let Some(node) = self.tier_mapping.get_node_for_tier(new_tier) {
alloc.numa_node = node;
}
}
if let Some(block) = self.blocks.get_mut(&block_id) {
block.current_tier = new_tier;
}
if let Some(old_stats) = self.tier_stats.get_mut(¤t_tier) {
old_stats.used = old_stats.used.saturating_sub(size as u64);
old_stats.block_count = old_stats.block_count.saturating_sub(1);
old_stats.demotions += 1;
}
if let Some(new_stats) = self.tier_stats.get_mut(&new_tier) {
new_stats.used += size as u64;
new_stats.block_count += 1;
}
self.total_demotions.fetch_add(1, Ordering::SeqCst);
Ok(new_tier)
}
pub fn auto_tier(&mut self) -> (u64, u64) {
let mut promotions = 0u64;
let mut demotions = 0u64;
let ts = self.timestamp.load(Ordering::SeqCst);
let block_ids: Vec<u64> = self.blocks.keys().copied().collect();
for block_id in block_ids {
let Some(block) = self.blocks.get(&block_id) else {
continue;
};
let should_promote = block.should_promote();
let should_demote = block.should_demote(ts);
if should_promote {
if self.promote_block(block_id).is_ok() {
promotions += 1;
}
} else if should_demote && self.demote_block(block_id).is_ok() {
demotions += 1;
}
}
(promotions, demotions)
}
pub fn get_tier_stats(&self, tier: CxlTier) -> Option<CxlTierStats> {
self.tier_stats.get(&tier).cloned()
}
pub fn get_global_stats(&self) -> (usize, u64, u64) {
(
self.blocks.len(),
self.total_promotions.load(Ordering::SeqCst),
self.total_demotions.load(Ordering::SeqCst),
)
}
pub fn tier_mapping(&self) -> &TierMapping {
&self.tier_mapping
}
}
// Process-wide singleton manager guarded by a spinlock; every LinuxCxlEngine
// entry point funnels through this instance.
lazy_static::lazy_static! {
    static ref LINUX_CXL_MANAGER: Mutex<LinuxCxlManager> = Mutex::new(LinuxCxlManager::new());
}
/// Stateless facade over the global `LINUX_CXL_MANAGER` singleton. Each call
/// takes the spinlock for the duration of one manager operation.
pub struct LinuxCxlEngine;
impl LinuxCxlEngine {
    /// Runs topology discovery on the global manager.
    pub fn init() -> Result<(), CxlError> {
        let mut mgr = LINUX_CXL_MANAGER.lock();
        mgr.init()
    }
    /// See [`LinuxCxlManager::is_hardware_available`].
    pub fn is_hardware_available() -> bool {
        let mgr = LINUX_CXL_MANAGER.lock();
        mgr.is_hardware_available()
    }
    /// See [`LinuxCxlManager::allocate_block`].
    pub fn allocate(block_id: u64, size: usize, tier: CxlTier) -> Result<u64, CxlError> {
        let mut mgr = LINUX_CXL_MANAGER.lock();
        mgr.allocate_block(block_id, size, tier)
    }
    /// See [`LinuxCxlManager::free_block`].
    pub fn free(block_id: u64) -> Result<(), CxlError> {
        let mut mgr = LINUX_CXL_MANAGER.lock();
        mgr.free_block(block_id)
    }
    /// See [`LinuxCxlManager::access_block`].
    pub fn access(block_id: u64) {
        let mut mgr = LINUX_CXL_MANAGER.lock();
        mgr.access_block(block_id);
    }
    /// See [`LinuxCxlManager::promote_block`].
    pub fn promote(block_id: u64) -> Result<CxlTier, CxlError> {
        let mut mgr = LINUX_CXL_MANAGER.lock();
        mgr.promote_block(block_id)
    }
    /// See [`LinuxCxlManager::demote_block`].
    pub fn demote(block_id: u64) -> Result<CxlTier, CxlError> {
        let mut mgr = LINUX_CXL_MANAGER.lock();
        mgr.demote_block(block_id)
    }
    /// See [`LinuxCxlManager::auto_tier`].
    pub fn auto_tier() -> (u64, u64) {
        let mut mgr = LINUX_CXL_MANAGER.lock();
        mgr.auto_tier()
    }
    /// See [`LinuxCxlManager::get_tier_stats`].
    pub fn get_tier_stats(tier: CxlTier) -> Option<CxlTierStats> {
        let mgr = LINUX_CXL_MANAGER.lock();
        mgr.get_tier_stats(tier)
    }
    /// See [`LinuxCxlManager::get_global_stats`].
    pub fn get_global_stats() -> (usize, u64, u64) {
        let mgr = LINUX_CXL_MANAGER.lock();
        mgr.get_global_stats()
    }
    /// Cloned snapshot of the discovered devices.
    pub fn get_devices() -> Vec<CxlDevice> {
        let mgr = LINUX_CXL_MANAGER.lock();
        mgr.devices().to_vec()
    }
    /// Cloned snapshot of the discovered NUMA nodes.
    pub fn get_numa_nodes() -> Vec<NumaNodeInfo> {
        let mgr = LINUX_CXL_MANAGER.lock();
        mgr.numa_nodes().to_vec()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // NumaNodeMask set/clear/count round-trip.
    #[test]
    fn test_numa_node_mask() {
        let mut mask = NumaNodeMask::new();
        assert_eq!(mask.count(), 0);
        mask.set(0);
        mask.set(2);
        assert!(mask.is_set(0));
        assert!(!mask.is_set(1));
        assert!(mask.is_set(2));
        assert_eq!(mask.count(), 2);
        mask.clear(0);
        assert!(!mask.is_set(0));
        assert_eq!(mask.count(), 1);
    }
    #[test]
    fn test_numa_node_mask_single() {
        let mask = NumaNodeMask::single(3);
        assert!(mask.is_set(3));
        assert!(!mask.is_set(0));
        assert!(!mask.is_set(2));
        assert_eq!(mask.count(), 1);
    }
    // Out-of-range node ids must be ignored, not panic or wrap.
    #[test]
    fn test_numa_node_mask_invalid() {
        let mut mask = NumaNodeMask::new();
        mask.set(-1);
        mask.set(100);
        assert_eq!(mask.count(), 0);
    }
    #[test]
    fn test_cxl_error_descriptions() {
        assert_eq!(CxlError::NoDevices.description(), "No CXL devices found");
        assert_eq!(CxlError::InvalidNode.description(), "Invalid NUMA node");
        assert_eq!(CxlError::TierFull.description(), "Tier is full");
    }
    #[test]
    fn test_cxl_device_new() {
        let device = CxlDevice::new(0, String::from("mem0"), 2);
        assert_eq!(device.device_id, 0);
        assert_eq!(device.name, "mem0");
        assert_eq!(device.numa_node, 2);
        assert_eq!(device.tier, CxlTier::CxlNear);
    }
    #[test]
    fn test_tier_mapping_default() {
        let mapping = TierMapping::default();
        assert_eq!(mapping.local_dram_node, 0);
        assert_eq!(mapping.cxl_near_node, -1);
        assert_eq!(mapping.cxl_far_node, -1);
        assert!(!mapping.hardware_available);
    }
    #[test]
    fn test_tier_mapping_get_node() {
        let mapping = TierMapping {
            local_dram_node: 0,
            cxl_near_node: 2,
            cxl_far_node: 3,
            hardware_available: true,
        };
        assert_eq!(mapping.get_node_for_tier(CxlTier::LocalDram), Some(0));
        assert_eq!(mapping.get_node_for_tier(CxlTier::CxlNear), Some(2));
        assert_eq!(mapping.get_node_for_tier(CxlTier::CxlFar), Some(3));
        assert_eq!(mapping.get_node_for_tier(CxlTier::Storage), None);
    }
    #[test]
    fn test_linux_cxl_manager_new() {
        let mgr = LinuxCxlManager::new();
        assert!(!mgr.is_hardware_available());
        assert!(mgr.devices().is_empty());
    }
    // On non-linux/no-std builds the simulated topology must be present;
    // on real hardware the discovery result is environment-dependent.
    #[test]
    fn test_simulation_topology_discovery() {
        let devices = discover_cxl_topology();
        #[cfg(not(all(target_os = "linux", feature = "std")))]
        {
            assert!(!devices.is_empty());
            assert_eq!(devices[0].name, "mem0");
        }
    }
    #[test]
    fn test_simulation_numa_discovery() {
        let nodes = discover_numa_topology();
        #[cfg(not(all(target_os = "linux", feature = "std")))]
        {
            assert!(!nodes.is_empty());
            assert!(nodes.iter().any(|n| n.is_cxl));
        }
    }
    #[test]
    fn test_manager_allocate_and_free() {
        let mut mgr = LinuxCxlManager::new();
        // Give the tier some capacity; new() starts everything at zero.
        if let Some(stats) = mgr.tier_stats.get_mut(&CxlTier::LocalDram) {
            stats.capacity = 1024 * 1024 * 1024;
        }
        let result = mgr.allocate_block(1, 4096, CxlTier::LocalDram);
        assert!(result.is_ok());
        let stats = mgr.get_tier_stats(CxlTier::LocalDram).unwrap();
        assert_eq!(stats.block_count, 1);
        assert_eq!(stats.used, 4096);
        let result = mgr.free_block(1);
        assert!(result.is_ok());
        let stats = mgr.get_tier_stats(CxlTier::LocalDram).unwrap();
        assert_eq!(stats.block_count, 0);
        assert_eq!(stats.used, 0);
    }
    #[test]
    fn test_manager_access_tracking() {
        let mut mgr = LinuxCxlManager::new();
        if let Some(stats) = mgr.tier_stats.get_mut(&CxlTier::LocalDram) {
            stats.capacity = 1024 * 1024 * 1024;
        }
        mgr.allocate_block(1, 4096, CxlTier::LocalDram).unwrap();
        for _ in 0..10 {
            mgr.access_block(1);
        }
        let block = mgr.blocks.get(&1).unwrap();
        assert_eq!(block.access_count, 10);
    }
    #[test]
    fn test_manager_promote() {
        let mut mgr = LinuxCxlManager::new();
        for tier in [CxlTier::LocalDram, CxlTier::CxlNear, CxlTier::CxlFar] {
            if let Some(stats) = mgr.tier_stats.get_mut(&tier) {
                stats.capacity = 1024 * 1024 * 1024;
            }
        }
        mgr.allocate_block(1, 4096, CxlTier::CxlFar).unwrap();
        let result = mgr.promote_block(1);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), CxlTier::CxlNear);
        let block = mgr.blocks.get(&1).unwrap();
        assert_eq!(block.current_tier, CxlTier::CxlNear);
    }
    #[test]
    fn test_manager_demote() {
        let mut mgr = LinuxCxlManager::new();
        for tier in [CxlTier::LocalDram, CxlTier::CxlNear, CxlTier::CxlFar] {
            if let Some(stats) = mgr.tier_stats.get_mut(&tier) {
                stats.capacity = 1024 * 1024 * 1024;
            }
        }
        mgr.allocate_block(1, 4096, CxlTier::LocalDram).unwrap();
        let result = mgr.demote_block(1);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), CxlTier::CxlNear);
        let block = mgr.blocks.get(&1).unwrap();
        assert_eq!(block.current_tier, CxlTier::CxlNear);
    }
    // Capacity smaller than the request must be rejected with TierFull.
    #[test]
    fn test_manager_tier_full() {
        let mut mgr = LinuxCxlManager::new();
        if let Some(stats) = mgr.tier_stats.get_mut(&CxlTier::LocalDram) {
            stats.capacity = 1000;
        }
        let result = mgr.allocate_block(1, 4096, CxlTier::LocalDram);
        assert!(matches!(result, Err(CxlError::TierFull)));
    }
    #[test]
    fn test_manager_block_not_found() {
        let mut mgr = LinuxCxlManager::new();
        let result = mgr.free_block(999);
        assert!(matches!(result, Err(CxlError::BlockNotFound)));
        let result = mgr.promote_block(999);
        assert!(matches!(result, Err(CxlError::BlockNotFound)));
    }
    #[test]
    fn test_migration_status() {
        assert_eq!(MigrationStatus::Success, MigrationStatus::Success);
        assert_ne!(MigrationStatus::Success, MigrationStatus::Busy);
    }
    // Exercise all four latency thresholds of the tier classifier.
    #[test]
    fn test_determine_tier_for_device() {
        let mut device = CxlDevice::new(0, String::from("test"), 0);
        device.latency_ns = 100;
        assert_eq!(determine_tier_for_device(&device), CxlTier::LocalDram);
        device.latency_ns = 300;
        assert_eq!(determine_tier_for_device(&device), CxlTier::CxlNear);
        device.latency_ns = 1000;
        assert_eq!(determine_tier_for_device(&device), CxlTier::CxlFar);
        device.latency_ns = 5000;
        assert_eq!(determine_tier_for_device(&device), CxlTier::Storage);
    }
}