#![allow(unused_mut)]
#[allow(unused_imports)]
use crate::cuda::error::CudaResult;
use cust::memory::DevicePointer as CustDevicePointer;
use std::time::Instant;
/// Raw-pointer wrapper that is declared `Send`/`Sync` (see the unsafe impls
/// below) so allocation bookkeeping can be shared across threads.
///
/// `#[repr(transparent)]` guarantees the same ABI layout as a bare `*mut T`.
/// NOTE(review): the wrapper adds no synchronization — safety relies on the
/// owning allocator coordinating access to the pointed-to memory.
#[derive(Debug, Clone, Copy)]
#[repr(transparent)]
pub struct SendSyncPtr<T>(*mut T);
impl<T> SendSyncPtr<T> {
    /// Wraps a raw pointer without any validity checks.
    pub fn new(ptr: *mut T) -> Self {
        Self(ptr)
    }

    /// A wrapped null pointer.
    pub fn null() -> Self {
        Self(std::ptr::null_mut())
    }

    /// Returns the wrapped raw pointer.
    pub fn as_ptr(&self) -> *mut T {
        self.0
    }

    /// Alias of [`Self::as_ptr`].
    pub fn as_raw(&self) -> *mut T {
        self.0
    }

    /// Alias of [`Self::as_ptr`].
    pub fn as_mut_ptr(&self) -> *mut T {
        self.0
    }

    /// True when the wrapped pointer is null.
    pub fn is_null(&self) -> bool {
        self.0.is_null()
    }

    /// Reinterprets the pointee type; the address is unchanged.
    pub fn cast<U>(&self) -> SendSyncPtr<U> {
        SendSyncPtr(self.0.cast::<U>())
    }
}
// SAFETY: SendSyncPtr only stores an address; it never dereferences it.
// NOTE(review): these impls assert that any cross-thread access to the
// pointed-to memory is synchronized by the owner — confirm call sites uphold
// this, since the wrapper itself provides no synchronization.
unsafe impl<T> Send for SendSyncPtr<T> {}
unsafe impl<T> Sync for SendSyncPtr<T> {}
impl<T> Default for SendSyncPtr<T> {
fn default() -> Self {
Self::null()
}
}
impl<T> From<*mut T> for SendSyncPtr<T> {
fn from(ptr: *mut T) -> Self {
Self::new(ptr)
}
}
pub type LocalDevicePointer<T> = SendSyncPtr<T>;
/// Common read-only interface implemented by every allocation kind in this
/// module (device, unified, pinned).
pub trait CudaMemoryAllocation {
    /// Base address of the allocation as a raw byte pointer.
    fn as_ptr(&self) -> *mut u8;
    /// Size of the allocation in bytes.
    fn size(&self) -> usize;
    /// Instant at which the allocation was created.
    fn allocation_time(&self) -> Instant;
    /// True when the allocation has a non-null pointer and a non-zero size.
    fn is_valid(&self) -> bool;
    /// Which memory space the allocation lives in.
    fn allocation_type(&self) -> AllocationType;
}
/// The memory space / allocation mechanism backing an allocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AllocationType {
    /// Ordinary device (GPU) memory.
    Device,
    /// Unified memory addressable from both host and device.
    Unified,
    /// Page-locked (pinned) host memory.
    Pinned,
    /// Texture memory. NOTE(review): no allocation struct in this file uses
    /// this variant — presumably handled elsewhere.
    Texture,
    /// Surface memory (see note on `Texture`).
    Surface,
    /// Managed memory (see note on `Texture`).
    Managed,
}
/// A device-memory allocation tracked by the pool.
#[derive(Debug, Clone, Copy)]
pub struct CudaAllocation {
    /// Device pointer to the start of the block.
    pub ptr: CustDevicePointer<u8>,
    /// Requested size in bytes.
    pub size: usize,
    /// Pool bucket the block belongs to (see `size_class`).
    pub size_class: usize,
    /// When the block was allocated.
    pub allocation_time: Instant,
    /// Whether the block is currently handed out (vs. cached for reuse).
    pub in_use: bool,
    /// Ordinal of the device the block lives on.
    pub device_id: usize,
}
/// A unified-memory allocation plus placement hints and migration accounting.
#[derive(Debug, Clone)]
pub struct UnifiedAllocation {
    /// Base address, valid from both host and device.
    pub ptr: SendSyncPtr<u8>,
    /// Size in bytes.
    pub size: usize,
    /// When the block was allocated.
    pub allocation_time: Instant,
    /// Where the data should preferentially reside.
    pub preferred_location: PreferredLocation,
    /// Access-pattern hints for the memory subsystem.
    pub access_hints: AccessHints,
    /// Running counters updated by `record_migration`.
    pub migration_stats: MigrationStats,
    /// Bookkeeping metadata (id, tag, owner thread, …).
    pub metadata: AllocationMetadata,
}
/// A page-locked host allocation, optionally mapped into device address space.
#[derive(Debug, Clone)]
pub struct PinnedAllocation {
    /// Host base address of the pinned buffer.
    pub ptr: SendSyncPtr<u8>,
    /// Size in bytes.
    pub size: usize,
    /// When the buffer was allocated.
    pub allocation_time: Instant,
    /// How many times the buffer has been handed out (see `increment_usage`).
    pub usage_count: usize,
    /// True when `device_ptr` is populated.
    pub is_mapped: bool,
    /// Device-side alias of the host buffer, when mapped.
    pub device_ptr: Option<CustDevicePointer<u8>>,
    /// Flags the buffer was registered with.
    pub mapping_flags: PinnedMemoryFlags,
    /// Bookkeeping metadata (id, tag, owner thread, …).
    pub metadata: AllocationMetadata,
}
/// Preferred residence of a unified-memory allocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PreferredLocation {
    /// Keep the data in host memory.
    Host,
    /// Keep the data on the device with this ordinal.
    Device(usize),
    /// Let the runtime decide.
    Auto,
}
/// Access-pattern hints attached to a unified allocation.
#[derive(Debug, Clone)]
pub struct AccessHints {
    /// Data is mostly read, rarely written.
    pub read_mostly: bool,
    /// Expected access rate.
    pub access_frequency: AccessFrequency,
    /// Expected access locality pattern.
    pub locality: DataLocality,
    /// Free-form, consumer-defined hint strings.
    pub custom_hints: Vec<String>,
}
/// Coarse expected access rate, highest to lowest.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessFrequency {
    VeryHigh,
    High,
    Medium,
    Low,
    VeryLow,
}
/// Expected access-locality pattern of an allocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataLocality {
    /// Accessed in order (streaming).
    Sequential,
    /// Accessed at unpredictable offsets.
    Random,
    /// Repeated accesses to the same data over time.
    Temporal,
    /// Accesses clustered around nearby addresses.
    Spatial,
    /// No single dominant pattern.
    Mixed,
}
/// Running page-migration counters for a unified allocation.
#[derive(Debug, Clone)]
pub struct MigrationStats {
    /// Number of host→device migrations recorded.
    pub host_to_device_migrations: u64,
    /// Number of device→host migrations recorded.
    pub device_to_host_migrations: u64,
    /// Total bytes moved across all migrations.
    pub total_bytes_migrated: u64,
    /// Mean duration of a single migration.
    pub average_migration_time: std::time::Duration,
    /// When the most recent migration was recorded, if any.
    pub last_migration: Option<Instant>,
}
/// Registration flags for pinned host memory.
#[derive(Debug, Clone, Copy)]
pub struct PinnedMemoryFlags {
    /// Map the buffer into device address space.
    pub enable_mapping: bool,
    /// Make the buffer usable from all CUDA contexts.
    pub portable: bool,
    /// Allocate as write-combined memory.
    pub write_combining: bool,
    /// Raw driver-level flag bits. NOTE(review): relationship to the three
    /// booleans above is not established in this file — confirm whether one
    /// is derived from the other.
    pub raw_flags: u32,
}
/// Diagnostic bookkeeping attached to an allocation.
///
/// Not `derive(Clone)` because `user_data` holds a non-clonable
/// `Box<dyn Any>`; see the manual `Clone` impl below.
#[derive(Debug)]
pub struct AllocationMetadata {
    /// Process-unique id drawn from a global counter (starts at 1).
    pub id: u64,
    /// Optional human-readable label.
    pub tag: Option<String>,
    /// Optional captured stack trace of the allocation site.
    pub stack_trace: Option<String>,
    /// Hash of the allocating thread's `ThreadId`.
    pub thread_id: u64,
    /// OS process id of the allocating process.
    pub process_id: u32,
    /// Requested alignment in bytes (1 = no special alignment).
    pub alignment: usize,
    /// Hint that the allocation is short-lived.
    pub is_temporary: bool,
    /// Expected lifetime hint, if known.
    pub expected_lifetime: Option<std::time::Duration>,
    /// Arbitrary consumer-attached payload; deliberately not cloned.
    pub user_data: Option<Box<dyn std::any::Any + Send + Sync>>,
}
impl Clone for AllocationMetadata {
    /// Clones every field except `user_data`.
    ///
    /// `user_data` is a `Box<dyn Any>` and cannot be cloned generically, so
    /// the copy always starts out with `None` there.
    fn clone(&self) -> Self {
        Self {
            id: self.id,
            thread_id: self.thread_id,
            process_id: self.process_id,
            alignment: self.alignment,
            is_temporary: self.is_temporary,
            expected_lifetime: self.expected_lifetime,
            tag: self.tag.clone(),
            stack_trace: self.stack_trace.clone(),
            // Deliberately dropped: the payload is not clonable.
            user_data: None,
        }
    }
}
/// Parameters for requesting a new allocation from the allocator.
#[derive(Debug, Clone)]
pub struct AllocationRequest {
    /// Requested size in bytes.
    pub size: usize,
    /// Required alignment in bytes; `None` = allocator default.
    pub alignment: Option<usize>,
    /// Which memory space to allocate in.
    pub allocation_type: AllocationType,
    /// Target device ordinal; `None` = allocator default.
    pub device_id: Option<usize>,
    /// Optional label propagated into `AllocationMetadata`.
    pub tag: Option<String>,
    /// Hint that the allocation is short-lived.
    pub is_temporary: bool,
    /// Expected lifetime hint, if known.
    pub expected_lifetime: Option<std::time::Duration>,
    /// Scheduling priority relative to other requests.
    pub priority: AllocationPriority,
}
/// Request priority. Variant order matters: `Ord` is derived, so
/// `Low < Normal < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum AllocationPriority {
    Low,
    Normal,
    High,
    Critical,
}
/// Aggregate allocator statistics.
#[derive(Debug, Clone)]
pub struct AllocationStats {
    /// Total allocations ever made.
    pub total_allocations: u64,
    /// Allocations currently live.
    pub active_allocations: u64,
    /// Cumulative bytes ever allocated.
    pub total_bytes_allocated: u64,
    /// Bytes currently allocated.
    pub current_bytes_allocated: u64,
    /// High-water mark of `current_bytes_allocated`.
    pub peak_bytes_allocated: u64,
    /// Mean allocation size in bytes.
    pub average_allocation_size: usize,
    /// Fraction of allocation requests that succeeded (0.0–1.0).
    pub success_rate: f32,
    /// Fraction of requests served from cached blocks (0.0–1.0).
    pub cache_hit_rate: f32,
    /// Mean wall-clock time per allocation.
    pub average_allocation_time: std::time::Duration,
    /// Fragmentation estimate (0.0 = none). NOTE(review): exact metric is
    /// defined by the producer of these stats, not in this file.
    pub fragmentation_level: f32,
}
impl CudaAllocation {
    /// Creates an in-use allocation on the default device (ordinal 0).
    pub fn new(ptr: CustDevicePointer<u8>, size: usize, size_class: usize) -> Self {
        Self::new_on_device(ptr, size, size_class, 0)
    }

    /// Creates an in-use allocation on an explicit device.
    pub fn new_on_device(
        ptr: CustDevicePointer<u8>,
        size: usize,
        size_class: usize,
        device_id: usize,
    ) -> Self {
        Self {
            ptr,
            size,
            size_class,
            allocation_time: Instant::now(),
            in_use: true,
            device_id,
        }
    }

    /// The device pointer backing this allocation.
    pub fn as_device_ptr(&self) -> CustDevicePointer<u8> {
        self.ptr
    }

    /// Whether the block is currently handed out.
    pub fn is_in_use(&self) -> bool {
        self.in_use
    }

    /// Marks the block as handed out.
    pub fn mark_in_use(&mut self) {
        self.in_use = true;
    }

    /// Marks the block as returned to the pool.
    pub fn mark_free(&mut self) {
        self.in_use = false;
    }

    /// Time elapsed since the block was allocated.
    pub fn age(&self) -> std::time::Duration {
        self.allocation_time.elapsed()
    }

    /// The device address as a raw byte pointer.
    pub fn as_ptr(&self) -> *mut u8 {
        self.ptr.as_ptr() as *mut u8
    }

    /// Size of the block in bytes.
    pub fn size(&self) -> usize {
        self.size
    }
}
/// Trait view over a device allocation; mirrors the inherent accessors.
impl CudaMemoryAllocation for CudaAllocation {
    fn as_ptr(&self) -> *mut u8 {
        self.ptr.as_ptr() as *mut u8
    }
    fn size(&self) -> usize {
        self.size
    }
    fn allocation_time(&self) -> Instant {
        self.allocation_time
    }
    // Valid = non-null device pointer and a non-zero size.
    fn is_valid(&self) -> bool {
        !self.ptr.is_null() && self.size > 0
    }
    fn allocation_type(&self) -> AllocationType {
        AllocationType::Device
    }
}
// SAFETY: CudaAllocation holds a device pointer plus plain-old-data fields;
// no host memory is dereferenced through it. NOTE(review): concurrent use of
// the underlying device memory must be synchronized by the owner.
unsafe impl Send for CudaAllocation {}
unsafe impl Sync for CudaAllocation {}
impl UnifiedAllocation {
    /// Creates a unified allocation with automatic placement.
    pub fn new(ptr: *mut u8, size: usize) -> Self {
        Self::new_with_preference(ptr, size, PreferredLocation::Auto)
    }

    /// Creates a unified allocation with an explicit preferred location.
    pub fn new_with_preference(
        ptr: *mut u8,
        size: usize,
        preferred_location: PreferredLocation,
    ) -> Self {
        Self {
            ptr: SendSyncPtr::new(ptr),
            size,
            allocation_time: Instant::now(),
            preferred_location,
            access_hints: AccessHints::default(),
            migration_stats: MigrationStats::default(),
            metadata: AllocationMetadata::new(),
        }
    }

    /// Base address of the allocation.
    pub fn ptr(&self) -> *mut u8 {
        self.ptr.as_ptr()
    }

    /// Alias of [`Self::ptr`]; kept for API symmetry with other allocation types.
    pub fn as_ptr(&self) -> *mut u8 {
        self.ptr.as_ptr()
    }

    /// Size of the allocation in bytes.
    pub fn size(&self) -> usize {
        self.size
    }

    /// Time elapsed since the allocation was created.
    pub fn age(&self) -> std::time::Duration {
        Instant::now().duration_since(self.allocation_time)
    }

    /// Records one migration event and updates the running average.
    ///
    /// `from_device` is true for a device→host migration. The running mean is
    /// recomputed in 128-bit integer nanoseconds: the previous code multiplied
    /// a `Duration` by `(count - 1) as u32` and divided by `count as u32`,
    /// which truncates the u64 counters (a wrapped count of 0 would panic on
    /// division) and can overflow `Duration * u32` for long histories.
    pub fn record_migration(
        &mut self,
        from_device: bool,
        bytes: usize,
        duration: std::time::Duration,
    ) {
        if from_device {
            self.migration_stats.device_to_host_migrations += 1;
        } else {
            self.migration_stats.host_to_device_migrations += 1;
        }
        self.migration_stats.total_bytes_migrated += bytes as u64;
        // >= 1 here because one of the counters was just incremented.
        let total_migrations = self.migration_stats.host_to_device_migrations
            + self.migration_stats.device_to_host_migrations;
        // Running mean in u128 nanoseconds: cannot overflow or divide by zero.
        let total_nanos = self.migration_stats.average_migration_time.as_nanos()
            * u128::from(total_migrations - 1)
            + duration.as_nanos();
        let avg_nanos = total_nanos / u128::from(total_migrations);
        self.migration_stats.average_migration_time =
            std::time::Duration::from_nanos(u64::try_from(avg_nanos).unwrap_or(u64::MAX));
        self.migration_stats.last_migration = Some(Instant::now());
    }

    /// Copies `data` from host memory into the start of this allocation.
    ///
    /// # Errors
    /// Returns `CudaError::AllocationError` if `data` is larger than the
    /// allocation.
    pub fn copy_from_host<T>(&self, data: &[T]) -> CudaResult<()> {
        let byte_size = data.len() * std::mem::size_of::<T>();
        if byte_size > self.size {
            return Err(crate::cuda::error::CudaError::AllocationError(format!(
                "Source data ({} bytes) larger than allocation ({} bytes)",
                byte_size, self.size
            )));
        }
        // SAFETY: byte_size <= self.size was checked above; source and
        // destination are distinct buffers, so the ranges cannot overlap.
        // Assumes self.ptr points to a live allocation of self.size bytes.
        unsafe {
            std::ptr::copy_nonoverlapping(data.as_ptr() as *const u8, self.ptr.as_ptr(), byte_size);
        }
        Ok(())
    }

    /// Copies the start of this allocation into the host buffer `data`.
    ///
    /// # Errors
    /// Returns `CudaError::AllocationError` if `data` is larger than the
    /// allocation (reading past the end would be undefined behavior).
    pub fn copy_to_host<T>(&self, data: &mut [T]) -> CudaResult<()> {
        let byte_size = data.len() * std::mem::size_of::<T>();
        if byte_size > self.size {
            return Err(crate::cuda::error::CudaError::AllocationError(format!(
                "Destination buffer ({} bytes) larger than allocation ({} bytes)",
                byte_size, self.size
            )));
        }
        // SAFETY: byte_size <= self.size was checked above; source and
        // destination are distinct buffers, so the ranges cannot overlap.
        // Assumes self.ptr points to a live allocation of self.size bytes.
        unsafe {
            std::ptr::copy_nonoverlapping(
                self.ptr.as_ptr() as *const u8,
                data.as_mut_ptr() as *mut u8,
                byte_size,
            );
        }
        Ok(())
    }
}
/// Trait view over a unified allocation; mirrors the inherent accessors.
impl CudaMemoryAllocation for UnifiedAllocation {
    fn as_ptr(&self) -> *mut u8 {
        self.ptr.as_ptr()
    }
    fn size(&self) -> usize {
        self.size
    }
    fn allocation_time(&self) -> Instant {
        self.allocation_time
    }
    // Valid = non-null pointer and a non-zero size.
    fn is_valid(&self) -> bool {
        !self.ptr.is_null() && self.size > 0
    }
    fn allocation_type(&self) -> AllocationType {
        AllocationType::Unified
    }
}
impl PinnedAllocation {
    /// Creates an unmapped pinned allocation with default flags.
    pub fn new(ptr: *mut u8, size: usize) -> Self {
        Self::new_with_mapping(ptr, size, None, PinnedMemoryFlags::default())
    }

    /// Creates a pinned allocation, optionally mapped into device space.
    /// `is_mapped` is derived from whether `device_ptr` is present.
    pub fn new_with_mapping(
        ptr: *mut u8,
        size: usize,
        device_ptr: Option<CustDevicePointer<u8>>,
        flags: PinnedMemoryFlags,
    ) -> Self {
        Self {
            ptr: SendSyncPtr::new(ptr),
            size,
            allocation_time: Instant::now(),
            usage_count: 0,
            is_mapped: device_ptr.is_some(),
            device_ptr,
            mapping_flags: flags,
            metadata: AllocationMetadata::new(),
        }
    }

    /// Bumps the reuse counter.
    pub fn increment_usage(&mut self) {
        self.usage_count += 1;
    }

    /// Time elapsed since the buffer was allocated.
    pub fn age(&self) -> std::time::Duration {
        self.allocation_time.elapsed()
    }

    /// Whether the host buffer is mapped into device address space.
    pub fn is_mapped(&self) -> bool {
        self.is_mapped
    }

    /// Device-side alias of the mapped host buffer, if any.
    pub fn device_ptr(&self) -> Option<CustDevicePointer<u8>> {
        self.device_ptr
    }
}
/// Trait view over a pinned allocation; mirrors the inherent accessors.
impl CudaMemoryAllocation for PinnedAllocation {
    fn as_ptr(&self) -> *mut u8 {
        self.ptr.as_ptr()
    }
    fn size(&self) -> usize {
        self.size
    }
    fn allocation_time(&self) -> Instant {
        self.allocation_time
    }
    // Valid = non-null pointer and a non-zero size.
    fn is_valid(&self) -> bool {
        !self.ptr.is_null() && self.size > 0
    }
    fn allocation_type(&self) -> AllocationType {
        AllocationType::Pinned
    }
}
// SAFETY: the struct already derives Send/Sync-compatible fields except for
// the SendSyncPtr wrapper, which asserts owner-side synchronization.
// NOTE(review): concurrent access to the pinned buffer must be coordinated
// by the allocator that hands these out.
unsafe impl Send for PinnedAllocation {}
unsafe impl Sync for PinnedAllocation {}
impl Default for AccessHints {
fn default() -> Self {
Self {
read_mostly: false,
access_frequency: AccessFrequency::Medium,
locality: DataLocality::Mixed,
custom_hints: Vec::new(),
}
}
}
impl Default for MigrationStats {
fn default() -> Self {
Self {
host_to_device_migrations: 0,
device_to_host_migrations: 0,
total_bytes_migrated: 0,
average_migration_time: std::time::Duration::from_secs(0),
last_migration: None,
}
}
}
impl Default for PinnedMemoryFlags {
fn default() -> Self {
Self {
enable_mapping: false,
portable: false,
write_combining: false,
raw_flags: 0,
}
}
}
impl AllocationMetadata {
    /// Creates metadata with a fresh process-unique id and caller context.
    pub fn new() -> Self {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};
        use std::sync::atomic::{AtomicU64, Ordering};

        // Monotonic id source; starts at 1 so 0 can mean "no id".
        static ALLOCATION_COUNTER: AtomicU64 = AtomicU64::new(1);

        // ThreadId has no stable integer form, so hash it into a u64.
        let mut hasher = DefaultHasher::new();
        std::thread::current().id().hash(&mut hasher);
        let thread_id = hasher.finish();

        Self {
            id: ALLOCATION_COUNTER.fetch_add(1, Ordering::Relaxed),
            tag: None,
            stack_trace: None,
            thread_id,
            process_id: std::process::id(),
            alignment: 1,
            is_temporary: false,
            expected_lifetime: None,
            user_data: None,
        }
    }

    /// Creates metadata pre-populated with a human-readable tag.
    pub fn with_tag(tag: String) -> Self {
        Self {
            tag: Some(tag),
            ..Self::new()
        }
    }
}
impl Default for AllocationMetadata {
fn default() -> Self {
Self::new()
}
}
impl Default for AllocationRequest {
fn default() -> Self {
Self {
size: 0,
alignment: None,
allocation_type: AllocationType::Device,
device_id: None,
tag: None,
is_temporary: false,
expected_lifetime: None,
priority: AllocationPriority::Normal,
}
}
}
impl Default for AllocationStats {
fn default() -> Self {
Self {
total_allocations: 0,
active_allocations: 0,
total_bytes_allocated: 0,
current_bytes_allocated: 0,
peak_bytes_allocated: 0,
average_allocation_size: 0,
success_rate: 1.0,
cache_hit_rate: 0.0,
average_allocation_time: std::time::Duration::from_secs(0),
fragmentation_level: 0.0,
}
}
}
pub type AllocationResult<T> = Result<T, String>;
/// Strategy for choosing which free block satisfies a request.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AllocationStrategy {
    /// First block large enough.
    FirstFit,
    /// Smallest block large enough (the default; see `Default` impl below).
    BestFit,
    /// Largest available block.
    WorstFit,
    /// Buddy-system allocation.
    Buddy,
    /// Slab allocation for fixed-size objects.
    Slab,
}
impl Default for AllocationStrategy {
fn default() -> Self {
Self::BestFit
}
}
/// Named alignment policies; resolve to byte counts via [`MemoryAlignment::bytes`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryAlignment {
    /// 256 bytes (the library default).
    Default,
    /// 128 bytes (cache-line oriented).
    Cache,
    /// 4 KiB (page size).
    Page,
    /// 2 MiB (huge-page size).
    LargePage,
    /// Caller-supplied alignment in bytes.
    Custom(usize),
}
impl MemoryAlignment {
pub fn bytes(&self) -> usize {
match self {
MemoryAlignment::Default => 256,
MemoryAlignment::Cache => 128,
MemoryAlignment::Page => 4096,
MemoryAlignment::LargePage => 2 * 1024 * 1024,
MemoryAlignment::Custom(bytes) => *bytes,
}
}
}
impl Default for MemoryAlignment {
fn default() -> Self {
Self::Default
}
}
/// Rounds `size` up to its device-pool size class: the next power of two,
/// never below the 256-byte minimum. Handles `size == 0` (→ 256) as well,
/// since `0usize.next_power_of_two()` is 1.
pub fn size_class(size: usize) -> usize {
    const MIN_SIZE: usize = 256;
    size.next_power_of_two().max(MIN_SIZE)
}
/// Rounds `size` up to its pinned-pool size class: the next power of two,
/// never below the 4 KiB page-size minimum. Handles `size == 0` (→ 4096).
pub fn pinned_size_class(size: usize) -> usize {
    const MIN_SIZE: usize = 4096;
    size.next_power_of_two().max(MIN_SIZE)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Each call to new() must draw a distinct id from the global counter.
    #[test]
    fn test_allocation_metadata() {
        let metadata1 = AllocationMetadata::new();
        let metadata2 = AllocationMetadata::new();
        assert_ne!(metadata1.id, metadata2.id);
        assert_eq!(metadata1.alignment, 1);
        assert!(!metadata1.is_temporary);
    }

    // Device pool: round up to the next power of two, floor 256.
    #[test]
    fn test_size_class_calculation() {
        assert_eq!(size_class(100), 256);
        assert_eq!(size_class(256), 256);
        assert_eq!(size_class(300), 512);
        assert_eq!(size_class(1024), 1024);
        assert_eq!(size_class(1025), 2048);
    }

    // Pinned pool: same rounding, but floored at the 4 KiB page size.
    #[test]
    fn test_pinned_size_class_calculation() {
        assert_eq!(pinned_size_class(1000), 4096);
        assert_eq!(pinned_size_class(4096), 4096);
        assert_eq!(pinned_size_class(5000), 8192);
        assert_eq!(pinned_size_class(8192), 8192);
        assert_eq!(pinned_size_class(8193), 16384);
    }

    // Defaults must be the neutral hint set.
    #[test]
    fn test_access_hints_default() {
        let hints = AccessHints::default();
        assert!(!hints.read_mostly);
        assert_eq!(hints.access_frequency, AccessFrequency::Medium);
        assert_eq!(hints.locality, DataLocality::Mixed);
        assert!(hints.custom_hints.is_empty());
    }

    // Derived Ord must follow variant declaration order.
    #[test]
    fn test_allocation_priority_ordering() {
        assert!(AllocationPriority::Critical > AllocationPriority::High);
        assert!(AllocationPriority::High > AllocationPriority::Normal);
        assert!(AllocationPriority::Normal > AllocationPriority::Low);
    }

    // Fresh stats start with all counters zero and no migration recorded.
    #[test]
    fn test_migration_stats() {
        let mut stats = MigrationStats::default();
        assert_eq!(stats.host_to_device_migrations, 0);
        assert_eq!(stats.device_to_host_migrations, 0);
        assert_eq!(stats.total_bytes_migrated, 0);
        assert!(stats.last_migration.is_none());
    }

    // Default flags describe plain, unmapped pinned memory.
    #[test]
    fn test_pinned_memory_flags() {
        let flags = PinnedMemoryFlags::default();
        assert!(!flags.enable_mapping);
        assert!(!flags.portable);
        assert!(!flags.write_combining);
        assert_eq!(flags.raw_flags, 0);
    }

    // Struct-update syntax must keep explicit fields and default the rest.
    #[test]
    fn test_allocation_request() {
        let request = AllocationRequest {
            size: 1024,
            allocation_type: AllocationType::Device,
            tag: Some("test".to_string()),
            priority: AllocationPriority::High,
            ..Default::default()
        };
        assert_eq!(request.size, 1024);
        assert_eq!(request.allocation_type, AllocationType::Device);
        assert_eq!(request.tag, Some("test".to_string()));
        assert_eq!(request.priority, AllocationPriority::High);
    }
}