/// Memory profile used to pick staging-buffer tuning values.
///
/// Each variant selects a set of defaults (pool size, ring slot count, batch
/// limits, mapping and fence strategy) through the methods on this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuMemoryProfile {
/// Chosen by `from_device_type` for `GpuDeviceType::DiscreteGpu`; has the largest pool and batch sizes.
Discrete,
/// Chosen by `from_device_type` for `GpuDeviceType::IntegratedGpu`; has mid-sized pool and batch values.
Integrated,
/// Chosen by `from_device_type` for every other device type; has the smallest pool and batch values.
Cpu,
}
impl GpuMemoryProfile {
    /// Picks the memory profile that corresponds to an adapter's device type.
    ///
    /// Discrete and integrated GPUs map to their own profiles; any other
    /// device type falls back to [`GpuMemoryProfile::Cpu`].
    pub fn from_device_type(device_type: super::adapter::GpuDeviceType) -> Self {
        match device_type {
            super::adapter::GpuDeviceType::IntegratedGpu => Self::Integrated,
            super::adapter::GpuDeviceType::DiscreteGpu => Self::Discrete,
            _ => Self::Cpu,
        }
    }

    /// Total staging pool size in bytes: 64 MiB, 16 MiB or 4 MiB.
    pub fn buffer_pool_size(&self) -> usize {
        const MIB: usize = 1024 * 1024;
        let mebibytes = match *self {
            Self::Discrete => 64,
            Self::Integrated => 16,
            Self::Cpu => 4,
        };
        mebibytes * MIB
    }

    /// Number of ring slots the pool is divided into: three for discrete GPUs, two otherwise.
    pub fn ring_buffer_slots(&self) -> usize {
        match *self {
            Self::Discrete => 3,
            Self::Integrated | Self::Cpu => 2,
        }
    }

    /// Largest upload batch in bytes: 4 MiB, 1 MiB or 256 KiB.
    pub fn max_upload_batch_size(&self) -> usize {
        const KIB: usize = 1024;
        let kibibytes = match *self {
            Self::Discrete => 4 * 1024,
            Self::Integrated => 1024,
            Self::Cpu => 256,
        };
        kibibytes * KIB
    }

    /// Whether small uploads should be merged; enabled for both GPU profiles.
    pub fn merge_small_uploads(&self) -> bool {
        match *self {
            Self::Discrete | Self::Integrated => true,
            Self::Cpu => false,
        }
    }

    /// Size in bytes below which an upload counts as "small": 64 KiB, 16 KiB or 4 KiB.
    pub fn small_upload_threshold(&self) -> usize {
        const KIB: usize = 1024;
        let kibibytes = match *self {
            Self::Discrete => 64,
            Self::Integrated => 16,
            Self::Cpu => 4,
        };
        kibibytes * KIB
    }

    /// Mapping strategy for the profile: persistent mapping for GPUs, write-combined for CPU.
    pub fn mapping_strategy(&self) -> MappingStrategy {
        match *self {
            Self::Discrete | Self::Integrated => MappingStrategy::PersistentMapped,
            Self::Cpu => MappingStrategy::WriteCombined,
        }
    }

    /// Fence strategy for the profile: GPU timestamps for discrete GPUs, CPU fences otherwise.
    pub fn fence_strategy(&self) -> FenceStrategy {
        match *self {
            Self::Discrete => FenceStrategy::GpuTimestamp,
            Self::Integrated | Self::Cpu => FenceStrategy::CpuFence,
        }
    }

    /// Whether coherent memory should be used; true for every profile except discrete.
    pub fn use_coherent_memory(&self) -> bool {
        match *self {
            Self::Discrete => false,
            Self::Integrated | Self::Cpu => true,
        }
    }
}
/// Staging-memory mapping strategy, as reported by `GpuMemoryProfile::mapping_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MappingStrategy {
/// Returned for the `Discrete` and `Integrated` profiles.
PersistentMapped,
/// Returned for the `Cpu` profile.
WriteCombined,
/// Not returned by `GpuMemoryProfile::mapping_strategy`.
Cached,
}
/// Synchronisation strategy, as reported by `GpuMemoryProfile::fence_strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FenceStrategy {
/// Returned for the `Discrete` profile.
GpuTimestamp,
/// Returned for the `Integrated` and `Cpu` profiles.
CpuFence,
/// Not returned by `GpuMemoryProfile::fence_strategy`.
Spinlock,
}
/// Tuning parameters shared by the staging buffer pool and the upload batcher.
///
/// Values are normally derived from a [`GpuMemoryProfile`] through
/// [`StagingBufferPoolConfig::for_profile`]; every field is public and can be
/// adjusted afterwards.
#[derive(Debug, Clone)]
pub struct StagingBufferPoolConfig {
    /// Total size of the pool, split evenly between the ring slots.
    pub pool_size: usize,
    /// Number of ring slots the pool is divided into.
    pub ring_slots: usize,
    /// Upper bound on the size of a single batch or ring allocation.
    pub max_batch_size: usize,
    /// Whether small uploads may be merged.
    pub merge_uploads: bool,
    /// Uploads strictly smaller than this are candidates for merging.
    pub small_upload_threshold: usize,
    /// Mapping strategy taken from the profile.
    pub mapping_strategy: MappingStrategy,
    /// Fence strategy taken from the profile.
    pub fence_strategy: FenceStrategy,
    /// Whether coherent memory is requested.
    pub use_coherent_memory: bool,
    /// Alignment applied to allocation sizes; `for_profile` always sets 256.
    pub alignment: usize,
}

impl StagingBufferPoolConfig {
    /// Builds the default configuration, which is the discrete-GPU one.
    pub fn new() -> Self {
        Self::discrete()
    }

    /// Builds a configuration from the defaults of `profile`, with an alignment of 256.
    pub fn for_profile(profile: GpuMemoryProfile) -> Self {
        let pool_size = profile.buffer_pool_size();
        let ring_slots = profile.ring_buffer_slots();
        let max_batch_size = profile.max_upload_batch_size();
        let merge_uploads = profile.merge_small_uploads();
        let small_upload_threshold = profile.small_upload_threshold();
        let mapping_strategy = profile.mapping_strategy();
        let fence_strategy = profile.fence_strategy();
        let use_coherent_memory = profile.use_coherent_memory();

        Self {
            pool_size,
            ring_slots,
            max_batch_size,
            merge_uploads,
            small_upload_threshold,
            mapping_strategy,
            fence_strategy,
            use_coherent_memory,
            alignment: 256,
        }
    }

    /// Configuration for [`GpuMemoryProfile::Discrete`].
    pub fn discrete() -> Self {
        Self::for_profile(GpuMemoryProfile::Discrete)
    }

    /// Configuration for [`GpuMemoryProfile::Integrated`].
    pub fn integrated() -> Self {
        Self::for_profile(GpuMemoryProfile::Integrated)
    }

    /// Configuration for [`GpuMemoryProfile::Cpu`].
    pub fn cpu() -> Self {
        Self::for_profile(GpuMemoryProfile::Cpu)
    }

    /// Caps `pool_size` at a quarter of `available_memory`; never enlarges it.
    pub fn with_memory_limit(mut self, available_memory: usize) -> Self {
        let ceiling = available_memory / 4;
        if self.pool_size > ceiling {
            self.pool_size = ceiling;
        }
        self
    }
}

impl Default for StagingBufferPoolConfig {
    /// Same as [`StagingBufferPoolConfig::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Bookkeeping record for one slot of the staging ring.
#[derive(Debug)]
pub struct GpuRingBufferSlot {
/// Position of this slot in the pool's slot list.
pub index: usize,
/// Start of the slot's region, computed as `index * slot_size` when the pool is built.
pub offset: usize,
/// Size of the slot's region (`pool_size / ring_slots`).
pub size: usize,
/// Set by `GpuStagingBufferPool::next_frame` when the ring moves past this slot, cleared when the slot becomes current again.
pub in_use: bool,
/// Frame counter value recorded when the ring last moved past this slot.
pub last_used_frame: u64,
}
/// Ring-structured staging pool that hands out offset/size allocations frame by frame.
pub struct GpuStagingBufferPool {
/// Tuning values the pool was created with.
config: StagingBufferPoolConfig,
/// Ring slots; each covers an equal share of `config.pool_size`.
slots: Vec<GpuRingBufferSlot>,
/// Index into `slots` of the slot currently receiving allocations.
current_slot: usize,
/// Frame counter, incremented by `next_frame`.
current_frame: u64,
/// Running total of aligned sizes handed out from ring slots; never reset.
total_allocated: usize,
/// Aligned size handed out from the current slot; reset when the ring advances.
total_used: usize,
/// Optional pool consulted when a request cannot be served from the current slot.
fallback_pool: Option<crate::memory::BufferPool>,
}
impl GpuStagingBufferPool {
pub fn new(config: StagingBufferPoolConfig) -> Self {
let slot_size = config.pool_size / config.ring_slots;
let mut slots = Vec::with_capacity(config.ring_slots);
for i in 0..config.ring_slots {
slots.push(GpuRingBufferSlot {
index: i,
offset: i * slot_size,
size: slot_size,
in_use: false,
last_used_frame: 0,
});
}
Self {
config,
slots,
current_slot: 0,
current_frame: 0,
total_allocated: 0,
total_used: 0,
fallback_pool: None,
}
}
pub fn for_gpu_type(device_type: super::adapter::GpuDeviceType) -> Self {
let profile = GpuMemoryProfile::from_device_type(device_type);
Self::new(StagingBufferPoolConfig::for_profile(profile))
}
pub fn with_fallback_pool(mut self, pool: crate::memory::BufferPool) -> Self {
self.fallback_pool = Some(pool);
self
}
pub fn next_frame(&mut self) {
self.current_frame += 1;
if let Some(slot) = self.slots.get_mut(self.current_slot) {
slot.in_use = true;
slot.last_used_frame = self.current_frame;
}
self.current_slot = (self.current_slot + 1) % self.config.ring_slots;
if let Some(slot) = self.slots.get_mut(self.current_slot) {
slot.in_use = false;
self.total_used = 0;
}
}
pub fn allocate(&mut self, size: usize) -> Option<GpuBufferAllocation> {
let aligned_size = (size + self.config.alignment - 1) & !(self.config.alignment - 1);
if self.config.merge_uploads && size < self.config.small_upload_threshold {
if let Some(allocation) = self.try_merge_allocate(size) {
return Some(allocation);
}
}
if aligned_size > self.config.max_batch_size {
return self.allocate_fallback(size);
}
let slot = self.slots.get(self.current_slot)?;
if self.total_used + aligned_size > slot.size {
return self.allocate_fallback(size);
}
let offset = slot.offset + self.total_used;
self.total_used += aligned_size;
self.total_allocated += aligned_size;
Some(GpuBufferAllocation {
slot_index: self.current_slot,
offset,
size: aligned_size,
frame_index: self.current_frame,
is_fallback: false,
})
}
fn allocate_fallback(&mut self, size: usize) -> Option<GpuBufferAllocation> {
if let Some(ref mut pool) = self.fallback_pool {
let buffer = pool.acquire_sized(size);
Some(GpuBufferAllocation {
slot_index: usize::MAX, offset: 0,
size: buffer.len(),
frame_index: self.current_frame,
is_fallback: true,
})
} else {
None
}
}
fn try_merge_allocate(&mut self, size: usize) -> Option<GpuBufferAllocation> {
let aligned_size = (size + self.config.alignment - 1) & !(self.config.alignment - 1);
let slot = self.slots.get(self.current_slot)?;
if slot.in_use {
return None;
}
let remaining_space = slot.size - self.total_used;
if remaining_space >= aligned_size {
let offset = self.total_used;
self.total_used += aligned_size;
self.total_allocated += aligned_size;
Some(GpuBufferAllocation {
slot_index: slot.index,
offset,
size: aligned_size,
frame_index: self.current_frame,
is_fallback: false,
})
} else {
None
}
}
pub fn current_frame(&self) -> u64 {
self.current_frame
}
pub fn config(&self) -> &StagingBufferPoolConfig {
&self.config
}
pub fn memory_stats(&self) -> GpuBufferPoolStats {
GpuBufferPoolStats {
total_size: self.config.pool_size,
used_size: self.total_used,
allocated_size: self.total_allocated,
slot_count: self.config.ring_slots,
current_slot: self.current_slot,
current_frame: self.current_frame,
fallback_used: self.fallback_pool.as_ref().map(|p| p.available()).unwrap_or(0),
}
}
pub fn wait_for_slot(&mut self, slot_index: usize) {
if let Some(slot) = self.slots.get(slot_index) {
if slot.in_use && slot.last_used_frame >= self.current_frame {
}
}
}
}
/// Result of a successful `GpuStagingBufferPool::allocate` call.
#[derive(Debug, Clone, Copy)]
pub struct GpuBufferAllocation {
/// Ring slot the allocation came from; `usize::MAX` for fallback allocations.
pub slot_index: usize,
/// Offset assigned to the allocation; `0` for fallback allocations.
pub offset: usize,
/// Reserved size: the aligned request size, or the fallback buffer's length.
pub size: usize,
/// Pool frame counter at the time of the allocation.
pub frame_index: u64,
/// `true` when the allocation was served by the fallback pool.
pub is_fallback: bool,
}
/// Snapshot of pool counters produced by `GpuStagingBufferPool::memory_stats`.
#[derive(Debug, Clone, Copy)]
pub struct GpuBufferPoolStats {
/// Configured pool size (`config.pool_size`).
pub total_size: usize,
/// Amount handed out from the current ring slot since the ring last advanced.
pub used_size: usize,
/// Running total handed out from ring slots over the pool's lifetime.
pub allocated_size: usize,
/// Configured number of ring slots.
pub slot_count: usize,
/// Index of the slot currently receiving allocations.
pub current_slot: usize,
/// Pool frame counter when the snapshot was taken.
pub current_frame: u64,
/// Value of the fallback pool's `available()` call, or `0` without a fallback pool.
pub fallback_used: usize,
}
/// Collects uploads into a batch bounded by `config.max_batch_size`.
pub struct GpuUploadBatcher {
    config: StagingBufferPoolConfig,
    pending_uploads: Vec<GpuPendingUpload>,
    current_batch_size: usize,
}

/// One queued upload: the payload bytes and the destination offset they target.
#[derive(Debug)]
struct GpuPendingUpload {
    data: Vec<u8>,
    destination_offset: usize,
}

impl GpuUploadBatcher {
    /// Creates an empty batcher governed by `config`.
    pub fn new(config: StagingBufferPoolConfig) -> Self {
        GpuUploadBatcher {
            pending_uploads: Vec::new(),
            current_batch_size: 0,
            config,
        }
    }

    /// Queues `data` for `destination_offset`.
    ///
    /// Returns `false` (and discards `data`) when adding it would push the
    /// batch past `max_batch_size`. When merging is enabled and the payload is
    /// below the small-upload threshold, it is appended to an already queued
    /// upload that ends exactly at `destination_offset`, if one exists.
    pub fn add_upload(&mut self, data: Vec<u8>, destination_offset: usize) -> bool {
        let incoming = data.len();
        let fits = self.current_batch_size + incoming <= self.config.max_batch_size;
        if !fits {
            return false;
        }

        let mergeable =
            self.config.merge_uploads && incoming < self.config.small_upload_threshold;
        let merged_bytes = if mergeable {
            self.try_merge(&data, destination_offset)
        } else {
            None
        };

        match merged_bytes {
            Some(bytes) => self.current_batch_size += bytes,
            None => {
                self.pending_uploads.push(GpuPendingUpload { data, destination_offset });
                self.current_batch_size += incoming;
            }
        }
        true
    }

    /// Appends `data` to the first queued upload whose end equals `offset`.
    ///
    /// Returns the number of bytes appended, or `None` when no queued upload
    /// is contiguous with `offset`.
    fn try_merge(&mut self, data: &[u8], offset: usize) -> Option<usize> {
        let neighbour = self
            .pending_uploads
            .iter_mut()
            .find(|pending| pending.destination_offset + pending.data.len() == offset)?;
        neighbour.data.extend_from_slice(data);
        Some(data.len())
    }

    /// Bytes currently queued in the batch.
    pub fn batch_size(&self) -> usize {
        self.current_batch_size
    }

    /// Whether the batch has reached `max_batch_size`.
    pub fn is_full(&self) -> bool {
        !(self.current_batch_size < self.config.max_batch_size)
    }

    /// Removes and returns every queued upload as `(data, destination_offset)`
    /// pairs, in queue order, and resets the batch size to zero.
    pub fn flush(&mut self) -> Vec<(Vec<u8>, usize)> {
        self.current_batch_size = 0;
        self.pending_uploads
            .drain(..)
            .map(|GpuPendingUpload { data, destination_offset }| (data, destination_offset))
            .collect()
    }

    /// Number of queued uploads (merged uploads count once).
    pub fn pending_count(&self) -> usize {
        self.pending_uploads.len()
    }
}
/// Keeps a bounded history of pool statistics and derives simple health
/// signals from it.
#[derive(Debug)]
pub struct GpuBufferPoolMonitor {
    /// Oldest sample at the front, newest at the back.
    stats_history: std::collections::VecDeque<GpuBufferPoolStats>,
    /// Maximum number of samples retained.
    max_history: usize,
}

impl GpuBufferPoolMonitor {
    /// Creates a monitor that retains at most `max_history` samples.
    ///
    /// With `max_history == 0` the monitor retains nothing and every signal
    /// stays at its "no data" value.
    pub fn new(max_history: usize) -> Self {
        Self {
            stats_history: std::collections::VecDeque::with_capacity(max_history),
            max_history,
        }
    }

    /// Appends a sample, evicting the oldest one when the history is full.
    pub fn record(&mut self, stats: GpuBufferPoolStats) {
        if self.max_history == 0 {
            // Nothing may be retained; also avoids evicting from an empty history.
            return;
        }
        if self.stats_history.len() >= self.max_history {
            // A deque makes eviction O(1) instead of shifting every element.
            self.stats_history.pop_front();
        }
        self.stats_history.push_back(stats);
    }

    /// Fraction of the pool in use for one sample.
    ///
    /// A pool of size zero counts as 0% utilised rather than producing NaN.
    fn utilization(stats: &GpuBufferPoolStats) -> f32 {
        if stats.total_size == 0 {
            0.0
        } else {
            stats.used_size as f32 / stats.total_size as f32
        }
    }

    /// Mean utilisation over the retained history; `0.0` when it is empty.
    pub fn average_utilization(&self) -> f32 {
        if self.stats_history.is_empty() {
            return 0.0;
        }
        let total: f32 = self.stats_history.iter().map(Self::utilization).sum();
        total / self.stats_history.len() as f32
    }

    /// `true` when each of the three most recent samples is above 80% utilisation.
    ///
    /// Always `false` with fewer than three samples.
    pub fn is_under_pressure(&self) -> bool {
        self.stats_history.len() >= 3
            && self.stats_history.iter().rev().take(3).all(|s| Self::utilization(s) > 0.8)
    }

    /// `true` when at least ten samples are retained and their mean
    /// utilisation is below 30%.
    pub fn is_underutilized(&self) -> bool {
        self.stats_history.len() >= 10 && self.average_utilization() < 0.3
    }

    /// `true` when any of the five most recent samples reports a non-zero
    /// `fallback_used`.
    ///
    /// Always `false` with fewer than five samples.
    pub fn is_fallback_heavy(&self) -> bool {
        self.stats_history.len() >= 5
            && self.stats_history.iter().rev().take(5).any(|s| s.fallback_used > 0)
    }
}
/// Helpers that connect GPU memory profiles to the crate's generic buffer pool.
pub mod integration {
    use super::*;
    use crate::memory::{BufferPool, PoolConfig};

    /// Builds a [`PoolConfig`] sized from the profile's ring slot count.
    ///
    /// The pool starts with one entry per ring slot, may grow to twice that,
    /// and uses a growth factor of `1.0`.
    pub fn create_gpu_buffer_pool_config(profile: GpuMemoryProfile) -> PoolConfig {
        let ring_slots = profile.ring_buffer_slots();
        PoolConfig {
            initial_size: ring_slots,
            max_size: ring_slots * 2,
            growth_factor: 1.0,
        }
    }

    /// Builds the fallback [`BufferPool`] for `profile`.
    ///
    /// Buffers are 4 MiB, 1 MiB or 256 KiB depending on the profile; the pool
    /// is created with one buffer per ring slot and a limit of twice that.
    pub fn create_fallback_pool(profile: GpuMemoryProfile) -> BufferPool {
        let buffer_size = match profile {
            GpuMemoryProfile::Discrete => 4 * 1024 * 1024,
            GpuMemoryProfile::Integrated => 1024 * 1024,
            GpuMemoryProfile::Cpu => 256 * 1024,
        };
        let ring_slots = profile.ring_buffer_slots();
        BufferPool::new(buffer_size, ring_slots, ring_slots * 2)
    }
}
#[cfg(test)]
mod tests {
    use super::super::adapter::GpuDeviceType;
    use super::*;

    const MIB: usize = 1024 * 1024;

    /// Discrete profile: 64 MiB pool, three ring slots, merging enabled.
    #[test]
    fn test_gpu_memory_profile_discrete() {
        let discrete = GpuMemoryProfile::Discrete;
        assert_eq!(discrete.buffer_pool_size(), 64 * MIB);
        assert_eq!(discrete.ring_buffer_slots(), 3);
        assert!(discrete.merge_small_uploads());
    }

    /// Integrated profile: 16 MiB pool, two ring slots, coherent memory.
    #[test]
    fn test_gpu_memory_profile_integrated() {
        let integrated = GpuMemoryProfile::Integrated;
        assert_eq!(integrated.buffer_pool_size(), 16 * MIB);
        assert_eq!(integrated.ring_buffer_slots(), 2);
        assert!(integrated.use_coherent_memory());
    }

    /// CPU profile: 4 MiB pool, two ring slots, merging disabled.
    #[test]
    fn test_gpu_memory_profile_cpu() {
        let cpu = GpuMemoryProfile::Cpu;
        assert_eq!(cpu.buffer_pool_size(), 4 * MIB);
        assert_eq!(cpu.ring_buffer_slots(), 2);
        assert!(!cpu.merge_small_uploads());
    }

    /// The first allocation of a fresh pool lands at the start of slot 0.
    #[test]
    fn test_buffer_pool_allocation() {
        let mut pool = GpuStagingBufferPool::new(StagingBufferPoolConfig::discrete());
        let GpuBufferAllocation { slot_index, offset, size, is_fallback, .. } =
            pool.allocate(1024).unwrap();
        assert_eq!(slot_index, 0);
        assert_eq!(offset, 0);
        assert!(size >= 1024);
        assert!(!is_fallback);
    }

    /// Each `next_frame` call bumps the frame counter by one.
    #[test]
    fn test_buffer_pool_ring_rotation() {
        let mut pool = GpuStagingBufferPool::new(StagingBufferPoolConfig::discrete());
        for expected_frame in 1..=2u64 {
            pool.next_frame();
            assert_eq!(pool.current_frame(), expected_frame);
        }
    }

    /// A queued upload is counted in the batch size and drained by `flush`.
    #[test]
    fn test_upload_batcher() {
        let mut batcher = GpuUploadBatcher::new(StagingBufferPoolConfig::discrete());
        let accepted = batcher.add_upload(vec![0u8; 1024], 0);
        assert!(accepted);
        assert_eq!(batcher.batch_size(), 1024);

        let drained = batcher.flush();
        assert_eq!(drained.len(), 1);
        assert_eq!(batcher.batch_size(), 0);
    }

    /// A single half-full sample yields an average utilisation of 0.5.
    #[test]
    fn test_buffer_pool_monitor() {
        let half_full = GpuBufferPoolStats {
            total_size: 1024,
            used_size: 512,
            allocated_size: 512,
            slot_count: 3,
            current_slot: 0,
            current_frame: 1,
            fallback_used: 0,
        };
        let mut monitor = GpuBufferPoolMonitor::new(10);
        monitor.record(half_full);
        assert_eq!(monitor.average_utilization(), 0.5);
    }

    /// Device types map onto the matching memory profiles.
    #[test]
    fn test_from_device_type() {
        let discrete = GpuMemoryProfile::from_device_type(GpuDeviceType::DiscreteGpu);
        assert_eq!(discrete, GpuMemoryProfile::Discrete);

        let integrated = GpuMemoryProfile::from_device_type(GpuDeviceType::IntegratedGpu);
        assert_eq!(integrated, GpuMemoryProfile::Integrated);

        let cpu = GpuMemoryProfile::from_device_type(GpuDeviceType::Cpu);
        assert_eq!(cpu, GpuMemoryProfile::Cpu);
    }

    /// The integration config mirrors the discrete profile's ring slot count.
    #[test]
    fn test_integration_config() {
        let pool_config = integration::create_gpu_buffer_pool_config(GpuMemoryProfile::Discrete);
        assert_eq!(pool_config.initial_size, 3);
        assert_eq!(pool_config.max_size, 6);
    }
}