use crate::sys::*;
use crate::core::*;
/// GPU hardware vendor, used to select vendor-specific barrier settings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuVendor {
    /// Vendor ID `0x1002`.
    AMD,
    /// Vendor ID `0x10DE`.
    NVIDIA,
    /// Vendor ID `0x8086`.
    Intel,
    /// Any vendor ID that is not recognised above.
    Other,
}

impl GpuVendor {
    /// Maps a numeric vendor ID to a [`GpuVendor`].
    ///
    /// Unrecognised IDs map to [`GpuVendor::Other`]; this function never fails.
    pub fn from_vendor_id(id: u32) -> Self {
        const AMD_ID: u32 = 0x1002;
        const NVIDIA_ID: u32 = 0x10DE;
        const INTEL_ID: u32 = 0x8086;

        match id {
            AMD_ID => Self::AMD,
            NVIDIA_ID => Self::NVIDIA,
            INTEL_ID => Self::Intel,
            _ => Self::Other,
        }
    }
}
/// The kind of hazard a barrier protects against.
///
/// The concrete stage and access masks for each kind are chosen by
/// `BarrierConfig::optimal_for`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BarrierType {
    /// Host write followed by a compute-shader read.
    UploadToRead,
    /// Compute-shader read followed by a compute-shader write.
    ReadToWrite,
    /// Compute-shader write followed by a compute-shader read.
    WriteToRead,
}
/// Stage and access masks describing one pipeline barrier.
pub struct BarrierConfig {
    /// Pipeline stage(s) whose work must complete before the barrier.
    pub src_stage: VkPipelineStageFlags,
    /// Pipeline stage(s) that wait on the barrier.
    pub dst_stage: VkPipelineStageFlags,
    /// Access type(s) performed before the barrier.
    pub src_access: VkAccessFlags,
    /// Access type(s) performed after the barrier.
    pub dst_access: VkAccessFlags,
}
impl BarrierConfig {
pub fn optimal_for(vendor: GpuVendor, barrier_type: BarrierType) -> Self {
match (vendor, barrier_type) {
(_, BarrierType::UploadToRead) => BarrierConfig {
src_stage: VkPipelineStageFlags::HOST,
dst_stage: VkPipelineStageFlags::COMPUTE_SHADER,
src_access: VkAccessFlags::HOST_WRITE,
dst_access: VkAccessFlags::SHADER_READ,
},
(GpuVendor::AMD, BarrierType::ReadToWrite) => BarrierConfig {
src_stage: VkPipelineStageFlags::COMPUTE_SHADER,
dst_stage: VkPipelineStageFlags::COMPUTE_SHADER,
src_access: VkAccessFlags::SHADER_READ,
dst_access: VkAccessFlags::SHADER_WRITE,
},
(GpuVendor::NVIDIA, BarrierType::ReadToWrite) => BarrierConfig {
src_stage: VkPipelineStageFlags::COMPUTE_SHADER,
dst_stage: VkPipelineStageFlags::COMPUTE_SHADER,
src_access: VkAccessFlags::SHADER_READ,
dst_access: VkAccessFlags::SHADER_WRITE,
},
(_, BarrierType::ReadToWrite) => BarrierConfig {
src_stage: VkPipelineStageFlags::COMPUTE_SHADER,
dst_stage: VkPipelineStageFlags::COMPUTE_SHADER,
src_access: VkAccessFlags::SHADER_READ,
dst_access: VkAccessFlags::SHADER_WRITE,
},
(GpuVendor::AMD, BarrierType::WriteToRead) => BarrierConfig {
src_stage: VkPipelineStageFlags::COMPUTE_SHADER,
dst_stage: VkPipelineStageFlags::COMPUTE_SHADER,
src_access: VkAccessFlags::SHADER_WRITE,
dst_access: VkAccessFlags::SHADER_READ,
},
(GpuVendor::NVIDIA, BarrierType::WriteToRead) => BarrierConfig {
src_stage: VkPipelineStageFlags::COMPUTE_SHADER,
dst_stage: VkPipelineStageFlags::COMPUTE_SHADER,
src_access: VkAccessFlags::SHADER_WRITE,
dst_access: VkAccessFlags::SHADER_READ,
},
(_, BarrierType::WriteToRead) => BarrierConfig {
src_stage: VkPipelineStageFlags::COMPUTE_SHADER,
dst_stage: VkPipelineStageFlags::COMPUTE_SHADER,
src_access: VkAccessFlags::SHADER_WRITE,
dst_access: VkAccessFlags::SHADER_READ,
},
}
}
}
/// A set of barriers accumulated for submission in a single
/// pipeline-barrier command.
pub struct BarrierBatch {
    /// Queued global memory barriers.
    memory_barriers: Vec<VkMemoryBarrier>,
    /// Queued per-buffer memory barriers.
    buffer_barriers: Vec<VkBufferMemoryBarrier>,
    /// Vendor passed to `BarrierConfig::optimal_for` for every barrier.
    vendor: GpuVendor,
}
impl BarrierBatch {
pub fn new(vendor: GpuVendor) -> Self {
Self {
memory_barriers: Vec::new(),
buffer_barriers: Vec::new(),
vendor,
}
}
pub fn add_memory_barrier(&mut self, barrier_type: BarrierType) {
let config = BarrierConfig::optimal_for(self.vendor, barrier_type);
self.memory_barriers.push(VkMemoryBarrier {
sType: VkStructureType::MemoryBarrier,
pNext: std::ptr::null(),
srcAccessMask: config.src_access,
dstAccessMask: config.dst_access,
});
}
pub fn add_buffer_barrier(
&mut self,
buffer: VkBuffer,
barrier_type: BarrierType,
offset: VkDeviceSize,
size: VkDeviceSize,
) {
let config = BarrierConfig::optimal_for(self.vendor, barrier_type);
self.buffer_barriers.push(VkBufferMemoryBarrier {
sType: VkStructureType::BufferMemoryBarrier,
pNext: std::ptr::null(),
srcAccessMask: config.src_access,
dstAccessMask: config.dst_access,
srcQueueFamilyIndex: VK_QUEUE_FAMILY_IGNORED,
dstQueueFamilyIndex: VK_QUEUE_FAMILY_IGNORED,
buffer,
offset,
size,
});
}
pub unsafe fn submit(
&self,
command_buffer: VkCommandBuffer,
barrier_type: BarrierType,
) {
if self.memory_barriers.is_empty() && self.buffer_barriers.is_empty() {
return; }
let config = BarrierConfig::optimal_for(self.vendor, barrier_type);
if let Some(icd) = super::icd_loader::get_icd() {
if let Some(barrier_fn) = icd.cmd_pipeline_barrier {
barrier_fn(
command_buffer,
config.src_stage,
config.dst_stage,
VkDependencyFlags::empty(),
self.memory_barriers.len() as u32,
if self.memory_barriers.is_empty() {
std::ptr::null()
} else {
self.memory_barriers.as_ptr()
},
self.buffer_barriers.len() as u32,
if self.buffer_barriers.is_empty() {
std::ptr::null()
} else {
self.buffer_barriers.as_ptr()
},
0, std::ptr::null(),
);
}
}
}
pub fn clear(&mut self) {
self.memory_barriers.clear();
self.buffer_barriers.clear();
}
}
/// Tracks the last recorded access per buffer and queues the barriers needed
/// when that access changes.
pub struct BarrierTracker {
    /// Last recorded access flags, keyed by the buffer's raw handle value.
    buffer_states: std::collections::HashMap<u64, VkAccessFlags>,
    /// Barriers queued since the last flush.
    pending: BarrierBatch,
    /// Running counters; never reset by this type.
    stats: BarrierStats,
}
/// Running counters describing the barriers a `BarrierTracker` has queued.
#[derive(Default, Debug)]
pub struct BarrierStats {
    /// Number of barriers queued (the sum of the three per-type counters).
    pub total_barriers: u64,
    /// Number of tracked accesses for which no barrier was queued.
    pub elided_barriers: u64,
    /// Number of `UploadToRead` barriers queued.
    pub upload_barriers: u64,
    /// Number of `ReadToWrite` barriers queued.
    pub read_write_barriers: u64,
    /// Number of `WriteToRead` barriers queued.
    pub write_read_barriers: u64,
}
impl BarrierTracker {
pub fn new(vendor: GpuVendor) -> Self {
Self {
buffer_states: std::collections::HashMap::new(),
pending: BarrierBatch::new(vendor),
stats: BarrierStats::default(),
}
}
pub fn track_buffer_access(
&mut self,
buffer: VkBuffer,
new_access: VkAccessFlags,
offset: VkDeviceSize,
size: VkDeviceSize,
) -> bool {
let buffer_key = buffer.as_raw();
let last_access = self.buffer_states.get(&buffer_key).copied()
.unwrap_or(VkAccessFlags::empty());
let barrier_type = if last_access.contains(VkAccessFlags::HOST_WRITE)
&& new_access.contains(VkAccessFlags::SHADER_READ) {
Some(BarrierType::UploadToRead)
} else if last_access.contains(VkAccessFlags::SHADER_READ)
&& new_access.contains(VkAccessFlags::SHADER_WRITE) {
Some(BarrierType::ReadToWrite)
} else if last_access.contains(VkAccessFlags::SHADER_WRITE)
&& new_access.contains(VkAccessFlags::SHADER_READ) {
Some(BarrierType::WriteToRead)
} else if last_access == new_access {
None } else {
Some(BarrierType::WriteToRead) };
if let Some(barrier_type) = barrier_type {
self.pending.add_buffer_barrier(buffer, barrier_type, offset, size);
self.buffer_states.insert(buffer_key, new_access);
self.stats.total_barriers += 1;
match barrier_type {
BarrierType::UploadToRead => self.stats.upload_barriers += 1,
BarrierType::ReadToWrite => self.stats.read_write_barriers += 1,
BarrierType::WriteToRead => self.stats.write_read_barriers += 1,
}
true
} else {
self.stats.elided_barriers += 1;
false
}
}
pub unsafe fn flush_barriers(&mut self, command_buffer: VkCommandBuffer) {
if !self.pending.buffer_barriers.is_empty() {
let barrier_type = if self.stats.upload_barriers > 0 {
BarrierType::UploadToRead
} else if self.stats.write_read_barriers > self.stats.read_write_barriers {
BarrierType::WriteToRead
} else {
BarrierType::ReadToWrite
};
self.pending.submit(command_buffer, barrier_type);
self.pending.clear();
}
}
pub fn stats(&self) -> &BarrierStats {
&self.stats
}
pub fn barriers_per_dispatch(&self, dispatch_count: u64) -> f64 {
if dispatch_count == 0 {
0.0
} else {
self.stats.total_barriers as f64 / dispatch_count as f64
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Known vendor IDs map to their vendor; anything else maps to `Other`.
    #[test]
    fn test_vendor_detection() {
        let cases = [
            (0x1002, GpuVendor::AMD),
            (0x10DE, GpuVendor::NVIDIA),
            (0x8086, GpuVendor::Intel),
            (0x9999, GpuVendor::Other),
        ];
        for &(vendor_id, expected) in cases.iter() {
            assert_eq!(GpuVendor::from_vendor_id(vendor_id), expected);
        }
    }

    /// An upload barrier goes from host writes to compute-shader reads.
    #[test]
    fn test_barrier_config() {
        let BarrierConfig {
            src_stage,
            dst_stage,
            src_access,
            dst_access,
        } = BarrierConfig::optimal_for(GpuVendor::AMD, BarrierType::UploadToRead);

        assert_eq!(src_stage, VkPipelineStageFlags::HOST);
        assert_eq!(dst_stage, VkPipelineStageFlags::COMPUTE_SHADER);
        assert_eq!(src_access, VkAccessFlags::HOST_WRITE);
        assert_eq!(dst_access, VkAccessFlags::SHADER_READ);
    }
}