memkit-gpu 0.2.0-beta.1

Backend-agnostic GPU memory management for memkit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
//! Vulkan backend implementation using ash and gpu-allocator.
//!
//! This module provides a Vulkan-based GPU memory backend.
//!
//! # Example
//!
//! ```rust,ignore
//! use memkit_gpu::{MkGpu, VulkanBackend, VulkanConfig};
//!
//! let config = VulkanConfig::default();
//! let backend = VulkanBackend::new(config)?;
//! let gpu = MkGpu::new(backend);
//! ```

use ash::vk;
use gpu_allocator::vulkan::{Allocator, AllocatorCreateDesc, Allocation, AllocationCreateDesc, AllocationScheme};
use gpu_allocator::MemoryLocation;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};

use crate::backend::{MkGpuBackend, MkGpuCapabilities};
use crate::buffer::MkBufferUsage;
use crate::memory::MkMemoryType;

/// Vulkan backend configuration.
///
/// Construct via [`Default::default`] and override individual fields as
/// needed before passing to [`VulkanBackend::new`].
#[derive(Debug, Clone)]
pub struct VulkanConfig {
    /// Application name for Vulkan instance.
    pub app_name: String,
    /// Application version (plain u32, passed straight through to Vulkan).
    pub app_version: u32,
    /// Enable validation layers (VK_LAYER_KHRONOS_validation).
    pub validation: bool,
    /// Preferred device index (None for auto-select).
    pub device_index: Option<usize>,
}

impl Default for VulkanConfig {
    fn default() -> Self {
        Self {
            app_name: "memkit-gpu".to_string(),
            app_version: 1,
            validation: cfg!(debug_assertions),
            device_index: None,
        }
    }
}

/// Vulkan backend using ash and gpu-allocator.
///
/// Owns the Vulkan instance, logical device, submission queue, and command
/// pool, plus the allocator state and the table of live buffers. All Vulkan
/// objects are torn down in the `Drop` impl.
pub struct VulkanBackend {
    // Kept alive so the dynamically loaded Vulkan library is not unloaded
    // while the instance/device still exist.
    #[allow(dead_code)]
    entry: ash::Entry,
    instance: ash::Instance,
    physical_device: vk::PhysicalDevice,
    device: ash::Device,
    // Queue used for buffer-copy submissions.
    queue: vk::Queue,
    queue_family_index: u32,
    // Pool for the transient copy command buffers.
    command_pool: vk::CommandPool,
    // gpu-allocator state; behind a Mutex because backend methods take &self.
    allocator: Mutex<Allocator>,
    // Live buffers keyed by the id stored in VulkanBufferHandle.
    buffers: Mutex<HashMap<u64, VulkanBuffer>>,
    // Monotonic id source for buffer handles (starts at 1; 0 is never issued).
    next_id: AtomicU64,
    // Captured once at creation; used by capabilities() and for limits.
    device_properties: vk::PhysicalDeviceProperties,
}

// A live Vulkan buffer tracked by the backend.
struct VulkanBuffer {
    buffer: vk::Buffer,
    // Backing sub-allocation; returned to the allocator on destroy/drop.
    allocation: Allocation,
    // Size in bytes as requested by the caller.
    size: usize,
    // Usage flags the buffer was created with.
    usage: MkBufferUsage,
    memory_type: MkMemoryType,
    // NOTE(review): set to None at creation and never read or updated in
    // this module — map() uses allocation.mapped_ptr() instead. Looks like
    // a leftover field; confirm unused elsewhere before removing.
    mapped_ptr: Option<*mut u8>,
}

// Safety: VulkanBuffer is protected by Mutex — the raw `mapped_ptr` is the
// only field blocking the auto impls, and all access to VulkanBuffer goes
// through VulkanBackend's `buffers` mutex.
unsafe impl Send for VulkanBuffer {}
unsafe impl Sync for VulkanBuffer {}

/// Vulkan buffer handle.
///
/// A cheap, cloneable reference to a buffer owned by the backend; holds only
/// an id plus cached size/memory-type metadata.
#[derive(Clone, Debug)]
pub struct VulkanBufferHandle {
    // Key into VulkanBackend::buffers.
    id: u64,
    // Buffer size in bytes, as requested at creation.
    size: usize,
    memory_type: MkMemoryType,
}

// NOTE(review): these impls are likely redundant — the handle holds only
// plain data, so Send/Sync should derive automatically (assuming
// MkMemoryType is Send + Sync). Harmless, but confirm and consider removing.
unsafe impl Send for VulkanBufferHandle {}
unsafe impl Sync for VulkanBufferHandle {}

/// Vulkan backend error.
#[derive(Debug)]
pub enum VulkanError {
    /// Vulkan API error (a non-success `VkResult`).
    Vulkan(vk::Result),
    /// GPU allocator error.
    Allocator(gpu_allocator::AllocationError),
    /// No suitable GPU found.
    NoSuitableDevice,
    /// Buffer not found (stale or foreign handle id).
    BufferNotFound(u64),
    /// Buffer not mappable (e.g. device-local memory).
    NotMappable,
    /// Entry loading error (Vulkan loader missing or unusable).
    LoadError(String),
}

impl std::fmt::Display for VulkanError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            VulkanError::Vulkan(e) => write!(f, "Vulkan error: {:?}", e),
            VulkanError::Allocator(e) => write!(f, "Allocator error: {}", e),
            VulkanError::NoSuitableDevice => write!(f, "No suitable GPU device found"),
            VulkanError::BufferNotFound(id) => write!(f, "Buffer {} not found", id),
            VulkanError::NotMappable => write!(f, "Buffer is not mappable"),
            VulkanError::LoadError(msg) => write!(f, "Failed to load Vulkan: {}", msg),
        }
    }
}

// Marker impl: Debug + Display above satisfy the std::error::Error contract.
impl std::error::Error for VulkanError {}

impl From<vk::Result> for VulkanError {
    fn from(value: vk::Result) -> Self {
        Self::Vulkan(value)
    }
}

impl From<gpu_allocator::AllocationError> for VulkanError {
    fn from(value: gpu_allocator::AllocationError) -> Self {
        Self::Allocator(value)
    }
}

impl VulkanBackend {
    /// Create a new Vulkan backend from `config`.
    ///
    /// Loads the system Vulkan library, creates an instance and logical
    /// device, picks a transfer-capable queue family, and initializes a
    /// gpu-allocator instance.
    ///
    /// # Errors
    ///
    /// Returns a [`VulkanError`] if the Vulkan loader cannot be found, no
    /// suitable device or queue family exists, or any Vulkan call fails.
    pub fn new(config: VulkanConfig) -> Result<Self, VulkanError> {
        // SAFETY: create_internal only issues Vulkan calls on handles it
        // creates itself; nothing else references them yet.
        unsafe { Self::create_internal(config) }
    }

    /// # Safety
    ///
    /// Performs raw Vulkan loader/instance/device creation; must not race
    /// with other code manipulating loader state.
    unsafe fn create_internal(config: VulkanConfig) -> Result<Self, VulkanError> {
        // Load the Vulkan loader from the system.
        let entry = ash::Entry::load()
            .map_err(|e| VulkanError::LoadError(e.to_string()))?;

        // --- Instance creation ---
        let app_name = std::ffi::CString::new(config.app_name.as_str()).unwrap();
        let engine_name = std::ffi::CString::new("memkit-gpu").unwrap();

        let app_info = vk::ApplicationInfo::default()
            .application_name(&app_name)
            .application_version(config.app_version)
            .engine_name(&engine_name)
            .engine_version(1)
            .api_version(vk::make_api_version(0, 1, 2, 0));

        let mut layers = Vec::new();
        if config.validation {
            let validation_layer = std::ffi::CString::new("VK_LAYER_KHRONOS_validation").unwrap();
            layers.push(validation_layer);
        }
        // Use c_char rather than i8: C `char` is unsigned on some targets
        // (e.g. aarch64 Linux), where a `Vec<*const i8>` annotation fails to
        // type-check against the FFI signature of enabled_layer_names.
        let layer_ptrs: Vec<*const std::os::raw::c_char> =
            layers.iter().map(|l| l.as_ptr()).collect();

        let create_info = vk::InstanceCreateInfo::default()
            .application_info(&app_info)
            .enabled_layer_names(&layer_ptrs);

        let instance = entry.create_instance(&create_info, None)?;

        // --- Physical device selection ---
        let physical_devices = instance.enumerate_physical_devices()?;
        if physical_devices.is_empty() {
            return Err(VulkanError::NoSuitableDevice);
        }

        let physical_device = if let Some(idx) = config.device_index {
            // Honor an explicit device index; fail if it is out of range.
            *physical_devices.get(idx).ok_or(VulkanError::NoSuitableDevice)?
        } else {
            // Prefer a discrete GPU, falling back to the first device.
            physical_devices
                .iter()
                .find(|&&pd| {
                    let props = instance.get_physical_device_properties(pd);
                    props.device_type == vk::PhysicalDeviceType::DISCRETE_GPU
                })
                .copied()
                .unwrap_or(physical_devices[0])
        };

        let device_properties = instance.get_physical_device_properties(physical_device);

        // --- Queue family selection ---
        // Prefer a graphics+transfer family, but fall back to any family
        // advertising TRANSFER or COMPUTE (compute queues implicitly support
        // transfer), so compute-only devices still work for a memory backend.
        let queue_families = instance.get_physical_device_queue_family_properties(physical_device);
        let queue_family_index = queue_families
            .iter()
            .enumerate()
            .find(|(_, props)| props.queue_flags.contains(vk::QueueFlags::GRAPHICS | vk::QueueFlags::TRANSFER))
            .or_else(|| {
                queue_families
                    .iter()
                    .enumerate()
                    .find(|(_, props)| props.queue_flags.intersects(vk::QueueFlags::TRANSFER | vk::QueueFlags::COMPUTE))
            })
            .map(|(i, _)| i as u32)
            .ok_or(VulkanError::NoSuitableDevice)?;

        // --- Logical device and queue ---
        let queue_priorities = [1.0f32];
        let queue_create_info = vk::DeviceQueueCreateInfo::default()
            .queue_family_index(queue_family_index)
            .queue_priorities(&queue_priorities);

        let device_create_info = vk::DeviceCreateInfo::default()
            .queue_create_infos(std::slice::from_ref(&queue_create_info));

        let device = instance.create_device(physical_device, &device_create_info, None)?;
        let queue = device.get_device_queue(queue_family_index, 0);

        // --- Command pool for transient copy command buffers ---
        let pool_create_info = vk::CommandPoolCreateInfo::default()
            .queue_family_index(queue_family_index)
            .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER);

        let command_pool = device.create_command_pool(&pool_create_info, None)?;

        // --- Allocator ---
        let allocator = Allocator::new(&AllocatorCreateDesc {
            instance: instance.clone(),
            device: device.clone(),
            physical_device,
            debug_settings: Default::default(),
            buffer_device_address: false,
            allocation_sizes: Default::default(),
        })?;

        Ok(Self {
            entry,
            instance,
            physical_device,
            device,
            queue,
            queue_family_index,
            command_pool,
            allocator: Mutex::new(allocator),
            buffers: Mutex::new(HashMap::new()),
            next_id: AtomicU64::new(1),
            device_properties,
        })
    }

    /// Map the backend-agnostic memory type onto a gpu-allocator location.
    fn memory_location_from_type(memory_type: MkMemoryType) -> MemoryLocation {
        match memory_type {
            MkMemoryType::DeviceLocal => MemoryLocation::GpuOnly,
            MkMemoryType::HostVisible => MemoryLocation::CpuToGpu,
            MkMemoryType::HostCached => MemoryLocation::GpuToCpu,
            // gpu-allocator has no dedicated "unified" location; CpuToGpu
            // (host-visible, device-preferred) is the closest match.
            MkMemoryType::Unified => MemoryLocation::CpuToGpu,
        }
    }

    /// Translate memkit usage flags into Vulkan buffer-usage flags.
    fn vk_buffer_usage_from_mk(usage: MkBufferUsage) -> vk::BufferUsageFlags {
        let mut flags = vk::BufferUsageFlags::empty();

        if usage.contains(MkBufferUsage::TRANSFER_SRC) {
            flags |= vk::BufferUsageFlags::TRANSFER_SRC;
        }
        if usage.contains(MkBufferUsage::TRANSFER_DST) {
            flags |= vk::BufferUsageFlags::TRANSFER_DST;
        }
        if usage.contains(MkBufferUsage::UNIFORM) {
            flags |= vk::BufferUsageFlags::UNIFORM_BUFFER;
        }
        if usage.contains(MkBufferUsage::STORAGE) {
            flags |= vk::BufferUsageFlags::STORAGE_BUFFER;
        }
        if usage.contains(MkBufferUsage::VERTEX) {
            flags |= vk::BufferUsageFlags::VERTEX_BUFFER;
        }
        if usage.contains(MkBufferUsage::INDEX) {
            flags |= vk::BufferUsageFlags::INDEX_BUFFER;
        }

        // vkCreateBuffer requires a non-empty usage mask; default to
        // TRANSFER_DST so an unspecified buffer is still writable.
        if flags.is_empty() {
            flags = vk::BufferUsageFlags::TRANSFER_DST;
        }

        flags
    }
}

impl Drop for VulkanBackend {
    // Tear down all GPU objects in dependency order.
    fn drop(&mut self) {
        unsafe {
            // Ensure no submitted work still references the objects below.
            self.device.device_wait_idle().ok();

            // Free all buffers still tracked in the map, returning their
            // memory to the allocator before the device goes away.
            let mut buffers = self.buffers.lock().unwrap();
            let mut allocator = self.allocator.lock().unwrap();
            
            for (_, buffer) in buffers.drain() {
                self.device.destroy_buffer(buffer.buffer, None);
                allocator.free(buffer.allocation).ok();
            }
            drop(allocator);
            drop(buffers);

            // NOTE(review): the `allocator` FIELD is only dropped after this
            // fn returns, i.e. AFTER destroy_device below. gpu-allocator's
            // Allocator frees its remaining VkDeviceMemory blocks in its own
            // Drop, which would then run against a destroyed device (UB).
            // Consider wrapping the allocator in ManuallyDrop/Option so it
            // can be dropped here, before the device — confirm against
            // gpu-allocator's Drop behavior.
            self.device.destroy_command_pool(self.command_pool, None);
            self.device.destroy_device(None);
            self.instance.destroy_instance(None);
        }
    }
}

impl MkGpuBackend for VulkanBackend {
    type BufferHandle = VulkanBufferHandle;
    type Error = VulkanError;

    /// Backend name for diagnostics.
    fn name(&self) -> &'static str {
        "Vulkan"
    }

    /// Capabilities derived from the physical-device properties captured at
    /// backend creation.
    fn capabilities(&self) -> MkGpuCapabilities {
        let props = &self.device_properties;

        MkGpuCapabilities {
            max_buffer_size: props.limits.max_storage_buffer_range as usize,
            // gpu-allocator sub-allocates, so this is a soft advisory limit.
            max_allocations: 4096,
            // Reported false unconditionally; consistent with map(), which
            // refuses DeviceLocal buffers even on integrated GPUs.
            unified_memory: false,
            coherent_memory: true,
            device_name: unsafe {
                // SAFETY: device_name is a NUL-terminated C string filled in
                // by the driver via vkGetPhysicalDeviceProperties.
                std::ffi::CStr::from_ptr(props.device_name.as_ptr())
                    .to_string_lossy()
                    .to_string()
            },
            vendor_name: format!("Vendor ID: {}", props.vendor_id),
        }
    }

    /// Create a buffer of `size` bytes with the given usage and memory type.
    ///
    /// # Errors
    ///
    /// Fails if Vulkan buffer creation, memory allocation, or binding fails.
    /// Partially-created resources are rolled back before returning, so a
    /// failed call leaks nothing.
    fn create_buffer(
        &self,
        size: usize,
        usage: MkBufferUsage,
        memory_type: MkMemoryType,
    ) -> Result<Self::BufferHandle, Self::Error> {
        let vk_usage = Self::vk_buffer_usage_from_mk(usage);

        let buffer_info = vk::BufferCreateInfo::default()
            .size(size as u64)
            .usage(vk_usage)
            .sharing_mode(vk::SharingMode::EXCLUSIVE);

        let buffer = unsafe { self.device.create_buffer(&buffer_info, None)? };
        let requirements = unsafe { self.device.get_buffer_memory_requirements(buffer) };

        // Allocate backing memory. On failure, destroy the VkBuffer instead
        // of leaking it (a bare `?` here would leak the buffer handle).
        let allocation = match self.allocator.lock().unwrap().allocate(&AllocationCreateDesc {
            name: "memkit buffer",
            requirements,
            location: Self::memory_location_from_type(memory_type),
            linear: true,
            allocation_scheme: AllocationScheme::GpuAllocatorManaged,
        }) {
            Ok(allocation) => allocation,
            Err(e) => {
                unsafe { self.device.destroy_buffer(buffer, None) };
                return Err(e.into());
            }
        };

        if let Err(e) = unsafe {
            self.device.bind_buffer_memory(buffer, allocation.memory(), allocation.offset())
        } {
            // Roll back both the buffer and its allocation on bind failure.
            unsafe { self.device.destroy_buffer(buffer, None) };
            self.allocator.lock().unwrap().free(allocation).ok();
            return Err(e.into());
        }

        let id = self.next_id.fetch_add(1, Ordering::Relaxed);

        let vk_buffer = VulkanBuffer {
            buffer,
            allocation,
            size,
            usage,
            memory_type,
            mapped_ptr: None,
        };

        self.buffers.lock().unwrap().insert(id, vk_buffer);

        Ok(VulkanBufferHandle { id, size, memory_type })
    }

    /// Destroy a buffer; an unknown or stale handle is a silent no-op.
    fn destroy_buffer(&self, handle: &Self::BufferHandle) {
        if let Some(buffer) = self.buffers.lock().unwrap().remove(&handle.id) {
            unsafe {
                // SAFETY: the buffer was removed from the map under the lock,
                // and copies hold the same lock for their whole submission,
                // so neither the CPU nor the GPU can still be using it.
                self.device.destroy_buffer(buffer.buffer, None);
            }
            self.allocator.lock().unwrap().free(buffer.allocation).ok();
        }
    }

    /// Return a host pointer for a mappable buffer, or `None` for
    /// device-local buffers and unknown handles.
    fn map(&self, handle: &Self::BufferHandle) -> Option<*mut u8> {
        let mut buffers = self.buffers.lock().unwrap();
        let buffer = buffers.get_mut(&handle.id)?;

        // Device-local memory is not host-visible.
        if buffer.memory_type == MkMemoryType::DeviceLocal {
            return None;
        }

        // gpu-allocator keeps host-visible allocations persistently mapped.
        buffer.allocation.mapped_ptr().map(|p| p.as_ptr() as *mut u8)
    }

    fn unmap(&self, _handle: &Self::BufferHandle) {
        // No-op: gpu-allocator keeps memory persistently mapped.
    }

    /// Flush a host-written range so it becomes visible to the device.
    ///
    /// Best-effort: Vulkan errors are swallowed to match the infallible
    /// trait signature.
    fn flush(&self, handle: &Self::BufferHandle, offset: usize, size: usize) {
        let buffers = self.buffers.lock().unwrap();
        if let Some(buffer) = buffers.get(&handle.id) {
            // vkFlushMappedMemoryRanges requires offset and size to be
            // multiples of nonCoherentAtomSize; round the caller's range
            // outward to the smallest valid covering range. (Rounding the
            // end up could in principle exceed the memory object if the
            // block size is not atom-aligned — gpu-allocator block sizes
            // are, in practice.)
            let atom = self.device_properties.limits.non_coherent_atom_size.max(1);
            let start = buffer.allocation.offset() + offset as u64;
            let end = start + size as u64;
            let aligned_start = start - start % atom;
            let aligned_size = (end - aligned_start + atom - 1) / atom * atom;

            unsafe {
                let range = vk::MappedMemoryRange::default()
                    .memory(buffer.allocation.memory())
                    .offset(aligned_start)
                    .size(aligned_size);

                self.device.flush_mapped_memory_ranges(&[range]).ok();
            }
        }
    }

    /// Invalidate a range so device writes become visible to the host.
    ///
    /// Best-effort; see [`Self::flush`] for the alignment rationale.
    fn invalidate(&self, handle: &Self::BufferHandle, offset: usize, size: usize) {
        let buffers = self.buffers.lock().unwrap();
        if let Some(buffer) = buffers.get(&handle.id) {
            // Same nonCoherentAtomSize alignment requirement as flush().
            let atom = self.device_properties.limits.non_coherent_atom_size.max(1);
            let start = buffer.allocation.offset() + offset as u64;
            let end = start + size as u64;
            let aligned_start = start - start % atom;
            let aligned_size = (end - aligned_start + atom - 1) / atom * atom;

            unsafe {
                let range = vk::MappedMemoryRange::default()
                    .memory(buffer.allocation.memory())
                    .offset(aligned_start)
                    .size(aligned_size);

                self.device.invalidate_mapped_memory_ranges(&[range]).ok();
            }
        }
    }

    /// Copy `size` bytes from the start of `src` to the start of `dst`.
    fn copy_buffer(
        &self,
        src: &Self::BufferHandle,
        dst: &Self::BufferHandle,
        size: usize,
    ) -> Result<(), Self::Error> {
        self.copy_buffer_regions(src, 0, dst, 0, size)
    }

    /// Copy `size` bytes between buffer sub-ranges via a one-shot command
    /// buffer, blocking until the GPU copy completes.
    ///
    /// # Errors
    ///
    /// Returns [`VulkanError::BufferNotFound`] for a stale handle, or the
    /// underlying Vulkan error if recording/submission fails.
    fn copy_buffer_regions(
        &self,
        src: &Self::BufferHandle,
        src_offset: usize,
        dst: &Self::BufferHandle,
        dst_offset: usize,
        size: usize,
    ) -> Result<(), Self::Error> {
        // Hold the buffer-map lock for the entire copy: this prevents a
        // concurrent destroy_buffer from freeing either buffer while the GPU
        // is using it (the original dropped the lock before submitting), and
        // it also serializes use of the command pool, which Vulkan requires
        // to be externally synchronized.
        let buffers = self.buffers.lock().unwrap();

        let src_vk = buffers.get(&src.id).ok_or(VulkanError::BufferNotFound(src.id))?.buffer;
        let dst_vk = buffers.get(&dst.id).ok_or(VulkanError::BufferNotFound(dst.id))?.buffer;

        unsafe {
            // Allocate a transient primary command buffer.
            let alloc_info = vk::CommandBufferAllocateInfo::default()
                .command_pool(self.command_pool)
                .level(vk::CommandBufferLevel::PRIMARY)
                .command_buffer_count(1);

            let cmd_buffers = self.device.allocate_command_buffers(&alloc_info)?;
            let cmd = cmd_buffers[0];

            // Record + submit step by step, tracking the first failure so the
            // command buffer is freed on every path (the original leaked it
            // on any error after allocation).
            let begin_info = vk::CommandBufferBeginInfo::default()
                .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT);

            let mut result: Result<(), VulkanError> = self
                .device
                .begin_command_buffer(cmd, &begin_info)
                .map_err(VulkanError::from);

            if result.is_ok() {
                let copy_region = vk::BufferCopy {
                    src_offset: src_offset as u64,
                    dst_offset: dst_offset as u64,
                    size: size as u64,
                };
                self.device.cmd_copy_buffer(cmd, src_vk, dst_vk, &[copy_region]);
                result = self.device.end_command_buffer(cmd).map_err(VulkanError::from);
            }

            if result.is_ok() {
                let submit_info = vk::SubmitInfo::default()
                    .command_buffers(&cmd_buffers);
                result = self
                    .device
                    .queue_submit(self.queue, &[submit_info], vk::Fence::null())
                    .map_err(VulkanError::from);
            }

            if result.is_ok() {
                // Synchronous copy: wait for the GPU to finish before the
                // buffer lock is released.
                result = self.device.queue_wait_idle(self.queue).map_err(VulkanError::from);
            }

            // Always return the command buffer to the pool.
            self.device.free_command_buffers(self.command_pool, &cmd_buffers);

            result
        }
    }

    /// Block until the device has finished all submitted work.
    fn wait_idle(&self) -> Result<(), Self::Error> {
        unsafe {
            self.device.device_wait_idle()?;
        }
        Ok(())
    }
}