1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
use std::{error::Error, ffi::CString, sync::{RwLock}};
use ash::{version::{InstanceV1_0, InstanceV1_1}, vk};
use gpu_allocator::{VulkanAllocator, VulkanAllocatorCreateDesc};
use crate::{Compute, fence::Fence};
/// A Vulkan physical device paired with the queue families selected for it.
///
/// Instances are produced by `GPU::new`, which only keeps queue families whose
/// flags contain `vk::QueueFlags::COMPUTE`.
pub(crate) struct GPU {
/// Raw Vulkan handle of the physical device.
pub physical: vk::PhysicalDevice,
/// Queue families of `physical` that were retained by `GPU::new`.
pub queue_families: Vec<QueueFamily>,
}
/// Summary of one queue family of a physical device.
pub(crate) struct QueueFamily {
/// Number of queues in the family, as reported in the family's Vulkan properties.
pub queue_count: u32,
/// Position of the family in the list returned by
/// `get_physical_device_queue_family_properties`; used as the Vulkan queue family index.
pub physical_index: usize,
}
impl GPU {
    /// Inspects `pdevice` and wraps it in a [`GPU`] if it exposes at least one
    /// compute-capable queue family.
    ///
    /// The device name, device type, subgroup properties and every queue family
    /// are printed to stdout as a diagnostic side effect.
    ///
    /// Returns `None` when no queue family advertises `vk::QueueFlags::COMPUTE`.
    ///
    /// # Safety
    ///
    /// `pdevice` must be a valid physical device handle obtained from `instance`.
    /// The Vulkan 1.1 `get_physical_device_properties2` entry point is invoked
    /// directly through the instance's function table, so the instance must
    /// actually provide it.
    pub(crate) unsafe fn new(
        instance: &ash::Instance,
        pdevice: vk::PhysicalDevice
    ) -> Option<GPU> {
        // `sp` is chained into `dp2` through a raw `p_next` pointer; it must stay
        // alive (and in place) until the query below has returned.
        let mut sp = vk::PhysicalDeviceSubgroupProperties::builder();
        let mut dp2 = vk::PhysicalDeviceProperties2::builder()
            .push_next(&mut sp)
            .build();
        instance
            .fp_v1_1()
            .get_physical_device_properties2(pdevice, &mut dp2);

        // The name is a NUL-terminated UTF-8 string inside a fixed-size array:
        // stop at the first NUL (anything after it is not part of the name) and
        // decode as UTF-8 instead of mapping each byte to a char individually.
        let name_bytes: Vec<u8> = dp2
            .properties
            .device_name
            .iter()
            .map(|&c| c as u8)
            .take_while(|&b| b != 0)
            .collect();
        let device_name = String::from_utf8_lossy(&name_bytes);

        println!("Found device: {} ({:?})", device_name, dp2.properties.device_type);
        println!("Physical device has subgroup size of: {:?}", sp.subgroup_size);
        println!("Supported subgroup operations: {:?}", sp.supported_operations);
        println!("Supported subgroup stages: {:?}", sp.supported_stages);

        let family_properties = instance.get_physical_device_queue_family_properties(pdevice);
        let mut queue_families = Vec::new();
        for (index, prop) in family_properties.iter().enumerate() {
            println!("Queue family at index {} has {} threads and capabilities: {:?}", index, prop.queue_count, prop.queue_flags);
            // Only families that can run compute work are of interest.
            if prop.queue_flags.contains(vk::QueueFlags::COMPUTE) {
                queue_families.push(QueueFamily {
                    queue_count: prop.queue_count,
                    physical_index: index,
                });
            }
        }

        if queue_families.is_empty() {
            None
        } else {
            Some(GPU { physical: pdevice, queue_families })
        }
    }

    /// Creates the logical device, memory allocator and per-queue fences for
    /// this physical device and bundles them into a [`Compute`].
    ///
    /// Every queue of every family stored in `self.queue_families` is requested
    /// with priority `1.0`. The variable-pointers, get-memory-requirements2 and
    /// dedicated-allocation extensions are enabled; on macOS/aarch64 builds the
    /// portability-subset extension is enabled as well.
    ///
    /// # Errors
    ///
    /// Returns an error if an extension name cannot be turned into a `CString`
    /// or if `create_device` fails. A failing `Fence::new` is not an error: the
    /// fence is skipped and a message is written to stderr.
    ///
    /// # Safety
    ///
    /// `instance` must be the instance `self.physical` was enumerated from, and
    /// both must still be valid.
    pub(crate) unsafe fn device(
        &self,
        instance: &ash::Instance
    ) -> Result<Compute, Box<dyn Error>> {
        // `DeviceQueueCreateInfo` only stores a raw pointer to its priorities
        // and `.build()` discards the borrow, so the priority arrays must be
        // owned by a local that outlives the `create_device` call below.
        let queue_priorities: Vec<Vec<f32>> = self.queue_families
            .iter()
            .map(|family| vec![1.0f32; family.queue_count as usize])
            .collect();
        let queue_infos: Vec<_> = self.queue_families
            .iter()
            .zip(queue_priorities.iter())
            .map(|(family, priorities)| {
                vk::DeviceQueueCreateInfo::builder()
                    // The index comes from enumerating the family list in
                    // `GPU::new`, whose length Vulkan reports as a u32.
                    .queue_family_index(family.physical_index as u32)
                    .queue_priorities(priorities)
                    .build()
            })
            .collect();

        // No optional core features are requested.
        let features = vk::PhysicalDeviceFeatures::default();
        let memory = instance.get_physical_device_memory_properties(self.physical);

        let mut variable_pointers = vk::PhysicalDeviceVariablePointersFeatures::builder()
            .variable_pointers(true)
            .variable_pointers_storage_buffer(true)
            .build();

        let mut ext_names: Vec<CString> = vec![
            CString::new("VK_KHR_variable_pointers")?,
            CString::new("VK_KHR_get_memory_requirements2")?,
            CString::new("VK_KHR_dedicated_allocation")?,
        ];
        if cfg!(target_os = "macos") && cfg!(target_arch = "aarch64") {
            ext_names.push(CString::new("VK_KHR_portability_subset")?);
        }
        // `ext_names` owns the strings; the raw pointers below borrow from it
        // and are only used while it is still alive.
        let ext_names_raw: Vec<_> = ext_names
            .iter()
            .map(|name| name.as_ptr())
            .collect();

        let device_info = vk::DeviceCreateInfo::builder()
            .queue_create_infos(&queue_infos)
            .enabled_extension_names(&ext_names_raw)
            .enabled_features(&features)
            .push_next(&mut variable_pointers);
        let device = instance.create_device(self.physical, &device_info, None)?;

        let allocator_create_info = VulkanAllocatorCreateDesc {
            physical_device: self.physical,
            device: device.clone(),
            instance: instance.clone(),
            debug_settings: Default::default(),
            buffer_device_address: false,
        };
        let allocator = VulkanAllocator::new(&allocator_create_info);

        // One fence per requested queue. Creation is best-effort: a failure is
        // reported and the queue is skipped rather than aborting device setup.
        let mut fences: Vec<Fence> = Vec::new();
        for queue_info in &queue_infos {
            for index in 0..queue_info.queue_count {
                match Fence::new(&device, queue_info.queue_family_index, index) {
                    Ok(fence) => fences.push(fence),
                    Err(_) => eprintln!(
                        "Failed to create fence for queue {} of queue family {}",
                        index, queue_info.queue_family_index
                    ),
                }
            }
        }

        Ok(Compute{ device, allocator: Some(RwLock::new(allocator)), fences, memory })
    }
}