1use crate::GpuOptimError;
6#[cfg(any(
7 feature = "cuda",
8 feature = "metal",
9 feature = "opencl",
10 feature = "wgpu"
11))]
12use scirs2_core::gpu::{GpuBackend, GpuContext};
13
/// Rounds `size` up to the next multiple of `alignment`.
///
/// `alignment` must be a power of two for rounding to take place. When it is
/// zero or not a power of two, `size` is returned unchanged.
///
/// # Panics
///
/// Panics if rounding up would exceed `usize::MAX`. Previously this case
/// wrapped around in release builds and silently produced a size far smaller
/// than the one requested.
pub fn align_size(size: usize, alignment: usize) -> usize {
    // `0usize.is_power_of_two()` is false, so this guard also covers zero.
    if !alignment.is_power_of_two() {
        return size;
    }

    // For a power of two, `alignment - 1` is a mask of the low bits that must
    // be cleared in an aligned value.
    let mask = alignment - 1;
    let bumped = size
        .checked_add(mask)
        .expect("align_size: aligned size overflows usize");
    bumped & !mask
}
21
/// Reports whether `addr` is a multiple of `alignment`.
///
/// Returns `false` whenever `alignment` is not a power of two (which includes
/// zero); otherwise returns `true` exactly when the low bits of `addr` below
/// the alignment are all clear.
pub fn is_aligned(addr: usize, alignment: usize) -> bool {
    // Short-circuiting keeps `alignment - 1` from being evaluated for zero.
    alignment.is_power_of_two() && (addr & (alignment - 1)) == 0
}
29
/// Computes a fragmentation ratio from a free-block histogram.
///
/// Each entry of `free_blocks` is a `(size, count)` pair: `count` free blocks
/// of `size` bytes each. The result is
/// `1.0 - largest_block_size / total_free_bytes`, so `0.0` means all free
/// memory sits in blocks of the largest size class and values approaching
/// `1.0` mean the free memory is spread over many comparatively small blocks.
///
/// Returns `0.0` when there is no free memory at all (empty slice, or every
/// entry has a zero size or zero count).
///
/// Entries with `count == 0` describe no actual block and are ignored; counting
/// their size as the "largest block" could otherwise push the result below
/// zero. Totals saturate at `usize::MAX` instead of overflowing.
pub fn calculate_fragmentation(free_blocks: &[(usize, usize)]) -> f32 {
    let mut total_free: usize = 0;
    let mut largest_block: usize = 0;

    for &(size, count) in free_blocks {
        if count == 0 {
            // An empty size class contributes neither bytes nor a candidate
            // for the largest available block.
            continue;
        }
        total_free = total_free.saturating_add(size.saturating_mul(count));
        largest_block = largest_block.max(size);
    }

    if total_free == 0 {
        0.0
    } else {
        1.0 - (largest_block as f32 / total_free as f32)
    }
}
45
/// Renders a byte count as a human-readable string using 1024-based units.
///
/// Values below 1024 are printed as an exact integer followed by `B`. Larger
/// values are divided by 1024 until they drop below 1024 or the largest unit
/// (`TB`) is reached, and are printed with two decimal places.
pub fn format_bytes(bytes: usize) -> String {
    const BASE_UNIT: &str = "B";
    const LARGER_UNITS: [&str; 4] = ["KB", "MB", "GB", "TB"];

    let mut scaled = bytes as f64;
    let mut suffix = BASE_UNIT;

    // Promote one unit at a time; running out of units stops the scaling, so
    // anything beyond the terabyte range is still reported in TB.
    for &larger in LARGER_UNITS.iter() {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        suffix = larger;
    }

    if suffix == BASE_UNIT {
        // Never scaled: print the exact integer rather than a float.
        format!("{} {}", bytes, suffix)
    } else {
        format!("{:.2} {}", scaled, suffix)
    }
}
63
/// Returns the smallest power of two that is greater than or equal to `n`.
///
/// `0` maps to `1`, and a value that is already a power of two is returned
/// unchanged.
///
/// The computation is delegated to the standard library so that it is correct
/// for every pointer width; the previous hand-rolled shift assumed a 64-bit
/// `usize`.
///
/// # Panics
///
/// Panics if the result does not fit in `usize`, i.e. when `n` is larger than
/// the greatest representable power of two. Previously this case yielded `1`
/// in release builds.
pub fn next_power_of_two(n: usize) -> usize {
    n.checked_next_power_of_two()
        .expect("next_power_of_two: result does not fit in usize")
}
74
75pub fn validate_ptr_and_size(ptr: *mut u8, size: usize) -> Result<(), GpuOptimError> {
77 if ptr.is_null() {
78 return Err(GpuOptimError::InvalidState("Null pointer".to_string()));
79 }
80
81 if size == 0 {
82 return Err(GpuOptimError::InvalidState("Zero size".to_string()));
83 }
84
85 Ok(())
86}
87
/// Computes a one-dimensional launch configuration for `n` work items.
///
/// Returns `(grid_size, block_size)`, where `block_size` is the smaller of
/// 256 and `max_threads`, and `grid_size` is the number of such blocks needed
/// to cover all `n` items (`ceil(n / block_size)`). For `n == 0` the grid size
/// is `0`.
///
/// A `max_threads` of `0` is treated as `1` so that the block size is never
/// zero; previously that input caused a division-by-zero panic.
pub fn calculate_block_size(n: usize, max_threads: usize) -> (usize, usize) {
    /// Upper bound on the number of threads placed in a single block.
    const PREFERRED_BLOCK_SIZE: usize = 256;

    let block_size = max_threads.clamp(1, PREFERRED_BLOCK_SIZE);

    // Ceiling division written without `n + block_size - 1`, which would
    // overflow for `n` close to `usize::MAX`.
    let grid_size = n / block_size + usize::from(n % block_size != 0);

    (grid_size, block_size)
}
94
95#[cfg(any(
97 feature = "cuda",
98 feature = "metal",
99 feature = "opencl",
100 feature = "wgpu"
101))]
102pub fn get_optimal_backend() -> GpuBackend {
103 let backends = [
105 GpuBackend::Cuda,
106 GpuBackend::Metal,
107 GpuBackend::Rocm,
108 GpuBackend::Wgpu,
109 ];
110
111 for backend in &backends {
112 if GpuContext::new(*backend).is_ok() {
113 return *backend;
114 }
115 }
116
117 GpuBackend::Cpu
119}
120
121#[cfg(not(any(
123 feature = "cuda",
124 feature = "metal",
125 feature = "opencl",
126 feature = "wgpu"
127)))]
128pub fn get_optimal_backend() -> GpuBackend {
129 GpuBackend::Cpu
130}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Rounding to a 256-byte boundary: below, exactly on, and just past a multiple.
    #[test]
    fn test_align_size() {
        let cases: [(usize, usize); 3] = [(100, 256), (256, 256), (300, 512)];
        for &(size, expected) in cases.iter() {
            assert_eq!(align_size(size, 256), expected);
        }
    }

    /// A page-aligned address passes a 256-byte check; the next byte does not.
    #[test]
    fn test_is_aligned() {
        let cases: [(usize, bool); 2] = [(0x1000, true), (0x1001, false)];
        for &(addr, expected) in cases.iter() {
            assert_eq!(is_aligned(addr, 256), expected);
        }
    }

    /// Scaled units use two decimals; plain bytes are printed as integers.
    #[test]
    fn test_format_bytes() {
        let cases: [(usize, &str); 3] = [
            (1024, "1.00 KB"),
            (1048576, "1.00 MB"),
            (512, "512 B"),
        ];
        for &(bytes, expected) in cases.iter() {
            assert_eq!(format_bytes(bytes), expected);
        }
    }

    /// Covers a non-power input, an exact power of two, and zero.
    #[test]
    fn test_next_power_of_two() {
        let cases: [(usize, usize); 3] = [(100, 128), (128, 128), (0, 1)];
        for &(input, expected) in cases.iter() {
            assert_eq!(next_power_of_two(input), expected);
        }
    }
}