use crate::error::SpatialResult;
use crate::memory_pool::DistancePool;
use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use std::path::Path;
use std::process::Command;
use std::sync::Arc;
/// Device names paired with per-device `(total_bytes, available_bytes)` memory figures.
type GpuDeviceInfoResult = Result<(Vec<String>, Vec<(usize, usize)>), Box<dyn std::error::Error>>;
/// Summary of detected GPU hardware and the launch limits relevant to kernels.
#[derive(Debug, Clone)]
pub struct GpuCapabilities {
    /// Whether at least one usable GPU backend was detected.
    pub gpu_available: bool,
    /// Number of GPU devices found.
    pub device_count: usize,
    /// Total memory of the first reporting device, in bytes.
    pub total_memory: usize,
    /// Available (free) memory of the first reporting device, in bytes.
    pub available_memory: usize,
    /// CUDA compute capability as `(major, minor)`, when known.
    pub compute_capability: Option<(u32, u32)>,
    /// Maximum threads per block reported/assumed for the device.
    pub max_threads_per_block: usize,
    /// Maximum blocks per grid reported/assumed for the device.
    pub max_blocks_per_grid: usize,
    /// Human-readable device names across all detected backends.
    pub device_names: Vec<String>,
    /// Backends usable on this host; detection always appends `CpuFallback`.
    pub supported_backends: Vec<GpuBackend>,
}
impl Default for GpuCapabilities {
fn default() -> Self {
Self {
gpu_available: false,
device_count: 0,
total_memory: 0,
available_memory: 0,
compute_capability: None,
max_threads_per_block: 1024,
max_blocks_per_grid: 65535,
device_names: Vec::new(),
supported_backends: Vec::new(),
}
}
}
/// Compute backends this module can dispatch to.
#[derive(Debug, Clone, PartialEq)]
pub enum GpuBackend {
    /// NVIDIA CUDA.
    Cuda,
    /// AMD ROCm/HIP.
    Rocm,
    /// Intel Level Zero (currently always routed to the CPU fallback).
    LevelZero,
    /// Vulkan compute.
    Vulkan,
    /// Optimized CPU SIMD path; always available.
    CpuFallback,
}
/// Handle to the detected GPU (or CPU fallback) plus a reusable buffer pool.
pub struct GpuDevice {
    /// Hardware limits and backend inventory discovered at construction.
    capabilities: GpuCapabilities,
    /// Backend chosen by `select_optimal_backend`.
    preferred_backend: GpuBackend,
    /// Distance-buffer pool; reserved for future GPU kernels (unused today).
    #[allow(dead_code)]
    memory_pool: Arc<DistancePool>,
}
impl GpuDevice {
/// Detects GPU capabilities, picks the best backend, and allocates the
/// shared distance-buffer pool.
///
/// # Errors
/// Propagates any failure from capability detection.
pub fn new() -> SpatialResult<Self> {
    let capabilities = Self::detect_capabilities()?;
    Ok(Self {
        preferred_backend: Self::select_optimal_backend(&capabilities),
        memory_pool: Arc::new(DistancePool::new(1000)),
        capabilities,
    })
}
/// Probes the host for GPU hardware, one backend at a time.
///
/// Each probe is compiled in only when its cargo feature is enabled. The
/// first backend to find devices fills the memory/name fields; later probes
/// only fill fields that are still empty. `CpuFallback` is always appended
/// so `supported_backends` is never empty.
fn detect_capabilities() -> SpatialResult<GpuCapabilities> {
    let mut caps = GpuCapabilities::default();
    #[cfg(feature = "cuda")]
    {
        if Self::check_cuda_available() {
            caps.gpu_available = true;
            caps.device_count = Self::get_cuda_device_count();
            caps.supported_backends.push(GpuBackend::Cuda);
            if let Ok((names, memory_info)) = Self::get_cuda_device_info() {
                caps.device_names = names;
                // Only the first device's memory figures are recorded.
                if let Some((total, available)) = memory_info.first() {
                    caps.total_memory = *total;
                    caps.available_memory = *available;
                }
            }
            caps.max_threads_per_block = 1024;
            // 2^31 - 1: x-dimension grid limit on modern CUDA devices.
            caps.max_blocks_per_grid = 2147483647;
            caps.compute_capability = Self::get_cuda_compute_capability();
        }
    }
    #[cfg(feature = "rocm")]
    {
        if Self::check_rocm_available() {
            caps.gpu_available = true;
            let rocm_count = Self::get_rocm_device_count();
            // Keep the larger count if an earlier probe already reported devices.
            if rocm_count > caps.device_count {
                caps.device_count = rocm_count;
            }
            caps.supported_backends.push(GpuBackend::Rocm);
            if let Ok((names, memory_info)) = Self::get_rocm_device_info() {
                if caps.device_names.is_empty() {
                    caps.device_names = names;
                } else {
                    caps.device_names.extend(names);
                }
                if let Some((total, available)) = memory_info.first() {
                    // Do not overwrite memory figures from an earlier probe.
                    if caps.total_memory == 0 {
                        caps.total_memory = *total;
                        caps.available_memory = *available;
                    }
                }
            }
            caps.max_threads_per_block = 1024;
            caps.max_blocks_per_grid = 2147483647;
        }
    }
    #[cfg(feature = "vulkan")]
    {
        if Self::check_vulkan_available() {
            caps.gpu_available = true;
            caps.supported_backends.push(GpuBackend::Vulkan);
            // NOTE(review): device_count is not updated for Vulkan-only
            // systems — confirm whether that is intended.
            if let Ok((names, memory_info)) = Self::get_vulkan_device_info() {
                if caps.device_names.is_empty() {
                    caps.device_names = names;
                } else {
                    caps.device_names.extend(names);
                }
                if let Some((total, available)) = memory_info.first() {
                    if caps.total_memory == 0 {
                        caps.total_memory = *total;
                        caps.available_memory = *available;
                    }
                }
            }
        }
    }
    // The CPU SIMD path is always available.
    caps.supported_backends.push(GpuBackend::CpuFallback);
    Ok(caps)
}
/// Picks the preferred backend in priority order: CUDA > ROCm > Vulkan,
/// defaulting to the CPU fallback when none of those was detected.
fn select_optimal_backend(caps: &GpuCapabilities) -> GpuBackend {
    for candidate in [GpuBackend::Cuda, GpuBackend::Rocm, GpuBackend::Vulkan] {
        if caps.supported_backends.contains(&candidate) {
            return candidate;
        }
    }
    GpuBackend::CpuFallback
}
/// Returns `true` when hardware detection found at least one GPU backend.
pub fn is_gpu_available(&self) -> bool {
    self.capabilities.gpu_available
}
/// Borrows the capabilities discovered at construction time.
pub fn capabilities(&self) -> &GpuCapabilities {
    &self.capabilities
}
/// Suggests a launch block size for a problem of `problem_size` elements.
///
/// The size is rounded down to a multiple of the backend's SIMT width
/// (at least one full warp/wavefront) and capped at the device's
/// per-block thread limit; non-GPU backends get a fixed 256.
pub fn optimal_block_size(&self, problem_size: usize) -> usize {
    let cap = self.capabilities.max_threads_per_block;
    // Round down to a whole number of `width`-sized groups, minimum one.
    let round_to = |width: usize| ((problem_size / width).max(1) * width).min(cap);
    match self.preferred_backend {
        // CUDA warps are 32 threads wide.
        GpuBackend::Cuda => round_to(32),
        // AMD wavefronts are 64 threads wide.
        GpuBackend::Rocm => round_to(64),
        _ => 256.min(cap),
    }
}
/// Heuristic CUDA detection: ask `nvidia-smi` for a device count; if that
/// fails, probe well-known driver library paths for the platform.
/// Library presence does not guarantee a usable GPU.
#[cfg(feature = "cuda")]
fn check_cuda_available() -> bool {
    if let Ok(output) = Command::new("nvidia-smi")
        .arg("--query-gpu=count")
        .arg("--format=csv,noheader,nounits")
        .output()
    {
        if output.status.success() {
            if let Ok(count_str) = String::from_utf8(output.stdout) {
                if let Ok(count) = count_str.trim().parse::<u32>() {
                    return count > 0;
                }
            }
        }
    }
    // nvidia-smi unavailable or unparseable: check the driver library on disk.
    #[cfg(target_os = "linux")]
    {
        Path::exists(Path::new("/usr/local/cuda/lib64/libcuda.so"))
            || Path::exists(Path::new("/usr/lib/x86_64-linux-gnu/libcuda.so"))
            || Path::exists(Path::new("/usr/lib64/libcuda.so"))
    }
    #[cfg(target_os = "windows")]
    {
        Path::exists(Path::new("C:\\Windows\\System32\\nvcuda.dll"))
    }
    #[cfg(not(any(target_os = "linux", target_os = "windows")))]
    false
}
/// Counts CUDA devices: first via the machine-readable `--query-gpu=count`
/// query, then by counting lines of `nvidia-smi -L`; returns 0 when both fail.
#[cfg(feature = "cuda")]
fn get_cuda_device_count() -> usize {
    let count_query = Command::new("nvidia-smi")
        .arg("--query-gpu=count")
        .arg("--format=csv,noheader,nounits")
        .output();
    if let Ok(output) = count_query {
        if output.status.success() {
            let parsed = String::from_utf8(output.stdout)
                .ok()
                .and_then(|text| text.trim().parse::<usize>().ok());
            if let Some(count) = parsed {
                return count;
            }
        }
    }
    // Fallback: `nvidia-smi -L` prints one "GPU n: ..." line per device.
    if let Ok(output) = Command::new("nvidia-smi").arg("-L").output() {
        if output.status.success() {
            if let Ok(listing) = String::from_utf8(output.stdout) {
                return listing.lines().filter(|line| line.starts_with("GPU ")).count();
            }
        }
    }
    0
}
/// Heuristic ROCm detection: try `rocm-smi`; otherwise probe common HIP
/// library locations (Linux only).
#[cfg(feature = "rocm")]
fn check_rocm_available() -> bool {
    if let Ok(output) = Command::new("rocm-smi").arg("--showid").output() {
        if output.status.success() {
            return true;
        }
    }
    // rocm-smi missing or failed: check for the HIP runtime library on disk.
    #[cfg(target_os = "linux")]
    {
        Path::exists(Path::new("/opt/rocm/lib/libhip.so"))
            || Path::exists(Path::new("/usr/lib/libhip.so"))
            || Path::exists(Path::new("/usr/lib/x86_64-linux-gnu/libhip.so"))
    }
    #[cfg(not(target_os = "linux"))]
    false
}
/// Counts ROCm devices via `rocm-smi`; falls back to counting DRM card
/// entries under `/sys/class/drm` on Linux.
#[cfg(feature = "rocm")]
fn get_rocm_device_count() -> usize {
    if let Ok(output) = Command::new("rocm-smi").arg("--showid").output() {
        if output.status.success() {
            if let Ok(list_str) = String::from_utf8(output.stdout) {
                // NOTE(review): counts every line containing "GPU" or "card",
                // which may include headers/footers — verify against real
                // rocm-smi output.
                return list_str
                    .lines()
                    .filter(|line| line.contains("GPU") || line.contains("card"))
                    .count();
            }
        }
    }
    #[cfg(target_os = "linux")]
    {
        use std::fs;
        if let Ok(entries) = fs::read_dir("/sys/class/drm") {
            // Top-level "cardN" nodes are devices; names such as "card0-DP-1"
            // are display connectors and are excluded by the '-' check.
            // NOTE(review): this counts all DRM cards, not only AMD ones.
            let count = entries
                .filter_map(Result::ok)
                .filter(|entry| {
                    if let Ok(name) = entry.file_name().into_string() {
                        name.starts_with("card") && !name.contains("-")
                    } else {
                        false
                    }
                })
                .count();
            if count > 0 {
                return count;
            }
        }
    }
    0
}
/// Heuristic Vulkan detection: try `vulkaninfo --summary`; otherwise probe
/// well-known loader library paths for the platform.
#[cfg(feature = "vulkan")]
fn check_vulkan_available() -> bool {
    if let Ok(output) = Command::new("vulkaninfo").arg("--summary").output() {
        if output.status.success() {
            if let Ok(info_str) = String::from_utf8(output.stdout) {
                // Any mention of a compute queue or a device entry counts.
                return info_str.contains("VK_QUEUE_COMPUTE_BIT")
                    || info_str.contains("deviceType");
            }
        }
    }
    // vulkaninfo unavailable: check for the loader library on disk.
    #[cfg(target_os = "linux")]
    {
        Path::exists(Path::new("/usr/lib/libvulkan.so"))
            || Path::exists(Path::new("/usr/lib/x86_64-linux-gnu/libvulkan.so"))
            || Path::exists(Path::new("/usr/local/lib/libvulkan.so"))
    }
    #[cfg(target_os = "windows")]
    {
        Path::exists(Path::new("C:\\Windows\\System32\\vulkan-1.dll"))
    }
    #[cfg(target_os = "macos")]
    {
        // MoltenVK, or Metal as a proxy for Vulkan-over-Metal support.
        Path::exists(Path::new("/usr/local/lib/libvulkan.dylib"))
            || Path::exists(Path::new(
                "/System/Library/Frameworks/Metal.framework/Metal",
            ))
    }
    #[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))]
    false
}
/// Collects CUDA device names and `(total, free)` memory per device by
/// querying `nvidia-smi`; memory is reported in MiB and converted to bytes.
#[cfg(feature = "cuda")]
fn get_cuda_device_info() -> GpuDeviceInfoResult {
    // One device name per output line.
    let mut device_names: Vec<String> = Vec::new();
    let name_query = Command::new("nvidia-smi")
        .arg("--query-gpu=name")
        .arg("--format=csv,noheader,nounits")
        .output();
    if let Ok(output) = name_query {
        if output.status.success() {
            if let Ok(text) = String::from_utf8(output.stdout) {
                device_names = text.lines().map(|line| line.trim().to_string()).collect();
            }
        }
    }
    // One "total, free" pair per output line; skip lines that fail to parse.
    let mut memory_info: Vec<(usize, usize)> = Vec::new();
    let mem_query = Command::new("nvidia-smi")
        .arg("--query-gpu=memory.total,memory.free")
        .arg("--format=csv,noheader,nounits")
        .output();
    if let Ok(output) = mem_query {
        if output.status.success() {
            if let Ok(text) = String::from_utf8(output.stdout) {
                memory_info = text
                    .lines()
                    .filter_map(|line| {
                        let mut fields = line.split(',');
                        let total = fields.next()?.trim().parse::<usize>().ok()?;
                        let free = fields.next()?.trim().parse::<usize>().ok()?;
                        Some((total * 1024 * 1024, free * 1024 * 1024))
                    })
                    .collect();
            }
        }
    }
    Ok((device_names, memory_info))
}
/// Reads the first device's compute capability ("major.minor") from
/// `nvidia-smi`; returns `None` when the query or the parse fails.
#[cfg(feature = "cuda")]
fn get_cuda_compute_capability() -> Option<(u32, u32)> {
    let output = Command::new("nvidia-smi")
        .arg("--query-gpu=compute_cap")
        .arg("--format=csv,noheader,nounits")
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let text = String::from_utf8(output.stdout).ok()?;
    let line = text.lines().next()?;
    // Expected form "8.6"; anything past the second component is ignored.
    let mut components = line.trim().split('.');
    let major = components.next()?.parse::<u32>().ok()?;
    let minor = components.next()?.parse::<u32>().ok()?;
    Some((major, minor))
}
/// Collects AMD device names and VRAM figures by parsing `rocm-smi` text
/// output; substitutes a generic placeholder when nothing could be parsed.
#[cfg(feature = "rocm")]
fn get_rocm_device_info() -> GpuDeviceInfoResult {
    let mut device_names = Vec::new();
    let mut memory_info = Vec::new();
    if let Ok(output) = Command::new("rocm-smi").arg("--showproductname").output() {
        if output.status.success() {
            if let Ok(info_str) = String::from_utf8(output.stdout) {
                for line in info_str.lines() {
                    // "Card series: <name>" lines carry the product name.
                    if line.contains("Card series:") {
                        if let Some(name) = line.split(':').nth(1) {
                            device_names.push(name.trim().to_string());
                        }
                    }
                }
            }
        }
    }
    if let Ok(output) = Command::new("rocm-smi")
        .arg("--showmeminfo")
        .arg("vram")
        .output()
    {
        if output.status.success() {
            if let Ok(memory_str) = String::from_utf8(output.stdout) {
                for line in memory_str.lines() {
                    // NOTE(review): both "Total memory" and "Used memory"
                    // lines produce a (total, total/2) entry here — confirm
                    // that used-memory lines should contribute.
                    if line.contains("Total memory") || line.contains("Used memory") {
                        if let Some(mem_part) = line
                            .split_whitespace()
                            .find(|s| s.ends_with("MB") || s.ends_with("GB"))
                        {
                            // NOTE(review): fractional sizes such as "1.5GB"
                            // fail the integer parse and are silently skipped.
                            if let Ok(mem_val) = mem_part
                                .trim_end_matches("MB")
                                .trim_end_matches("GB")
                                .parse::<usize>()
                            {
                                let bytes = if mem_part.ends_with("GB") {
                                    mem_val * 1024 * 1024 * 1024
                                } else {
                                    mem_val * 1024 * 1024
                                };
                                // Available memory estimated at half of total.
                                memory_info.push((bytes, bytes / 2));
                            }
                        }
                    }
                }
            }
        }
    }
    // Placeholder when rocm-smi yielded nothing: 8 GiB total / 6 GiB free.
    if device_names.is_empty() && memory_info.is_empty() {
        device_names.push("AMD GPU (ROCm)".to_string());
        memory_info.push((8 * 1024 * 1024 * 1024, 6 * 1024 * 1024 * 1024));
    }
    Ok((device_names, memory_info))
}
/// Collects Vulkan device names and heap sizes by parsing `vulkaninfo`
/// output; substitutes a generic placeholder when no device was parsed.
#[cfg(feature = "vulkan")]
fn get_vulkan_device_info() -> GpuDeviceInfoResult {
    let mut device_names = Vec::new();
    let mut memory_info = Vec::new();
    if let Ok(output) = Command::new("vulkaninfo").arg("--summary").output() {
        if output.status.success() {
            if let Ok(info_str) = String::from_utf8(output.stdout) {
                for line in info_str.lines() {
                    if line.contains("deviceName") {
                        if let Some(name_part) = line.split('=').nth(1) {
                            device_names.push(name_part.trim().to_string());
                        }
                    } else if line.contains("heapSize") {
                        // NOTE(review): assumes heapSize is printed as a plain
                        // byte count after '=' — verify the vulkaninfo format.
                        if let Some(mem_part) = line.split('=').nth(1) {
                            if let Ok(mem_val) = mem_part.trim().parse::<usize>() {
                                // Available memory estimated at 3/4 of heap size.
                                memory_info.push((mem_val, mem_val * 3 / 4));
                            }
                        }
                    }
                }
            }
        }
    }
    // Placeholder when parsing found no devices: 4 GiB total / 3 GiB free.
    if device_names.is_empty() {
        device_names.push("Vulkan Device".to_string());
        memory_info.push((4 * 1024 * 1024 * 1024, 3 * 1024 * 1024 * 1024));
    }
    Ok((device_names, memory_info))
}
/// Stub used when the `cuda` feature is disabled: reports no devices.
#[cfg(not(feature = "cuda"))]
#[allow(dead_code)]
fn get_cuda_device_info() -> GpuDeviceInfoResult {
    Ok((Vec::new(), Vec::new()))
}
/// Stub used when the `cuda` feature is disabled: no compute capability.
#[cfg(not(feature = "cuda"))]
#[allow(dead_code)]
fn get_cuda_compute_capability() -> Option<(u32, u32)> {
    None
}
/// Stub used when the `rocm` feature is disabled: reports no devices.
#[cfg(not(feature = "rocm"))]
#[allow(dead_code)]
fn get_rocm_device_info() -> GpuDeviceInfoResult {
    Ok((Vec::new(), Vec::new()))
}
/// Stub used when the `vulkan` feature is disabled: reports no devices.
#[cfg(not(feature = "vulkan"))]
#[allow(dead_code)]
fn get_vulkan_device_info() -> GpuDeviceInfoResult {
    Ok((Vec::new(), Vec::new()))
}
}
impl Default for GpuDevice {
    /// Builds a device via `new`, degrading to a CPU-only configuration
    /// instead of panicking when hardware detection fails.
    fn default() -> Self {
        match Self::new() {
            Ok(device) => device,
            Err(_) => Self {
                capabilities: GpuCapabilities::default(),
                preferred_backend: GpuBackend::CpuFallback,
                memory_pool: Arc::new(DistancePool::new(1000)),
            },
        }
    }
}
/// Pairwise distance matrix computation with GPU dispatch and CPU fallback.
pub struct GpuDistanceMatrix {
    /// Detected device used for backend dispatch.
    device: Arc<GpuDevice>,
    /// Points per processing batch.
    // NOTE(review): not yet consulted by the visible compute paths.
    batch_size: usize,
    /// Whether mixed precision may be used.
    // NOTE(review): not yet consulted by the visible compute paths.
    use_mixed_precision: bool,
}
impl GpuDistanceMatrix {
/// Creates a distance-matrix engine with a 1024-point batch size and
/// mixed precision enabled.
///
/// # Errors
/// Propagates any failure from GPU capability detection.
pub fn new() -> SpatialResult<Self> {
    Ok(Self {
        device: Arc::new(GpuDevice::new()?),
        batch_size: 1024,
        use_mixed_precision: true,
    })
}
/// Sets the processing batch size (builder style).
pub fn with_batch_size(mut self, batchsize: usize) -> Self {
    self.batch_size = batchsize;
    self
}
/// Enables or disables mixed precision (builder style).
pub fn with_mixed_precision(mut self, use_mixedprecision: bool) -> Self {
    self.use_mixed_precision = use_mixedprecision;
    self
}
/// Computes the full pairwise Euclidean distance matrix for `points`,
/// dispatching on the preferred backend.
///
/// All GPU paths currently delegate to the SIMD CPU implementation, so
/// the result is identical regardless of backend.
///
/// # Errors
/// Propagates any failure from the underlying distance computation.
pub async fn compute_parallel(
    &self,
    points: &ArrayView2<'_, f64>,
) -> SpatialResult<Array2<f64>> {
    if !self.device.is_gpu_available() {
        return self.compute_cpu_fallback(points).await;
    }
    match self.device.preferred_backend {
        GpuBackend::Cuda => self.compute_cuda(points).await,
        GpuBackend::Rocm => self.compute_rocm(points).await,
        GpuBackend::Vulkan => self.compute_vulkan(points).await,
        // Level Zero has no dedicated kernel yet; use the CPU path.
        GpuBackend::LevelZero | GpuBackend::CpuFallback => {
            self.compute_cpu_fallback(points).await
        }
    }
}
/// CUDA kernel not implemented yet; delegates to the CPU fallback.
async fn compute_cuda(&self, points: &ArrayView2<'_, f64>) -> SpatialResult<Array2<f64>> {
    self.compute_cpu_fallback(points).await
}
/// ROCm kernel not implemented yet; delegates to the CPU fallback.
async fn compute_rocm(&self, points: &ArrayView2<'_, f64>) -> SpatialResult<Array2<f64>> {
    self.compute_cpu_fallback(points).await
}
/// Vulkan kernel not implemented yet; delegates to the CPU fallback.
async fn compute_vulkan(&self, points: &ArrayView2<'_, f64>) -> SpatialResult<Array2<f64>> {
    self.compute_cpu_fallback(points).await
}
/// CPU implementation: computes the condensed distance vector with the
/// SIMD-parallel pdist and expands it into a symmetric square matrix.
async fn compute_cpu_fallback(
    &self,
    points: &ArrayView2<'_, f64>,
) -> SpatialResult<Array2<f64>> {
    use crate::simd_distance::parallel_pdist;
    let condensed = parallel_pdist(points, "euclidean")?;
    let n = points.nrows();
    let mut matrix = Array2::zeros((n, n));
    // The condensed form stores the upper triangle row by row; mirror each
    // entry so the result is symmetric with a zero diagonal.
    let mut idx = 0;
    for i in 0..n {
        for j in (i + 1)..n {
            matrix[[i, j]] = condensed[idx];
            matrix[[j, i]] = condensed[idx];
            idx += 1;
        }
    }
    Ok(matrix)
}
}
/// GPU-dispatched k-means clustering (currently backed by the SIMD CPU path).
pub struct GpuKMeans {
    /// Detected device used for backend dispatch.
    device: Arc<GpuDevice>,
    /// Number of clusters.
    k: usize,
    /// Iteration cap.
    // NOTE(review): not yet forwarded to the CPU fallback solver.
    max_iterations: usize,
    /// Convergence tolerance.
    // NOTE(review): not yet forwarded to the CPU fallback solver.
    tolerance: f64,
    /// Points per processing batch.
    // NOTE(review): not yet forwarded to the CPU fallback solver.
    batch_size: usize,
}
impl GpuKMeans {
/// Creates a k-means engine for `k` clusters with default settings
/// (100 iterations, 1e-6 tolerance, 1024-point batches).
///
/// # Errors
/// Propagates any failure from GPU capability detection.
pub fn new(k: usize) -> SpatialResult<Self> {
    Ok(Self {
        device: Arc::new(GpuDevice::new()?),
        k,
        max_iterations: 100,
        tolerance: 1e-6,
        batch_size: 1024,
    })
}
/// Sets the iteration cap (builder style).
pub fn with_max_iterations(mut self, maxiterations: usize) -> Self {
    self.max_iterations = maxiterations;
    self
}
/// Sets the convergence tolerance (builder style).
pub fn with_tolerance(mut self, tolerance: f64) -> Self {
    self.tolerance = tolerance;
    self
}
/// Sets the processing batch size (builder style).
pub fn with_batch_size(mut self, batchsize: usize) -> Self {
    self.batch_size = batchsize;
    self
}
/// Clusters `points` into `self.k` groups, returning `(centroids,
/// assignments)`; dispatches on the preferred backend (all GPU paths
/// currently delegate to the CPU implementation).
pub async fn fit(
    &self,
    points: &ArrayView2<'_, f64>,
) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
    if !self.device.is_gpu_available() {
        return self.fit_cpu_fallback(points).await;
    }
    match self.device.preferred_backend {
        GpuBackend::Cuda => self.fit_cuda(points).await,
        GpuBackend::Rocm => self.fit_rocm(points).await,
        GpuBackend::Vulkan => self.fit_vulkan(points).await,
        GpuBackend::CpuFallback | GpuBackend::LevelZero => self.fit_cpu_fallback(points).await,
    }
}
/// CUDA k-means not implemented yet; delegates to the CPU fallback.
async fn fit_cuda(
    &self,
    points: &ArrayView2<'_, f64>,
) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
    self.fit_cpu_fallback(points).await
}
/// ROCm k-means not implemented yet; delegates to the CPU fallback.
async fn fit_rocm(
    &self,
    points: &ArrayView2<'_, f64>,
) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
    self.fit_cpu_fallback(points).await
}
/// Vulkan k-means not implemented yet; delegates to the CPU fallback.
async fn fit_vulkan(
    &self,
    points: &ArrayView2<'_, f64>,
) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
    self.fit_cpu_fallback(points).await
}
/// CPU implementation backed by the advanced SIMD k-means solver.
///
/// NOTE(review): `self.max_iterations`, `self.tolerance` and
/// `self.batch_size` are not forwarded to the solver — confirm whether the
/// hard-coded mixed-precision / block-size settings here are intended.
async fn fit_cpu_fallback(
    &self,
    points: &ArrayView2<'_, f64>,
) -> SpatialResult<(Array2<f64>, Array1<usize>)> {
    use crate::simd_distance::advanced_simd_clustering::AdvancedSimdKMeans;
    let advanced_kmeans = AdvancedSimdKMeans::new(self.k)
        .with_mixed_precision(true)
        .with_block_size(256);
    advanced_kmeans.fit(points)
}
}
/// GPU-dispatched k-nearest-neighbor search (currently backed by the SIMD CPU path).
pub struct GpuNearestNeighbors {
    /// Detected device used for backend dispatch.
    device: Arc<GpuDevice>,
    /// Batch size for index construction; reserved for future GPU kernels.
    #[allow(dead_code)]
    build_batch_size: usize,
    /// Batch size for queries; reserved for future GPU kernels.
    #[allow(dead_code)]
    query_batch_size: usize,
}
impl GpuNearestNeighbors {
/// Creates a k-NN searcher with default batch sizes (1024 build / 256 query).
///
/// # Errors
/// Propagates any failure from GPU capability detection.
pub fn new() -> SpatialResult<Self> {
    Ok(Self {
        device: Arc::new(GpuDevice::new()?),
        build_batch_size: 1024,
        query_batch_size: 256,
    })
}
/// Finds the `k` nearest rows of `data_points` for every row of
/// `query_points`, returning matrices of neighbor indices and distances.
///
/// Dispatches on the preferred backend; all GPU paths currently delegate
/// to the SIMD CPU implementation.
///
/// # Errors
/// Propagates any failure from the underlying search.
pub async fn knn_search(
    &self,
    query_points: &ArrayView2<'_, f64>,
    data_points: &ArrayView2<'_, f64>,
    k: usize,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
    if !self.device.is_gpu_available() {
        return self
            .knn_search_cpu_fallback(query_points, data_points, k)
            .await;
    }
    match self.device.preferred_backend {
        GpuBackend::Cuda => self.knn_search_cuda(query_points, data_points, k).await,
        GpuBackend::Rocm => self.knn_search_rocm(query_points, data_points, k).await,
        GpuBackend::Vulkan => self.knn_search_vulkan(query_points, data_points, k).await,
        // Name the remaining variants explicitly (consistent with the other
        // dispatchers) so adding a backend forces this match to be revisited.
        GpuBackend::LevelZero | GpuBackend::CpuFallback => {
            self.knn_search_cpu_fallback(query_points, data_points, k)
                .await
        }
    }
}
/// CUDA k-NN not implemented yet; delegates to the CPU fallback.
async fn knn_search_cuda(
    &self,
    query_points: &ArrayView2<'_, f64>,
    data_points: &ArrayView2<'_, f64>,
    k: usize,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
    self.knn_search_cpu_fallback(query_points, data_points, k)
        .await
}
/// ROCm k-NN not implemented yet; delegates to the CPU fallback.
async fn knn_search_rocm(
    &self,
    query_points: &ArrayView2<'_, f64>,
    data_points: &ArrayView2<'_, f64>,
    k: usize,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
    self.knn_search_cpu_fallback(query_points, data_points, k)
        .await
}
/// Vulkan k-NN not implemented yet; delegates to the CPU fallback.
async fn knn_search_vulkan(
    &self,
    query_points: &ArrayView2<'_, f64>,
    data_points: &ArrayView2<'_, f64>,
    k: usize,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
    self.knn_search_cpu_fallback(query_points, data_points, k)
        .await
}
/// CPU implementation backed by the advanced SIMD nearest-neighbor search.
async fn knn_search_cpu_fallback(
    &self,
    query_points: &ArrayView2<'_, f64>,
    data_points: &ArrayView2<'_, f64>,
    k: usize,
) -> SpatialResult<(Array2<usize>, Array2<f64>)> {
    use crate::simd_distance::advanced_simd_clustering::AdvancedSimdNearestNeighbors;
    let advanced_nn = AdvancedSimdNearestNeighbors::new();
    advanced_nn.simd_knn_advanced_fast(query_points, data_points, k)
}
}
impl Default for GpuNearestNeighbors {
    /// Same defaults as `new`, but degrades to a CPU-only device instead
    /// of panicking when detection fails.
    fn default() -> Self {
        match Self::new() {
            Ok(searcher) => searcher,
            Err(_) => Self {
                device: Arc::new(GpuDevice::default()),
                build_batch_size: 1024,
                query_batch_size: 256,
            },
        }
    }
}
/// Chooses between CPU, GPU, or mixed execution based on problem size.
pub struct HybridProcessor {
    /// Detected device used for availability checks and capacity estimates.
    gpu_device: Arc<GpuDevice>,
    /// Below this size everything runs on the CPU.
    cpu_threshold: usize,
    /// At or above this size everything runs on the GPU.
    gpu_threshold: usize,
}
impl HybridProcessor {
/// Creates a processor with default thresholds: below 1000 items run on
/// the CPU, at or above 100000 items run on the GPU, hybrid in between.
///
/// # Errors
/// Propagates any failure from GPU capability detection.
pub fn new() -> SpatialResult<Self> {
    Ok(Self {
        gpu_device: Arc::new(GpuDevice::new()?),
        cpu_threshold: 1000,
        gpu_threshold: 100000,
    })
}
/// Picks a processing strategy for a dataset of `dataset_size` items.
///
/// Without a GPU the answer is always `CpuOnly`; otherwise small inputs
/// stay on the CPU, mid-sized inputs are split, and large inputs go to
/// the GPU.
pub fn choose_strategy(&self, dataset_size: usize) -> ProcessingStrategy {
    if !self.gpu_device.is_gpu_available() || dataset_size < self.cpu_threshold {
        ProcessingStrategy::CpuOnly
    } else if dataset_size < self.gpu_threshold {
        ProcessingStrategy::Hybrid
    } else {
        ProcessingStrategy::GpuOnly
    }
}
/// Splits `total_size` items into `(cpu_batch, gpu_batch)` sizes.
///
/// The CPU takes a quarter of the work (at least 1000 items); the GPU
/// takes up to three quarters, capped by a rough capacity estimate of
/// total device memory divided by 8 KiB per item.
pub fn optimal_batch_sizes(&self, total_size: usize) -> (usize, usize) {
    let gpu_capacity = self.gpu_device.capabilities().total_memory / (8 * 1024);
    let cpu_batch = (total_size / 4).max(1000);
    let gpu_batch = (total_size * 3 / 4).min(gpu_capacity);
    (cpu_batch, gpu_batch)
}
}
impl Default for HybridProcessor {
    /// Same thresholds as `new`, but degrades to a CPU-only device instead
    /// of panicking when detection fails.
    fn default() -> Self {
        match Self::new() {
            Ok(processor) => processor,
            Err(_) => Self {
                gpu_device: Arc::new(GpuDevice::default()),
                cpu_threshold: 1000,
                gpu_threshold: 100000,
            },
        }
    }
}
/// Where to run a workload, as chosen by `HybridProcessor::choose_strategy`.
#[derive(Debug, Clone, PartialEq)]
pub enum ProcessingStrategy {
    /// Run entirely on the CPU SIMD path.
    CpuOnly,
    /// Run entirely on the GPU.
    GpuOnly,
    /// Split the work between CPU and GPU.
    Hybrid,
}
// Process-wide, lazily initialized device handle shared by the free functions below.
static GLOBAL_GPU_DEVICE: std::sync::OnceLock<GpuDevice> = std::sync::OnceLock::new();
/// Returns the process-wide `GpuDevice`, initializing it on first use.
#[allow(dead_code)]
pub fn global_gpu_device() -> &'static GpuDevice {
    GLOBAL_GPU_DEVICE.get_or_init(GpuDevice::default)
}
/// Convenience wrapper: whether the global device detected any GPU backend.
#[allow(dead_code)]
pub fn is_gpu_acceleration_available() -> bool {
    global_gpu_device().is_gpu_available()
}
/// Convenience wrapper: capabilities of the global device.
#[allow(dead_code)]
pub fn get_gpu_capabilities() -> &'static GpuCapabilities {
    global_gpu_device().capabilities()
}
/// Prints a human-readable summary of detected GPU hardware to stdout.
#[allow(dead_code)]
pub fn report_gpu_status() {
    let device = global_gpu_device();
    let caps = device.capabilities();
    println!("GPU Acceleration Status:");
    println!(" Available: {}", caps.gpu_available);
    println!(" Device Count: {}", caps.device_count);
    if caps.gpu_available {
        // Memory figures are stored in bytes; render as GiB.
        println!(
            " Total Memory: {:.1} GB",
            caps.total_memory as f64 / (1024.0 * 1024.0 * 1024.0)
        );
        println!(
            " Available Memory: {:.1} GB",
            caps.available_memory as f64 / (1024.0 * 1024.0 * 1024.0)
        );
        println!(" Max Threads/Block: {}", caps.max_threads_per_block);
        println!(" Supported Backends: {:?}", caps.supported_backends);
        for (i, name) in caps.device_names.iter().enumerate() {
            println!(" Device {i}: {name}");
        }
    } else {
        println!(" Reason: No compatible GPU devices found");
        println!(" Fallback: Using optimized CPU SIMD operations");
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Device construction must succeed on any host (CPU fallback at minimum).
    #[test]
    fn test_gpu_device_creation() {
        let device = GpuDevice::new();
        assert!(device.is_ok());
        let device = device.expect("Operation failed");
        // `CpuFallback` is always appended, so the backend list is never empty.
        assert!(!device.capabilities().supported_backends.is_empty());
    }

    /// Small inputs stay on CPU; large inputs may go to the GPU when present.
    #[test]
    fn test_processing_strategy_selection() {
        let processor = HybridProcessor::new().expect("Operation failed");
        let strategy = processor.choose_strategy(500);
        assert_eq!(strategy, ProcessingStrategy::CpuOnly);
        let strategy = processor.choose_strategy(200000);
        // Either outcome is valid, depending on whether a GPU exists.
        assert!(matches!(
            strategy,
            ProcessingStrategy::GpuOnly | ProcessingStrategy::CpuOnly
        ));
    }

    /// The distance matrix for 4 points must be 4x4 on any backend.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn test_gpu_distance_matrix() {
        let points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
        let gpu_matrix = GpuDistanceMatrix::new().expect("Operation failed");
        let points_view = points.view();
        let result = gpu_matrix.compute_parallel(&points_view).await;
        assert!(result.is_ok());
        let matrix = result.expect("Operation failed");
        assert_eq!(matrix.dim(), (4, 4));
    }

    /// k-means with k = 2 must return exactly 2 centroids.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn test_gpu_kmeans() {
        // Two well-separated clusters around (0, 0) and (5, 5).
        let points = array![
            [0.0, 0.0],
            [0.1, 0.1],
            [0.0, 0.1],
            [5.0, 5.0],
            [5.1, 5.1],
            [5.0, 5.1],
        ];
        let gpu_kmeans = GpuKMeans::new(2).expect("Operation failed");
        let points_view = points.view();
        let result = gpu_kmeans.fit(&points_view).await;
        assert!(result.is_ok());
        let (centroids, _assignments) = result.expect("Operation failed");
        assert_eq!(centroids.nrows(), 2);
    }

    /// The nearest neighbor of (0.1, 0.1) must be data point 0 at (0.0, 0.0).
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn test_gpu_nearest_neighbors() {
        let data_points = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
        let query_points = array![[0.1, 0.1], [0.9, 0.9]];
        let gpu_nn = GpuNearestNeighbors::new().expect("Operation failed");
        let query_view = query_points.view();
        let data_view = data_points.view();
        let result = gpu_nn.knn_search(&query_view, &data_view, 2).await;
        assert!(result.is_ok());
        let (indices, _distances) = result.expect("Operation failed");
        assert_eq!(indices[[0, 0]], 0);
    }

    /// The global accessors must be callable and must not panic.
    #[test]
    fn test_global_gpu_functions() {
        let device = global_gpu_device();
        // Either we found named devices, or detection reported no GPU.
        assert!(!device.capabilities.device_names.is_empty() || !device.capabilities.gpu_available);
        report_gpu_status();
        let _caps = get_gpu_capabilities();
        let _available = is_gpu_acceleration_available();
    }
}