#[cfg(not(feature = "std"))]
extern crate alloc;
#[cfg(not(feature = "std"))]
use alloc::string::{String, ToString};
use super::buffer::GpuBuffer;
use super::error::{GpuError, GpuResult};
use super::plan::GpuDirection;
use super::GpuBackend;
use super::GpuCapabilities;
use crate::kernel::{Complex, Float};
/// Reports whether an NVIDIA CUDA device appears to be present on this host.
///
/// Detection is purely heuristic: on Linux it probes well-known driver
/// filesystem entries, on Windows it looks for the CUDA driver DLL, and on
/// every other OS it reports `false`. It never loads the driver.
#[must_use]
pub fn is_available() -> bool {
    #[cfg(target_os = "linux")]
    {
        // Either the device node or the driver's procfs entry is enough
        // evidence that the NVIDIA kernel driver is installed.
        ["/dev/nvidia0", "/proc/driver/nvidia/version"]
            .iter()
            .any(|probe| std::path::Path::new(probe).exists())
    }
    #[cfg(target_os = "windows")]
    {
        // nvcuda.dll is installed by the NVIDIA driver package.
        std::path::Path::new("C:\\Windows\\System32\\nvcuda.dll").exists()
    }
    #[cfg(not(any(target_os = "linux", target_os = "windows")))]
    {
        // No probe implemented for this platform (e.g. macOS has no CUDA).
        false
    }
}
/// Returns a capability description for the detected CUDA device.
///
/// # Errors
///
/// Returns [`GpuError::NoBackendAvailable`] when no CUDA device is detected.
///
/// NOTE(review): device name, memory sizes and compute-unit count are
/// placeholder values (the real driver is never queried here); only the
/// backend tag, FFT-size limit and precision flags are meaningful.
pub fn query_capabilities() -> GpuResult<GpuCapabilities> {
    if is_available() {
        Ok(GpuCapabilities {
            backend: GpuBackend::Cuda,
            device_name: String::from("NVIDIA GPU"),
            total_memory: 0,
            available_memory: 0,
            max_fft_size: 1 << 27,
            supports_f64: true,
            supports_f16: true,
            compute_units: 0,
            max_workgroup_size: 1024,
        })
    } else {
        Err(GpuError::NoBackendAvailable)
    }
}
/// Waits for all outstanding device work to complete.
///
/// Stub: no CUDA stream or context is ever created by this module, so there
/// is nothing to wait on and this always succeeds immediately.
pub fn synchronize() -> GpuResult<()> {
    Ok(())
}
/// An FFT execution plan for the CUDA backend.
///
/// Currently the CUDA path is stubbed: [`CudaFftPlan::execute`] always runs a
/// CPU fallback, and `handle` is never bound to a real cuFFT plan.
#[derive(Debug)]
pub struct CudaFftPlan {
    /// Transform length (number of complex points per FFT).
    size: usize,
    /// Number of back-to-back transforms executed per call.
    batch_size: usize,
    /// Reserved for a future cuFFT plan handle; always 0 while stubbed.
    #[allow(dead_code)]
    handle: u64,
}
impl CudaFftPlan {
    /// Creates a plan for `batch_size` FFTs of `size` points each.
    ///
    /// # Errors
    ///
    /// * [`GpuError::NoBackendAvailable`] when no CUDA device is detected.
    /// * [`GpuError::InvalidSize`] when `size` fails validation (see below).
    pub fn new(size: usize, batch_size: usize) -> GpuResult<Self> {
        if !is_available() {
            return Err(GpuError::NoBackendAvailable);
        }
        // Parentheses make the original implicit precedence explicit
        // (`&&` binds tighter than `||`): a size is rejected when it is zero,
        // or when it is a non-power-of-two larger than 2^24.
        // NOTE(review): if the intent was to also cap power-of-two sizes,
        // this should be three `||` terms — confirm against the spec.
        if size == 0 || (!size.is_power_of_two() && size > (1 << 24)) {
            return Err(GpuError::InvalidSize(size));
        }
        Ok(Self {
            size,
            batch_size,
            // Placeholder: no real cuFFT plan is created while stubbed.
            handle: 0,
        })
    }

    /// Runs the planned transform, reading `input` and writing `output`.
    ///
    /// Both buffers must hold exactly `size * batch_size` elements.
    ///
    /// # Errors
    ///
    /// * [`GpuError::SizeMismatch`] when either buffer has the wrong length.
    /// * Any error produced by the CPU fallback path.
    pub fn execute<T: Float>(
        &self,
        input: &GpuBuffer<T>,
        output: &mut GpuBuffer<T>,
        direction: GpuDirection,
    ) -> GpuResult<()> {
        let expected_size = self.size * self.batch_size;
        if input.size() != expected_size || output.size() != expected_size {
            return Err(GpuError::SizeMismatch {
                expected: expected_size,
                // Report the smaller of the two actual sizes as the offender.
                got: input.size().min(output.size()),
            });
        }
        // CUDA execution is not implemented yet; always use the CPU fallback.
        self.execute_fallback(input, output, direction)
    }

    /// CPU fallback: runs each batch through an f64 CPU FFT plan and converts
    /// the result back to `T`.
    fn execute_fallback<T: Float>(
        &self,
        input: &GpuBuffer<T>,
        output: &mut GpuBuffer<T>,
        direction: GpuDirection,
    ) -> GpuResult<()> {
        use crate::api::{Direction, Flags, Plan};
        let dir = match direction {
            GpuDirection::Forward => Direction::Forward,
            GpuDirection::Inverse => Direction::Backward,
        };
        // Hoisted out of the batch loop: the plan parameters are identical for
        // every batch, so planning once avoids redundant work and avoids
        // partially-written output when planning fails.
        let plan = match Plan::dft_1d(self.size, dir, Flags::ESTIMATE) {
            Some(plan) => plan,
            None => {
                return Err(GpuError::ExecutionFailed(
                    "Failed to create CPU fallback plan".into(),
                ))
            }
        };
        for batch in 0..self.batch_size {
            let start = batch * self.size;
            let end = start + self.size;
            let input_slice = &input.cpu_data()[start..end];
            let output_slice = &mut output.cpu_data_mut()[start..end];
            // Widen to f64 for the CPU plan; non-finite conversions become 0.
            let input_f64: Vec<Complex<f64>> = input_slice
                .iter()
                .map(|c| {
                    Complex::new(c.re.to_f64().unwrap_or(0.0), c.im.to_f64().unwrap_or(0.0))
                })
                .collect();
            let mut output_f64 = vec![Complex::<f64>::zero(); self.size];
            plan.execute(&input_f64, &mut output_f64);
            // Narrow the f64 result back to the caller's precision.
            for (i, c) in output_f64.iter().enumerate() {
                output_slice[i] = Complex::new(T::from_f64(c.re), T::from_f64(c.im));
            }
        }
        Ok(())
    }
}
impl Drop for CudaFftPlan {
    fn drop(&mut self) {
        // Intentionally empty: `handle` is never backed by a real cuFFT plan
        // (it is always 0), so there is no device resource to release. When
        // real cuFFT support lands, the `cufftDestroy` call belongs here.
    }
}
/// Uploads the buffer's host-side data to the device.
///
/// Stub: the CUDA memcpy path is not implemented, so this is a no-op that
/// always reports success (the data remains host-resident).
pub fn upload_buffer<T: Float>(_buffer: &mut GpuBuffer<T>) -> GpuResult<()> {
    Ok(())
}
/// Downloads the buffer's device-side data back to the host.
///
/// Stub: no device copy ever exists, so this is a no-op that always succeeds.
pub fn download_buffer<T: Float>(_buffer: &mut GpuBuffer<T>) -> GpuResult<()> {
    Ok(())
}
/// Releases a device allocation.
///
/// Stub: no device memory is ever allocated by this backend, so the pointer
/// is ignored and success is always reported.
pub fn free_buffer(_ptr: *mut core::ffi::c_void) -> GpuResult<()> {
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Availability probing must never panic, whatever the host hardware.
    #[test]
    fn test_cuda_availability() {
        let _ = is_available();
    }

    /// Capability queries succeed and are tagged correctly on CUDA hosts.
    #[test]
    fn test_cuda_capabilities() {
        if !is_available() {
            return;
        }
        let caps = query_capabilities().expect("Failed to query capabilities");
        assert_eq!(caps.backend, GpuBackend::Cuda);
        assert!(caps.supports_f64);
    }

    /// A small power-of-two plan can be created on CUDA hosts.
    #[test]
    fn test_cuda_plan_creation() {
        if !is_available() {
            return;
        }
        assert!(CudaFftPlan::new(1024, 1).is_ok());
    }
}