pub mod context;
pub mod graph;
pub mod kernels;
pub mod memory;
pub mod streams;
pub mod tensor_core;
#[cfg(feature = "advanced_math")]
pub use context::{CudaContext, CudaDeviceProperties, CudaProfiler};
pub use graph::{
CudaGraph, CudaGraphBuilder, CudaGraphExec, GraphCaptureMode, GraphExecUpdateResult,
GraphInstantiationFlags, GraphNode, GraphNodeType, GraphStats, GraphUpdateResult,
HostNodeParams, KernelNodeParams, MemCopyKind, MemCopyNodeParams, MemSetNodeParams,
QuantumGraphScheduler,
};
#[cfg(feature = "advanced_math")]
pub use kernels::CudaKernel;
pub use kernels::{
CudaKernelConfig, CudaKernelStats, CudaQuantumKernels, GateType, OptimizationLevel,
};
pub use memory::GpuMemoryType;
#[cfg(feature = "advanced_math")]
pub use memory::{GpuMemory, GpuMemoryBlock, GpuMemoryPool};
#[cfg(feature = "advanced_math")]
pub use streams::CudaStream;
pub use streams::{StreamFlags, StreamPriority};
pub use tensor_core::{
fp16_utils, AccumulatorPrecision, TensorCoreConfig, TensorCoreGeneration, TensorCoreKernels,
TensorCoreOps,
};
use crate::error::Result;
/// Initialize the CUDA subsystem.
///
/// With the `advanced_math` feature enabled, this queries the device count and then
/// creates a context on device 0. Without the feature it does nothing and returns `Ok(())`.
///
/// # Errors
///
/// Only in `advanced_math` builds:
/// - propagates any error from `CudaContext::get_device_count` or `CudaContext::new`;
/// - returns `SimulatorError::ResourceExhausted` when the reported device count is zero.
pub fn initialize() -> Result<()> {
    #[cfg(feature = "advanced_math")]
    {
        if CudaContext::get_device_count()? == 0 {
            return Err(crate::error::SimulatorError::ResourceExhausted(
                "No CUDA devices available".to_string(),
            ));
        }
        // The context is not kept: it is created on device 0 and dropped right here.
        drop(CudaContext::new(0)?);
    }
    Ok(())
}
/// Report whether at least one CUDA device is present.
///
/// With the `advanced_math` feature enabled, this is `true` only when
/// `CudaContext::get_device_count` succeeds with a count greater than zero; a failed
/// query is treated the same as "no devices". Without the feature it is always `false`.
pub fn is_available() -> bool {
    #[cfg(feature = "advanced_math")]
    {
        matches!(CudaContext::get_device_count(), Ok(count) if count > 0)
    }
    #[cfg(not(feature = "advanced_math"))]
    {
        false
    }
}
/// Return the number of CUDA devices.
///
/// Builds without the `advanced_math` feature always report `Ok(0)`. Builds with the
/// feature forward the result of `CudaContext::get_device_count` unchanged.
///
/// # Errors
///
/// Only in `advanced_math` builds: whatever error `CudaContext::get_device_count` returns.
pub fn get_device_count() -> Result<i32> {
    #[cfg(not(feature = "advanced_math"))]
    {
        Ok(0)
    }
    #[cfg(feature = "advanced_math")]
    {
        CudaContext::get_device_count()
    }
}
/// Fetch the properties of the CUDA device identified by `device_id`.
///
/// A context is created for that device, its properties are cloned, and the context is
/// dropped before returning. Only compiled with the `advanced_math` feature.
///
/// # Errors
///
/// Propagates any error from `CudaContext::new`.
#[cfg(feature = "advanced_math")]
pub fn get_device_properties(device_id: i32) -> Result<CudaDeviceProperties> {
    let properties = CudaContext::new(device_id)?
        .get_device_properties()
        .clone();
    Ok(properties)
}
#[cfg(not(feature = "advanced_math"))]
pub fn get_device_properties(_device_id: i32) -> Result<()> {
Err(crate::error::SimulatorError::UnsupportedOperation(
"CUDA not available".to_string(),
))
}
#[cfg(test)]
mod tests {
    // Tests for the CUDA facade. Tests that use `CudaContext`, `CudaStream` or `GpuMemory`
    // are compiled only with the `advanced_math` feature, because those re-exports are
    // gated on it; the graph and kernel-config tests run in every configuration.
    use super::*;

    /// `is_available` and `get_device_count` must agree on whether any device exists.
    #[test]
    fn test_cuda_availability() {
        let device_count = get_device_count().unwrap_or(0);
        assert_eq!(is_available(), device_count > 0);
    }

    /// Initialization may only fail when no device is available.
    #[test]
    fn test_cuda_initialization() {
        let result = initialize();
        assert!(result.is_ok() || !is_available());
    }

    /// A context on device 0 can be created whenever a device is reported.
    #[cfg(feature = "advanced_math")]
    #[test]
    fn test_context_creation() {
        if is_available() {
            let context = CudaContext::new(0);
            assert!(context.is_ok());
        }
    }

    /// A stream can be created whenever a device is reported.
    #[cfg(feature = "advanced_math")]
    #[test]
    fn test_stream_creation() {
        if is_available() {
            let stream = CudaStream::new();
            assert!(stream.is_ok());
        }
    }

    /// Allocating a pool of size 1024 succeeds.
    // NOTE(review): unlike the context/stream tests this one is not guarded by
    // `is_available()` - confirm `allocate_pool` is expected to work without a device.
    #[cfg(feature = "advanced_math")]
    #[test]
    fn test_memory_allocation() {
        let mut memory = GpuMemory::new();
        let result = memory.allocate_pool(1024);
        assert!(result.is_ok());
    }

    /// Pins the default values of `CudaKernelConfig`.
    #[test]
    fn test_kernel_config_default() {
        let config = CudaKernelConfig::default();
        assert_eq!(config.device_id, 0);
        assert_eq!(config.num_streams, 4);
        assert_eq!(config.block_size, 256);
    }

    /// A new graph is empty, and the first node added to it gets id 0.
    #[test]
    fn test_cuda_graph_creation() {
        let mut graph = CudaGraph::new();
        assert!(graph.is_empty());
        let params = KernelNodeParams::default();
        let node_id = graph.add_kernel_node(params, &[]).expect("add kernel");
        assert_eq!(node_id, 0);
        assert_eq!(graph.node_count(), 1);
    }

    /// Builds a kernel -> kernel -> memcpy chain, finalizes it, checks the statistics and
    /// instantiates it without launching.
    #[test]
    fn test_cuda_graph_execution() {
        let mut graph = CudaGraph::new();
        let n1 = graph
            .add_kernel_node(KernelNodeParams::default(), &[])
            .expect("add");
        let n2 = graph
            .add_kernel_node(KernelNodeParams::default(), &[n1])
            .expect("add");
        let _n3 = graph
            .add_memcpy_node(
                MemCopyNodeParams {
                    src: 0,
                    dst: 1,
                    size: 1024,
                    kind: MemCopyKind::DeviceToDevice,
                },
                &[n2],
            )
            .expect("add memcpy");
        graph.finalize().expect("finalize");
        let stats = graph.get_stats();
        assert_eq!(stats.node_count, 3);
        assert_eq!(stats.kernel_count, 2);
        assert_eq!(stats.mem_op_count, 1);
        let exec = graph.instantiate().expect("instantiate");
        assert_eq!(exec.execution_count(), 0);
    }

    /// The first request for a named graph is a cache miss that runs the builder closure.
    #[test]
    fn test_quantum_graph_scheduler() {
        let mut scheduler = QuantumGraphScheduler::new(10);
        let _exec = scheduler
            .get_or_create("bell_circuit", || {
                let mut graph = CudaGraph::new();
                let first = graph.add_kernel_node(
                    KernelNodeParams {
                        function: 1,
                        grid_dim: (1, 1, 1),
                        block_dim: (256, 1, 1),
                        ..Default::default()
                    },
                    &[],
                )?;
                // Depend on the id actually returned for the first node instead of
                // assuming that ids start at 0.
                graph.add_kernel_node(
                    KernelNodeParams {
                        function: 2,
                        grid_dim: (1, 1, 1),
                        block_dim: (256, 1, 1),
                        ..Default::default()
                    },
                    &[first],
                )?;
                graph.finalize()?;
                Ok(graph)
            })
            .expect("create graph");
        let (hits, misses) = scheduler.cache_stats();
        assert_eq!(misses, 1);
        assert_eq!(hits, 0);
    }
}