use crate::core::TimeSeries;
use crate::core::VisibilityGraph;
/// GPU compute backend used for visibility-graph construction.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum GpuBackend {
    /// NVIDIA CUDA (code paths gated behind the `cuda` feature in this module).
    Cuda,
    /// Apple Metal (macOS; the compute paths are gated on Apple Silicon builds).
    Metal,
    /// OpenCL (detection in this module reports it only on Intel macOS).
    OpenCL,
    /// Defer to runtime detection; see `GpuCapabilities::best_backend`.
    Auto,
}
/// Tuning knobs for GPU-accelerated visibility-graph construction.
#[derive(Debug, Clone)]
pub struct GpuConfig {
    /// Preferred backend; `Auto` defers to runtime detection.
    pub backend: GpuBackend,
    /// Series shorter than this many points are built on the CPU instead
    /// (the only field read by this file, in `GpuVisibilityGraph::should_use_gpu`).
    pub min_nodes_for_gpu: usize,
    /// GPU workgroup/thread-block size.
    // NOTE(review): not read anywhere in this file — presumably consumed by the
    // Metal/CUDA pipelines; verify.
    pub block_size: usize,
    /// Upper bound on nodes per GPU batch.
    // NOTE(review): not read anywhere in this file — confirm it is enforced by
    // the pipeline layer.
    pub max_batch_nodes: usize,
    /// Whether to use pinned (page-locked) host memory for transfers.
    // NOTE(review): not read anywhere in this file; `for_apple_silicon` turns it
    // off, consistent with unified memory on Apple Silicon.
    pub use_pinned_memory: bool,
}
impl Default for GpuConfig {
fn default() -> Self {
Self {
backend: GpuBackend::Auto,
min_nodes_for_gpu: 5000,
block_size: 256,
max_batch_nodes: 100000,
use_pinned_memory: true,
}
}
}
impl GpuConfig {
    /// Alias for [`Default::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder-style setter for the compute backend.
    pub fn with_backend(mut self, backend: GpuBackend) -> Self {
        self.backend = backend;
        self
    }

    /// Builder-style setter for the CPU-to-GPU crossover threshold.
    pub fn with_min_nodes(mut self, min_nodes: usize) -> Self {
        self.min_nodes_for_gpu = min_nodes;
        self
    }

    /// Preset: engage the GPU from 1,000 nodes with 256-thread blocks.
    pub fn for_medium_graphs() -> Self {
        Self {
            block_size: 256,
            min_nodes_for_gpu: 1000,
            ..Self::default()
        }
    }

    /// Preset: engage the GPU from 5,000 nodes with 512-thread blocks.
    pub fn for_large_graphs() -> Self {
        Self {
            block_size: 512,
            min_nodes_for_gpu: 5000,
            ..Self::default()
        }
    }

    /// Preset: engage the GPU from 10,000 nodes, 1024-thread blocks,
    /// and doubled batch capacity.
    pub fn for_massive_graphs() -> Self {
        Self {
            max_batch_nodes: 200000,
            block_size: 1024,
            min_nodes_for_gpu: 10000,
            ..Self::default()
        }
    }

    /// Preset for Apple Silicon: Metal backend, GPU from 2,000 nodes,
    /// 150k-node batches, and no pinned memory (unified memory model).
    pub fn for_apple_silicon() -> Self {
        Self {
            backend: GpuBackend::Metal,
            min_nodes_for_gpu: 2000,
            max_batch_nodes: 150000,
            use_pinned_memory: false,
            ..Self::default()
        }
    }
}
/// Snapshot of the GPU backends detected for the current build target.
/// Produced by [`GpuCapabilities::detect`]; fields are exposed via getters.
pub struct GpuCapabilities {
    /// True if a CUDA device was detected.
    cuda_available: bool,
    /// True if Metal is available (always reported true on macOS builds here).
    metal_available: bool,
    /// True if OpenCL is available.
    opencl_available: bool,
    /// True on Apple Silicon, where a Neural Engine is assumed present.
    neural_engine_available: bool,
    /// Number of detected GPUs (0 when none).
    gpu_count: usize,
    /// Per-GPU memory in MB.
    // NOTE(review): values are hard-coded estimates in `detect_metal`, not
    // queried from the device — verify against actual hardware.
    gpu_memory_mb: Vec<usize>,
}
impl GpuCapabilities {
    /// Probe the current build target for usable GPU backends.
    ///
    /// Exactly one of the three `#[cfg]` blocks below survives compilation
    /// (the conditions are mutually exclusive and exhaustive), so the
    /// surviving block is the function's tail expression. The CUDA arm uses
    /// an explicit `return`, which works in either position.
    pub fn detect() -> Self {
        // macOS always goes through Metal detection (even with the `cuda`
        // feature enabled, since the CUDA arm excludes macOS).
        #[cfg(target_os = "macos")]
        {
            Self::detect_metal()
        }
        #[cfg(all(feature = "cuda", not(target_os = "macos")))]
        {
            return Self::detect_cuda();
        }
        // No GPU support compiled in: report an empty capability set.
        #[cfg(not(any(feature = "cuda", target_os = "macos")))]
        {
            Self {
                cuda_available: false,
                metal_available: false,
                opencl_available: false,
                neural_engine_available: false,
                gpu_count: 0,
                gpu_memory_mb: vec![],
            }
        }
    }

    /// macOS-only detection: assumes Metal is always present and
    /// distinguishes Apple Silicon (Neural Engine) from Intel Macs by
    /// target architecture. Exactly one arch block survives compilation.
    #[cfg(target_os = "macos")]
    fn detect_metal() -> Self {
        #[cfg(target_arch = "aarch64")]
        {
            Self {
                cuda_available: false,
                metal_available: true,
                opencl_available: false,
                neural_engine_available: true,
                gpu_count: 1,
                // NOTE(review): hard-coded 16 GB (unified memory) — not
                // queried from the device; verify against actual hardware.
                gpu_memory_mb: vec![16384],
            }
        }
        #[cfg(target_arch = "x86_64")]
        {
            Self {
                cuda_available: false,
                metal_available: true,
                opencl_available: true,
                neural_engine_available: false,
                gpu_count: 1,
                // NOTE(review): hard-coded 4 GB assumption for Intel Macs.
                gpu_memory_mb: vec![4096],
            }
        }
    }

    /// CUDA detection stub: currently always reports no CUDA devices.
    // NOTE(review): placeholder — real device enumeration is presumably
    // still TODO (see the empty `cuda_kernels` module below).
    #[cfg(feature = "cuda")]
    fn detect_cuda() -> Self {
        Self {
            cuda_available: false,
            metal_available: false,
            opencl_available: false,
            neural_engine_available: false,
            gpu_count: 0,
            gpu_memory_mb: vec![],
        }
    }

    /// Whether a CUDA device was detected.
    pub fn has_cuda(&self) -> bool {
        self.cuda_available
    }

    /// Whether Metal is available.
    pub fn has_metal(&self) -> bool {
        self.metal_available
    }

    /// Whether an Apple Neural Engine was detected (Apple Silicon only).
    pub fn has_neural_engine(&self) -> bool {
        self.neural_engine_available
    }

    /// Whether OpenCL is available.
    pub fn has_opencl(&self) -> bool {
        self.opencl_available
    }

    /// True if any GPU backend (CUDA, Metal, or OpenCL) is usable.
    pub fn has_gpu(&self) -> bool {
        self.cuda_available || self.metal_available || self.opencl_available
    }

    /// Number of GPUs detected.
    pub fn gpu_count(&self) -> usize {
        self.gpu_count
    }

    /// Preferred backend, in priority order Metal > CUDA > OpenCL.
    /// Returns `Auto` when no GPU is available.
    pub fn best_backend(&self) -> GpuBackend {
        if self.metal_available {
            GpuBackend::Metal
        } else if self.cuda_available {
            GpuBackend::Cuda
        } else if self.opencl_available {
            GpuBackend::OpenCL
        } else {
            GpuBackend::Auto
        }
    }

    /// Print a human-readable capability report to stdout.
    pub fn print_info(&self) {
        println!("GPU Capabilities:");
        println!(" CUDA Available: {}", if self.cuda_available { "✓" } else { "✗" });
        println!(" Metal Available: {}", if self.metal_available { "✓" } else { "✗" });
        if self.neural_engine_available {
            println!(" Neural Engine: ✓ (Apple Silicon)");
        }
        println!(" OpenCL Available: {}", if self.opencl_available { "✓" } else { "✗" });
        println!(" GPU Count: {}", self.gpu_count);
        if !self.gpu_memory_mb.is_empty() {
            println!(" GPU Memory:");
            for (i, mem) in self.gpu_memory_mb.iter().enumerate() {
                // Metal + Neural Engine together is how `detect_metal` marks
                // Apple Silicon, whose GPU shares system RAM.
                if self.metal_available && self.neural_engine_available {
                    println!(" GPU {}: {} MB (Unified Memory)", i, mem);
                } else {
                    println!(" GPU {}: {} MB", i, mem);
                }
            }
        }
        if self.has_gpu() {
            println!(" Recommended Backend: {:?}", self.best_backend());
        } else {
            println!(" Note: No GPU available, will use optimized CPU implementation");
        }
    }
}
/// GPU-accelerated visibility-graph builder that transparently falls back to
/// the CPU implementation when no GPU is available or the series is small.
pub struct GpuVisibilityGraph {
    /// Thresholds and backend preference.
    config: GpuConfig,
    /// Backends detected at construction time.
    capabilities: GpuCapabilities,
}
impl GpuVisibilityGraph {
    /// Builder with default tuning; GPU capabilities are probed once here.
    pub fn new() -> Self {
        Self {
            config: GpuConfig::default(),
            capabilities: GpuCapabilities::detect(),
        }
    }

    /// Builder with caller-supplied tuning; capabilities are still probed here.
    pub fn with_config(config: GpuConfig) -> Self {
        Self {
            config,
            capabilities: GpuCapabilities::detect(),
        }
    }

    /// True when a GPU backend exists and the series has at least
    /// `config.min_nodes_for_gpu` points.
    pub fn should_use_gpu(&self, node_count: usize) -> bool {
        self.capabilities.has_gpu() && node_count >= self.config.min_nodes_for_gpu
    }

    /// Build a natural visibility graph, choosing GPU or CPU per
    /// [`Self::should_use_gpu`].
    ///
    /// # Errors
    /// Returns a formatted message when pipeline creation or graph
    /// construction fails.
    pub fn build_natural<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        if self.should_use_gpu(series.len()) {
            self.build_natural_gpu(series)
        } else {
            self.build_natural_cpu(series)
        }
    }

    /// GPU dispatch for natural visibility: Metal on Apple Silicon builds
    /// with the `metal` feature; otherwise the CPU path.
    // NOTE(review): the CUDA arm below deliberately falls through to the CPU
    // build — CUDA kernels are not implemented yet (see `mod cuda_kernels`).
    fn build_natural_gpu<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
        {
            if self.capabilities.has_metal() {
                return self.build_natural_metal(series);
            }
        }
        #[cfg(feature = "cuda")]
        {
            if self.capabilities.has_cuda() {
                return self.build_natural_cpu(series);
            }
        }
        self.build_natural_cpu(series)
    }

    /// Natural visibility via the Metal compute pipeline.
    ///
    /// NOTE(review): this builds the full graph on the CPU first
    /// (`natural_visibility()`) and then discards its edges, so the GPU pass
    /// does not currently save the CPU work — presumably a stopgap until an
    /// edge-free graph constructor exists; verify.
    #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
    fn build_natural_metal<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        use crate::performance::metal::MetalVisibilityPipeline;
        // Flatten to f64 for the GPU, dropping missing (None) samples.
        // NOTE(review): edge indices returned by the pipeline index into this
        // filtered vector; if the series contains gaps they may not line up
        // with the graph's node indices — confirm against
        // MetalVisibilityPipeline.
        let data: Vec<f64> = series.values.iter()
            .filter_map(|&opt| opt)
            .map(|x| x.into())
            .collect();
        let pipeline = MetalVisibilityPipeline::new()
            .map_err(|e| format!("Failed to create Metal pipeline: {}", e))?;
        let edges = pipeline.compute_natural_visibility(&data)
            .map_err(|e| format!("Metal computation failed: {}", e))?;
        let mut graph = VisibilityGraph::from_series(series)
            .natural_visibility()
            .map_err(|e| format!("Graph construction failed: {:?}", e))?;
        // Replace the CPU edge set with the GPU result; every edge weight is 1.0.
        graph.edges.clear();
        for (src, dst) in edges {
            graph.edges.insert((src, dst), 1.0);
        }
        Ok(graph)
    }

    /// CPU fallback: delegate to the core natural-visibility builder.
    fn build_natural_cpu<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        VisibilityGraph::from_series(series)
            .natural_visibility()
            .map_err(|e| format!("CPU build failed: {:?}", e))
    }

    /// Build a horizontal visibility graph, choosing GPU or CPU per
    /// [`Self::should_use_gpu`].
    ///
    /// # Errors
    /// Returns a formatted message when pipeline creation or graph
    /// construction fails.
    pub fn build_horizontal<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        if self.should_use_gpu(series.len()) {
            self.build_horizontal_gpu(series)
        } else {
            self.build_horizontal_cpu(series)
        }
    }

    /// GPU dispatch for horizontal visibility. Unlike the natural-visibility
    /// path there is no CUDA arm here; everything but Apple Silicon + `metal`
    /// goes to the CPU.
    fn build_horizontal_gpu<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
        {
            if self.capabilities.has_metal() {
                return self.build_horizontal_metal(series);
            }
        }
        self.build_horizontal_cpu(series)
    }

    /// Horizontal visibility via the Metal compute pipeline.
    ///
    /// NOTE(review): same caveats as `build_natural_metal` — the CPU graph is
    /// built and its edges discarded, and pipeline edge indices refer to the
    /// None-filtered data; verify both.
    #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
    fn build_horizontal_metal<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        use crate::performance::metal::MetalVisibilityPipeline;
        // Flatten to f64 for the GPU, dropping missing (None) samples.
        let data: Vec<f64> = series.values.iter()
            .filter_map(|&opt| opt)
            .map(|x| x.into())
            .collect();
        let pipeline = MetalVisibilityPipeline::new()
            .map_err(|e| format!("Failed to create Metal pipeline: {}", e))?;
        let edges = pipeline.compute_horizontal_visibility(&data)
            .map_err(|e| format!("Metal computation failed: {}", e))?;
        let mut graph = VisibilityGraph::from_series(series)
            .horizontal_visibility()
            .map_err(|e| format!("Graph construction failed: {:?}", e))?;
        // Replace the CPU edge set with the GPU result; every edge weight is 1.0.
        graph.edges.clear();
        for (src, dst) in edges {
            graph.edges.insert((src, dst), 1.0);
        }
        Ok(graph)
    }

    /// CPU fallback: delegate to the core horizontal-visibility builder.
    fn build_horizontal_cpu<T>(
        &self,
        series: &TimeSeries<T>,
    ) -> Result<VisibilityGraph<T>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        VisibilityGraph::from_series(series)
            .horizontal_visibility()
            .map_err(|e| format!("CPU build failed: {:?}", e))
    }

    /// Build natural visibility graphs for a whole batch.
    ///
    /// Uses the *average* series length to decide CPU vs GPU, so one long
    /// series in a mostly-short batch can tip the entire batch onto the GPU.
    pub fn build_natural_batch<T>(
        &self,
        series_batch: &[TimeSeries<T>],
    ) -> Result<Vec<VisibilityGraph<T>>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        // Empty batch: nothing to do (also guards the division below).
        if series_batch.is_empty() {
            return Ok(Vec::new());
        }
        let avg_size: usize = series_batch.iter().map(|s| s.len()).sum::<usize>() / series_batch.len();
        if !self.should_use_gpu(avg_size) {
            return series_batch.iter()
                .map(|s| self.build_natural_cpu(s))
                .collect();
        }
        #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
        {
            if self.capabilities.has_metal() {
                return self.build_natural_batch_metal(series_batch);
            }
        }
        // No batch GPU path compiled in: fall back to per-series CPU builds.
        series_batch.iter()
            .map(|s| self.build_natural_cpu(s))
            .collect()
    }

    /// Batched natural visibility via the Metal compute pipeline.
    ///
    /// NOTE(review): same caveats as `build_natural_metal` (redundant CPU
    /// build per series, None-filtered indices), applied per batch element.
    #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
    fn build_natural_batch_metal<T>(
        &self,
        series_batch: &[TimeSeries<T>],
    ) -> Result<Vec<VisibilityGraph<T>>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        use crate::performance::metal::MetalVisibilityPipeline;
        // Flatten every series to f64, dropping missing (None) samples.
        let data_batch: Vec<Vec<f64>> = series_batch.iter()
            .map(|s| s.values.iter()
                .filter_map(|&opt| opt)
                .map(|x| x.into())
                .collect())
            .collect();
        // The pipeline takes borrowed slices; `data_batch` owns the storage.
        let data_refs: Vec<&[f64]> = data_batch.iter().map(|v| v.as_slice()).collect();
        let pipeline = MetalVisibilityPipeline::new()
            .map_err(|e| format!("Failed to create Metal pipeline: {}", e))?;
        let edges_batch = pipeline.compute_natural_visibility_batch(&data_refs)
            .map_err(|e| format!("Metal batch computation failed: {}", e))?;
        let mut graphs = Vec::with_capacity(series_batch.len());
        for (series, edges) in series_batch.iter().zip(edges_batch.iter()) {
            let mut graph = VisibilityGraph::from_series(series)
                .natural_visibility()
                .map_err(|e| format!("Graph construction failed: {:?}", e))?;
            // Replace CPU edges with the GPU result; every edge weight is 1.0.
            graph.edges.clear();
            for &(src, dst) in edges {
                graph.edges.insert((src, dst), 1.0);
            }
            graphs.push(graph);
        }
        Ok(graphs)
    }

    /// Build horizontal visibility graphs for a whole batch.
    ///
    /// Uses the *average* series length to decide CPU vs GPU, mirroring
    /// [`Self::build_natural_batch`].
    pub fn build_horizontal_batch<T>(
        &self,
        series_batch: &[TimeSeries<T>],
    ) -> Result<Vec<VisibilityGraph<T>>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        // Empty batch: nothing to do (also guards the division below).
        if series_batch.is_empty() {
            return Ok(Vec::new());
        }
        let avg_size: usize = series_batch.iter().map(|s| s.len()).sum::<usize>() / series_batch.len();
        if !self.should_use_gpu(avg_size) {
            return series_batch.iter()
                .map(|s| self.build_horizontal_cpu(s))
                .collect();
        }
        #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
        {
            if self.capabilities.has_metal() {
                return self.build_horizontal_batch_metal(series_batch);
            }
        }
        // No batch GPU path compiled in: fall back to per-series CPU builds.
        series_batch.iter()
            .map(|s| self.build_horizontal_cpu(s))
            .collect()
    }

    /// Batched horizontal visibility via the Metal compute pipeline.
    ///
    /// NOTE(review): same caveats as `build_natural_batch_metal`.
    #[cfg(all(target_os = "macos", target_arch = "aarch64", feature = "metal"))]
    fn build_horizontal_batch_metal<T>(
        &self,
        series_batch: &[TimeSeries<T>],
    ) -> Result<Vec<VisibilityGraph<T>>, String>
    where
        T: Copy + PartialOrd + Into<f64> + From<f64>
            + std::ops::Add<Output = T>
            + std::ops::Sub<Output = T>
            + std::ops::Mul<Output = T>
            + std::ops::Div<Output = T>
            + Send + Sync,
    {
        use crate::performance::metal::MetalVisibilityPipeline;
        // Flatten every series to f64, dropping missing (None) samples.
        let data_batch: Vec<Vec<f64>> = series_batch.iter()
            .map(|s| s.values.iter()
                .filter_map(|&opt| opt)
                .map(|x| x.into())
                .collect())
            .collect();
        // The pipeline takes borrowed slices; `data_batch` owns the storage.
        let data_refs: Vec<&[f64]> = data_batch.iter().map(|v| v.as_slice()).collect();
        let pipeline = MetalVisibilityPipeline::new()
            .map_err(|e| format!("Failed to create Metal pipeline: {}", e))?;
        let edges_batch = pipeline.compute_horizontal_visibility_batch(&data_refs)
            .map_err(|e| format!("Metal batch computation failed: {}", e))?;
        let mut graphs = Vec::with_capacity(series_batch.len());
        for (series, edges) in series_batch.iter().zip(edges_batch.iter()) {
            let mut graph = VisibilityGraph::from_series(series)
                .horizontal_visibility()
                .map_err(|e| format!("Graph construction failed: {:?}", e))?;
            // Replace CPU edges with the GPU result; every edge weight is 1.0.
            graph.edges.clear();
            for &(src, dst) in edges {
                graph.edges.insert((src, dst), 1.0);
            }
            graphs.push(graph);
        }
        Ok(graphs)
    }
}
impl Default for GpuVisibilityGraph {
fn default() -> Self {
Self::new()
}
}
/// Placeholder for CUDA kernel bindings.
// NOTE(review): intentionally empty — `GpuCapabilities::detect_cuda` also
// reports no devices, so no GPU work dispatches through CUDA yet.
#[cfg(feature = "cuda")]
mod cuda_kernels {
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Defaults and the medium-graph preset expose the expected thresholds.
    #[test]
    fn test_gpu_config() {
        assert_eq!(GpuConfig::new().min_nodes_for_gpu, 5000);
        assert_eq!(GpuConfig::for_medium_graphs().min_nodes_for_gpu, 1000);
    }

    /// Detection is a smoke test: it must run and print without panicking.
    #[test]
    fn test_gpu_detection() {
        GpuCapabilities::detect().print_info();
    }

    /// Below the default 5000-node threshold the GPU is never chosen,
    /// regardless of what hardware the test machine has.
    #[test]
    fn test_should_use_gpu() {
        let builder = GpuVisibilityGraph::new();
        assert!(!builder.should_use_gpu(100));
        // Above threshold the answer depends on detected hardware; just
        // exercise the call.
        let _would_use_gpu = builder.should_use_gpu(10000);
    }
}