use crate::error::{ClusteringError, Result};
use scirs2_core::ndarray::{Array2, ArrayView1, ArrayView2};
use scirs2_core::numeric::{Float, FromPrimitive};
use serde::{Deserialize, Serialize};
use super::core::{GpuConfig, GpuContext};
use super::memory::{GpuMemoryManager, MemoryTransfer};
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum DistanceMetric {
Euclidean,
Manhattan,
Cosine,
Minkowski(f64),
SquaredEuclidean,
Chebyshev,
Hamming,
}
impl Default for DistanceMetric {
fn default() -> Self {
DistanceMetric::Euclidean
}
}
impl std::fmt::Display for DistanceMetric {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DistanceMetric::Euclidean => write!(f, "euclidean"),
DistanceMetric::Manhattan => write!(f, "manhattan"),
DistanceMetric::Cosine => write!(f, "cosine"),
DistanceMetric::Minkowski(p) => write!(f, "minkowski(p={})", p),
DistanceMetric::SquaredEuclidean => write!(f, "squared_euclidean"),
DistanceMetric::Chebyshev => write!(f, "chebyshev"),
DistanceMetric::Hamming => write!(f, "hamming"),
}
}
}
#[derive(Debug)]
pub struct GpuDistanceMatrix<F: Float> {
context: GpuContext,
metric: DistanceMetric,
gpu_data: Option<GpuArray<F>>,
tile_size: usize,
use_shared_memory: bool,
memory_manager: GpuMemoryManager,
}
#[derive(Debug)]
pub struct GpuArray<F: Float> {
device_ptr: usize,
shape: [usize; 2],
element_size: usize,
on_device: bool,
_phantom: std::marker::PhantomData<F>,
}
impl<F: Float + FromPrimitive + Send + Sync> GpuDistanceMatrix<F> {
pub fn new(
gpu_config: GpuConfig,
metric: DistanceMetric,
tile_size: Option<usize>,
) -> Result<Self> {
let device = Self::detect_gpu_device(&gpu_config)?;
let context = GpuContext::new(device, gpu_config)?;
let optimal_tile_size =
tile_size.unwrap_or_else(|| Self::calculate_optimal_tile_size(&context));
let memory_manager = GpuMemoryManager::new(256, 100);
Ok(Self {
context,
metric,
gpu_data: None,
tile_size: optimal_tile_size,
use_shared_memory: true,
memory_manager,
})
}
pub fn preload_data(&mut self, data: ArrayView2<F>) -> Result<()> {
let shape = [data.nrows(), data.ncols()];
let mut gpu_data = GpuArray::allocate(shape)?;
gpu_data.copy_from_host(data)?;
self.gpu_data = Some(gpu_data);
Ok(())
}
pub fn compute_distance_matrix(&mut self, data: ArrayView2<F>) -> Result<Array2<F>> {
let n_samples = data.nrows();
let mut result = Array2::zeros((n_samples, n_samples));
if !self.context.is_gpu_accelerated() {
return self.compute_distance_matrix_cpu(data);
}
if self.gpu_data.is_none() {
self.preload_data(data)?;
}
for i in (0..n_samples).step_by(self.tile_size) {
for j in (0..n_samples).step_by(self.tile_size) {
let i_end = (i + self.tile_size).min(n_samples);
let j_end = (j + self.tile_size).min(n_samples);
let tile_result = self.compute_distance_tile(i, i_end, j, j_end)?;
for (ii, row) in tile_result.rows().into_iter().enumerate() {
for (jj, &val) in row.iter().enumerate() {
if i + ii < n_samples && j + jj < n_samples {
result[[i + ii, j + jj]] = val;
}
}
}
}
}
Ok(result)
}
pub fn compute_distances_to_centroids(
&mut self,
data: ArrayView2<F>,
centroids: ArrayView2<F>,
) -> Result<Array2<F>> {
let n_samples = data.nrows();
let n_centroids = centroids.nrows();
let mut result = Array2::zeros((n_samples, n_centroids));
if !self.context.is_gpu_accelerated() {
return self.compute_distances_to_centroids_cpu(data, centroids);
}
for i in (0..n_samples).step_by(self.tile_size) {
let i_end = (i + self.tile_size).min(n_samples);
for j in (0..n_centroids).step_by(self.tile_size) {
let j_end = (j + self.tile_size).min(n_centroids);
let tile_result =
self.compute_centroid_distance_tile(data, centroids, i, i_end, j, j_end)?;
for (ii, row) in tile_result.rows().into_iter().enumerate() {
for (jj, &val) in row.iter().enumerate() {
if i + ii < n_samples && j + jj < n_centroids {
result[[i + ii, j + jj]] = val;
}
}
}
}
}
Ok(result)
}
pub fn find_k_nearest(
&mut self,
query: ArrayView1<F>,
data: ArrayView2<F>,
k: usize,
) -> Result<(Vec<usize>, Vec<F>)> {
if k == 0 || k > data.nrows() {
return Err(ClusteringError::InvalidInput(
"Invalid k value for k-nearest neighbors".to_string(),
));
}
let distances = self.compute_point_distances(query, data)?;
let mut indexed_distances: Vec<(usize, F)> =
distances.iter().enumerate().map(|(i, &d)| (i, d)).collect();
indexed_distances
.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
let indices = indexed_distances.iter().take(k).map(|(i, _)| *i).collect();
let distances = indexed_distances.iter().take(k).map(|(_, d)| *d).collect();
Ok((indices, distances))
}
fn compute_point_distances(
&mut self,
query: ArrayView1<F>,
data: ArrayView2<F>,
) -> Result<Vec<F>> {
let n_samples = data.nrows();
let mut distances = vec![F::zero(); n_samples];
for (i, data_point) in data.rows().into_iter().enumerate() {
distances[i] = self.compute_single_distance(query, data_point)?;
}
Ok(distances)
}
fn compute_single_distance(&self, point1: ArrayView1<F>, point2: ArrayView1<F>) -> Result<F> {
if point1.len() != point2.len() {
return Err(ClusteringError::InvalidInput(
"Points must have same dimensionality".to_string(),
));
}
let distance = match self.metric {
DistanceMetric::Euclidean => {
let sum_sq: F = point1
.iter()
.zip(point2.iter())
.map(|(&a, &b)| (a - b) * (a - b))
.fold(F::zero(), |acc, x| acc + x);
sum_sq.sqrt()
}
DistanceMetric::SquaredEuclidean => point1
.iter()
.zip(point2.iter())
.map(|(&a, &b)| (a - b) * (a - b))
.fold(F::zero(), |acc, x| acc + x),
DistanceMetric::Manhattan => point1
.iter()
.zip(point2.iter())
.map(|(&a, &b)| (a - b).abs())
.fold(F::zero(), |acc, x| acc + x),
DistanceMetric::Cosine => {
let dot_product = point1
.iter()
.zip(point2.iter())
.map(|(&a, &b)| a * b)
.fold(F::zero(), |acc, x| acc + x);
let norm1 = point1
.iter()
.map(|&x| x * x)
.fold(F::zero(), |acc, x| acc + x)
.sqrt();
let norm2 = point2
.iter()
.map(|&x| x * x)
.fold(F::zero(), |acc, x| acc + x)
.sqrt();
if norm1 == F::zero() || norm2 == F::zero() {
F::one()
} else {
F::one() - (dot_product / (norm1 * norm2))
}
}
DistanceMetric::Chebyshev => point1
.iter()
.zip(point2.iter())
.map(|(&a, &b)| (a - b).abs())
.fold(F::zero(), |acc, x| if x > acc { x } else { acc }),
DistanceMetric::Minkowski(p) => {
let p_f = F::from(p).unwrap_or(F::one());
let sum: F = point1
.iter()
.zip(point2.iter())
.map(|(&a, &b)| (a - b).abs().powf(p_f))
.fold(F::zero(), |acc, x| acc + x);
sum.powf(F::one() / p_f)
}
DistanceMetric::Hamming => {
let threshold = F::from(0.5).unwrap_or(F::zero());
let count = point1
.iter()
.zip(point2.iter())
.filter(|(&a, &b)| (a - b).abs() > threshold)
.count();
F::from(count).unwrap_or(F::zero())
}
};
Ok(distance)
}
pub fn compute_distance_matrix_cpu(&self, data: ArrayView2<F>) -> Result<Array2<F>> {
let n_samples = data.nrows();
let mut result = Array2::zeros((n_samples, n_samples));
for i in 0..n_samples {
for j in i..n_samples {
let distance = self.compute_single_distance(data.row(i), data.row(j))?;
result[[i, j]] = distance;
result[[j, i]] = distance;
}
}
Ok(result)
}
fn compute_distances_to_centroids_cpu(
&self,
data: ArrayView2<F>,
centroids: ArrayView2<F>,
) -> Result<Array2<F>> {
let n_samples = data.nrows();
let n_centroids = centroids.nrows();
let mut result = Array2::zeros((n_samples, n_centroids));
for i in 0..n_samples {
for j in 0..n_centroids {
let distance = self.compute_single_distance(data.row(i), centroids.row(j))?;
result[[i, j]] = distance;
}
}
Ok(result)
}
fn compute_distance_tile(
&self,
_i_start: usize,
_i_end: usize,
_j_start: usize,
_j_end: usize,
) -> Result<Array2<F>> {
Ok(Array2::zeros((1, 1)))
}
fn compute_centroid_distance_tile(
&self,
_data: ArrayView2<F>,
_centroids: ArrayView2<F>,
_i_start: usize,
_i_end: usize,
_j_start: usize,
_j_end: usize,
) -> Result<Array2<F>> {
Ok(Array2::zeros((1, 1)))
}
fn detect_gpu_device(config: &GpuConfig) -> Result<super::core::GpuDevice> {
Ok(super::core::GpuDevice::new(
0,
"Stub GPU".to_string(),
8_000_000_000,
6_000_000_000,
"1.0".to_string(),
1024,
config.preferred_backend,
true,
))
}
fn calculate_optimal_tile_size(context: &GpuContext) -> usize {
let (total_memory, available_memory) = context.memory_info();
let compute_units = context.device.compute_units as usize;
let memory_based = (available_memory / (8 * std::mem::size_of::<F>())).min(1024);
let compute_based = (compute_units * 32).min(512);
memory_based.min(compute_based).max(32)
}
}
impl<F: Float> GpuArray<F> {
pub fn allocate(shape: [usize; 2]) -> Result<Self> {
let element_size = std::mem::size_of::<F>();
let total_size = shape[0] * shape[1] * element_size;
let device_ptr = 0x2000_0000;
Ok(Self {
device_ptr,
shape,
element_size,
on_device: true,
_phantom: std::marker::PhantomData,
})
}
pub fn copy_from_host(&mut self, _data: ArrayView2<F>) -> Result<()> {
self.on_device = true;
Ok(())
}
pub fn copy_to_host(&self) -> Result<Array2<F>> {
Ok(Array2::zeros((self.shape[0], self.shape[1])))
}
pub fn shape(&self) -> [usize; 2] {
self.shape
}
pub fn is_on_device(&self) -> bool {
self.on_device
}
}
#[cfg(test)]
mod tests {
use super::*;
use scirs2_core::ndarray::Array2;
#[test]
fn test_distance_metrics() {
let point1 = scirs2_core::ndarray::arr1(&[1.0, 2.0, 3.0]);
let point2 = scirs2_core::ndarray::arr1(&[4.0, 5.0, 6.0]);
let config = GpuConfig::default();
let matrix = GpuDistanceMatrix::<f64>::new(config, DistanceMetric::Euclidean, None)
.expect("Operation failed");
let distance = matrix
.compute_single_distance(point1.view(), point2.view())
.expect("Operation failed");
assert!((distance - 5.196152422706632).abs() < 1e-10);
}
#[test]
fn test_gpu_array_allocation() {
let array = GpuArray::<f32>::allocate([100, 50]).expect("Operation failed");
assert_eq!(array.shape(), [100, 50]);
assert!(array.is_on_device());
}
#[test]
fn test_distance_matrix_cpu_fallback() {
let data = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
.expect("Operation failed");
let config = GpuConfig::default();
let matrix = GpuDistanceMatrix::new(config, DistanceMetric::Euclidean, None)
.expect("Operation failed");
let result = matrix
.compute_distance_matrix_cpu(data.view())
.expect("Operation failed");
assert_eq!(result.shape(), &[3, 3]);
assert!((result[[0, 0]] - 0.0).abs() < 1e-10);
}
#[test]
fn test_k_nearest_neighbors() {
let query = scirs2_core::ndarray::arr1(&[1.0, 1.0]);
let data = Array2::from_shape_vec((4, 2), vec![0.0, 0.0, 2.0, 2.0, 3.0, 3.0, 1.0, 1.0])
.expect("Operation failed");
let config = GpuConfig::default();
let mut matrix = GpuDistanceMatrix::new(config, DistanceMetric::Euclidean, None)
.expect("Operation failed");
let (indices, distances) = matrix
.find_k_nearest(query.view(), data.view(), 2)
.expect("Operation failed");
assert_eq!(indices.len(), 2);
assert_eq!(distances.len(), 2);
assert_eq!(indices[0], 3); }
}