fast-umap 1.5.1

Configurable UMAP (Uniform Manifold Approximation and Projection) in Rust
Documentation
//! CPU backend implementation
//!
//! This module provides CPU-based UMAP functionality as a fallback when GPU is not available.
//! It is currently a foundation for full CPU support: fitting normalizes the input and
//! produces a placeholder 2D embedding rather than running the full UMAP algorithm.

use crate::{
    distances::*,
    train::*,
    utils::*,
};
use burn::tensor::{Device, Tensor};
use ndarray::Array2;
use num::Float;
use std::time::Instant;

/// CPU-specific UMAP implementation
///
/// Holds the `UmapConfig` that is cloned into every model returned by `fit`.
pub struct CpuUmap {
    config: UmapConfig,
}

impl CpuUmap {
    /// Create a new CPU UMAP instance that will use `config` for every fit.
    pub fn new(config: UmapConfig) -> Self {
        Self { config }
    }

    /// Fit UMAP using the CPU implementation.
    ///
    /// The samples are flattened row by row into a single `f64` buffer, handed to
    /// `normalize_data`, and then projected to 2D by `create_simple_embedding`.
    /// This is a placeholder pipeline, not the full UMAP algorithm.
    ///
    /// # Arguments
    ///
    /// * `data` - One inner vector per sample; every sample must have the same length.
    ///   An empty `data` yields a fitted model with an empty embedding.
    /// * `labels` - Accepted for API compatibility; not used by the current
    ///   placeholder implementation.
    ///
    /// # Panics
    ///
    /// Panics if the samples do not all have the same number of features, because the
    /// flattened buffer would otherwise be misaligned and produce a wrong embedding.
    pub fn fit<F: Float>(&self, data: Vec<Vec<F>>, labels: Option<Vec<String>>) -> CpuFittedUmap
    where
        F: num::FromPrimitive + Into<f64>,
    {
        let start_time = Instant::now();

        // Labels play no role in the placeholder embedding; touch them so the
        // parameter does not trigger an unused-variable warning.
        let _ = labels;

        // Derive the shape without indexing, so an empty dataset does not panic.
        let n_samples = data.len();
        let n_features = data.first().map_or(0, Vec::len);
        assert!(
            data.iter().all(|sample| sample.len() == n_features),
            "[fast-umap CPU] all samples must have the same number of features (expected {})",
            n_features
        );

        // Convert data to flat, row-major format
        let mut flat_data: Vec<f64> = Vec::with_capacity(n_samples * n_features);
        for sample in &data {
            flat_data.extend(sample.iter().map(|&value| Into::<f64>::into(value)));
        }

        // Normalize data (CPU operation); there is nothing to normalize without samples.
        if n_samples > 0 {
            normalize_data(&mut flat_data, n_samples, n_features);
        }

        // Create a simple 2D embedding as a stand-in for the real algorithm.
        // In a full implementation, this would use proper UMAP.
        let embedding = create_simple_embedding(n_samples, n_features, &flat_data);

        if self.config.optimization.verbose {
            println!(
                "[fast-umap CPU] Processing complete in {:.2}s",
                start_time.elapsed().as_secs_f64()
            );
        }

        CpuFittedUmap {
            embedding,
            config: self.config.clone(),
        }
    }
}

/// Build a placeholder 2D embedding for demonstration purposes.
///
/// `data` is read as a row-major buffer with `n_features` values per sample. Each of
/// the `n_samples` output points is the sample's first two features multiplied by 10.
/// When there are fewer than two features, every point stays at `[0.0, 0.0]`.
///
/// This is not UMAP (nor PCA); a full implementation would replace this projection.
///
/// # Panics
///
/// Panics if `n_features >= 2` and `data` is too short to hold the first two
/// features of every sample.
fn create_simple_embedding(n_samples: usize, n_features: usize, data: &[f64]) -> Vec<Vec<f64>> {
    // Scale factor applied to both projected coordinates.
    const SCALE: f64 = 10.0;

    (0..n_samples)
        .map(|row| {
            if n_features >= 2 {
                let offset = row * n_features;
                vec![data[offset] * SCALE, data[offset + 1] * SCALE]
            } else {
                // Not enough features to project: leave the point at the origin.
                vec![0.0, 0.0]
            }
        })
        .collect()
}

/// Result of fitting with the CPU backend.
///
/// Bundles the computed embedding (one inner vector per sample) with the
/// configuration that was used to produce it.
pub struct CpuFittedUmap {
    embedding: Vec<Vec<f64>>,
    config: UmapConfig,
}

impl CpuFittedUmap {
    /// Borrow the computed embedding.
    pub fn embedding(&self) -> &Vec<Vec<f64>> {
        let Self { embedding, .. } = self;
        embedding
    }

    /// Take ownership of the embedding, discarding the rest of the fitted model.
    pub fn into_embedding(self) -> Vec<Vec<f64>> {
        let Self { embedding, .. } = self;
        embedding
    }

    /// Borrow the configuration stored alongside the embedding.
    pub fn config(&self) -> &UmapConfig {
        let Self { config, .. } = self;
        config
    }

    /// Transforming new data is not supported by the CPU backend.
    ///
    /// # Panics
    ///
    /// Always panics, regardless of the input; the argument is ignored.
    pub fn transform(&self, _data: Vec<Vec<f64>>) -> Vec<Vec<f64>> {
        panic!("CPU backend does not support transforming new data. Use GPU backend for parametric UMAP with transform support.");
    }
}

/// Public entry points for the CPU backend.
pub mod api {
    use super::*;

    /// Fit a CPU UMAP model in one call.
    ///
    /// Builds a `CpuUmap` from `config` and immediately fits it on `data`,
    /// forwarding `labels` unchanged. Returns the resulting `CpuFittedUmap`.
    pub fn fit_cpu<F: Float>(
        config: UmapConfig,
        data: Vec<Vec<F>>,
        labels: Option<Vec<String>>,
    ) -> CpuFittedUmap
    where
        F: num::FromPrimitive + Into<f64>,
    {
        CpuUmap::new(config).fit(data, labels)
    }
}