pandrs 0.1.0-alpha.2

A Rust implementation of pandas-like DataFrame for data analysis
//! GPU-accelerated machine learning example
//!
//! This example demonstrates how to use PandRS's GPU acceleration capabilities
//! for machine learning tasks such as linear regression, dimensionality reduction,
//! and clustering. It shows how to leverage GPU acceleration for improved performance
//! on large datasets.
//!
//! To run with GPU acceleration:
//!   cargo run --example gpu_ml_example --features "cuda optimized"
//!
//! To run without GPU acceleration (CPU fallback):
//!   cargo run --example gpu_ml_example --features "optimized"

#[cfg(all(feature = "cuda", feature = "optimized"))]
use ndarray::{Array1, Array2};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::error::Result;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::gpu::operations::{GpuMatrix, GpuVector};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::gpu::{get_gpu_manager, init_gpu};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::ml::clustering::kmeans;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::ml::dimension_reduction::pca;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::ml::metrics::regression::{mean_squared_error, r2_score};
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::stats::regression::linear_regression;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::DataFrame;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use pandrs::Series;
#[cfg(all(feature = "cuda", feature = "optimized"))]
use std::time::Instant;

#[cfg(all(feature = "cuda", feature = "optimized"))]
fn main() -> Result<()> {
    println!("PandRS GPU-accelerated Machine Learning Example");
    println!("----------------------------------------------");

    // Initialize GPU with default configuration
    let device_status = init_gpu()?;

    println!("\nGPU Device Status:");
    println!("  Available: {}", device_status.available);

    if device_status.available {
        println!(
            "  Device Name: {}",
            device_status
                .device_name
                .unwrap_or_else(|| "Unknown".to_string())
        );
        println!(
            "  CUDA Version: {}",
            device_status
                .cuda_version
                .unwrap_or_else(|| "Unknown".to_string())
        );
        println!(
            "  Total Memory: {} MB",
            device_status.total_memory.unwrap_or(0) / (1024 * 1024)
        );
        println!(
            "  Free Memory: {} MB",
            device_status.free_memory.unwrap_or(0) / (1024 * 1024)
        );
    } else {
        println!("  No CUDA-compatible GPU available. Using CPU fallback.");
    }

    // Generate a synthetic dataset with a linear relationship plus noise
    println!("\nGenerating synthetic dataset...");
    let (x_train, y_train, x_test, y_test) = generate_synthetic_dataset(100_000, 20)?;
    println!("Dataset generated with 100,000 samples and 20 features");

    // Benchmark linear regression
    benchmark_linear_regression(&x_train, &y_train, &x_test, &y_test)?;

    // Benchmark dimensionality reduction
    benchmark_pca(&x_train)?;

    // Benchmark clustering
    benchmark_kmeans(&x_train)?;

    Ok(())
}

#[cfg(all(feature = "cuda", feature = "optimized"))]
fn generate_synthetic_dataset(
    n_samples: usize,
    n_features: usize,
) -> Result<(Array2<f64>, Array1<f64>, Array2<f64>, Array1<f64>)> {
    // Generate true coefficients
    let mut true_coefs = Vec::with_capacity(n_features);
    for i in 0..n_features {
        true_coefs.push((i as f64) / n_features as f64);
    }

    // Generate X data
    let mut x_data = Vec::with_capacity(n_samples * n_features);
    for _ in 0..n_samples {
        for _ in 0..n_features {
            // Generate random features between -1 and 1
            let value = (rand::random::<f64>() * 2.0) - 1.0;
            x_data.push(value);
        }
    }

    // Generate y data based on linear relationship with noise
    let mut y_data = Vec::with_capacity(n_samples);
    for i in 0..n_samples {
        let mut y = 0.0;
        for j in 0..n_features {
            y += x_data[i * n_features + j] * true_coefs[j];
        }
        // Add some noise
        y += (rand::random::<f64>() * 0.1) - 0.05;
        y_data.push(y);
    }

    // Create ndarray objects
    let x_array = Array2::from_shape_vec((n_samples, n_features), x_data)?;
    let y_array = Array1::from_vec(y_data);

    // Split into train and test sets (80/20 split)
    let test_size = n_samples / 5;
    let train_size = n_samples - test_size;

    let x_train = x_array.slice(ndarray::s![0..train_size, ..]).to_owned();
    let y_train = y_array.slice(ndarray::s![0..train_size]).to_owned();
    let x_test = x_array.slice(ndarray::s![train_size.., ..]).to_owned();
    let y_test = y_array.slice(ndarray::s![train_size..]).to_owned();

    Ok((x_train, y_train, x_test, y_test))
}

#[cfg(all(feature = "cuda", feature = "optimized"))]
fn benchmark_linear_regression(
    x_train: &Array2<f64>,
    y_train: &Array1<f64>,
    x_test: &Array2<f64>,
    y_test: &Array1<f64>,
) -> Result<()> {
    println!("\nLinear Regression Benchmark");
    println!("---------------------------");

    let gpu_manager = get_gpu_manager()?;
    let is_gpu_available = gpu_manager.is_available();

    // Create GpuMatrix/GpuVector wrappers for GPU acceleration (the y
    // wrappers are built for completeness; the code below reads the raw
    // ndarray vectors directly)
    let gpu_x_train = GpuMatrix::new(x_train.clone());
    let _gpu_y_train = GpuVector::new(y_train.clone());
    let gpu_x_test = GpuMatrix::new(x_test.clone());
    let _gpu_y_test = GpuVector::new(y_test.clone());

    // CPU implementation
    println!("\nTraining linear regression model on CPU...");
    let cpu_start = Instant::now();

    // Create a DataFrame for linear regression
    let mut df = DataFrame::new();
    for j in 0..x_train.shape()[1] {
        let col_name = format!("X{}", j);
        let x_col: Vec<f64> = x_train.column(j).iter().copied().collect();
        df.add_column(col_name.clone(), Series::new(x_col, Some(col_name))?)?;
    }
    df.add_column(
        "y".to_string(),
        Series::new(y_train.to_vec(), Some("y".to_string()))?,
    )?;

    // Train linear regression model
    let feature_cols: Vec<String> = (0..x_train.shape()[1]).map(|j| format!("X{}", j)).collect();
    let model = linear_regression(
        &df,
        "y",
        &feature_cols
            .iter()
            .map(|s| s.as_str())
            .collect::<Vec<&str>>(),
    )?;

    // Make predictions on test set
    let mut y_pred = Vec::with_capacity(x_test.shape()[0]);
    let coeffs = model.coefficients();
    let intercept = model.intercept();

    for i in 0..x_test.shape()[0] {
        let mut pred = intercept;
        for j in 0..x_test.shape()[1] {
            pred += coeffs[j] * x_test[[i, j]];
        }
        y_pred.push(pred);
    }

    // Calculate metrics
    let cpu_mse = mean_squared_error(&y_test.to_vec(), &y_pred)?;
    let cpu_r2 = r2_score(&y_test.to_vec(), &y_pred)?;

    let cpu_duration = cpu_start.elapsed().as_millis();
    println!("  CPU Training time: {} ms", cpu_duration);
    println!("  MSE: {:.6}", cpu_mse);
    println!("  R²: {:.6}", cpu_r2);

    // GPU implementation (if available)
    if is_gpu_available {
        println!("\nTraining linear regression model on GPU...");
        let gpu_start = Instant::now();

        // Perform the core matrix operations on the underlying ndarray data.
        // Note: ndarray's `dot` returns the product directly (it panics on a
        // shape mismatch rather than returning a Result).
        // X'X
        let xtx = GpuMatrix::new(gpu_x_train.data.t().dot(&gpu_x_train.data));

        // X'y (kept as a plain ndarray vector for the solver sketch below)
        let xty = gpu_x_train.data.t().dot(y_train);

        // Solve for coefficients: this would be (X'X)^(-1) X'y in a complete
        // implementation. A hedged CPU-side sketch of that solve follows; the
        // predictions below still reuse the CPU model coefficients.
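
        // Hedged sketch (not part of the original pipeline): with the normal
        // equations already formed, the solve X'X beta = X'y can be finished
        // with plain Gaussian elimination on the small n_features x
        // n_features system. A real implementation would hand this to a GPU
        // or LAPACK solver; this CPU version is purely illustrative.
        let n = xtx.data.shape()[0];
        let mut a = xtx.data.clone();
        let mut b = xty.clone();
        for p in 0..n {
            // Partial pivoting: bring the largest remaining pivot to row p.
            let mut max_row = p;
            for r in (p + 1)..n {
                if a[[r, p]].abs() > a[[max_row, p]].abs() {
                    max_row = r;
                }
            }
            if max_row != p {
                for c in 0..n {
                    a.swap([p, c], [max_row, c]);
                }
                b.swap(p, max_row);
            }
            // Eliminate entries below the pivot.
            for r in (p + 1)..n {
                let factor = a[[r, p]] / a[[p, p]];
                for c in p..n {
                    let val = factor * a[[p, c]];
                    a[[r, c]] -= val;
                }
                let val = factor * b[p];
                b[r] -= val;
            }
        }
        // Back-substitution yields the normal-equation coefficients.
        let mut solved = vec![0.0; n];
        for p in (0..n).rev() {
            let mut sum = b[p];
            for c in (p + 1)..n {
                sum -= a[[p, c]] * solved[c];
            }
            solved[p] = sum / a[[p, p]];
        }
        // Kept unused on purpose: the benchmark below reuses the CPU model's
        // coefficients so both paths are compared on equal footing.
        let _gpu_coefs = solved;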

        // Make predictions on the test set: X_test (dot) coefficient column
        let coef_matrix = Array2::from_shape_vec(
            (x_train.shape()[1], 1),
            coeffs.iter().copied().collect(),
        )?;
        let gpu_predictions: Vec<f64> = gpu_x_test.data.dot(&coef_matrix).column(0).to_vec();

        // Add intercept
        let gpu_y_pred: Vec<f64> = gpu_predictions.iter().map(|val| val + intercept).collect();

        // Calculate metrics
        let gpu_mse = mean_squared_error(&y_test.to_vec(), &gpu_y_pred)?;
        let gpu_r2 = r2_score(&y_test.to_vec(), &gpu_y_pred)?;

        let gpu_duration = gpu_start.elapsed().as_millis();
        println!("  GPU Training time: {} ms", gpu_duration);
        println!("  MSE: {:.6}", gpu_mse);
        println!("  R²: {:.6}", gpu_r2);

        // Calculate speedup (guard against a sub-millisecond GPU timing)
        let speedup = cpu_duration as f64 / gpu_duration.max(1) as f64;
        println!("\nSpeedup: {:.2}x", speedup);
    }

    Ok(())
}

#[cfg(all(feature = "cuda", feature = "optimized"))]
fn benchmark_pca(data: &Array2<f64>) -> Result<()> {
    println!("\nPrincipal Component Analysis (PCA) Benchmark");
    println!("--------------------------------------------");

    let gpu_manager = get_gpu_manager()?;
    let is_gpu_available = gpu_manager.is_available();

    // Wrap the data for GPU acceleration (unused in the CPU path below)
    let _gpu_data = GpuMatrix::new(data.clone());

    // CPU implementation
    println!("\nRunning PCA on CPU...");
    let cpu_start = Instant::now();

    // Create a DataFrame for PCA
    let mut df = DataFrame::new();
    for j in 0..data.shape()[1] {
        let col_name = format!("X{}", j);
        let x_col: Vec<f64> = data.column(j).iter().copied().collect();
        df.add_column(col_name.clone(), Series::new(x_col, Some(col_name))?)?;
    }

    // Run PCA
    let feature_cols: Vec<String> = (0..data.shape()[1]).map(|j| format!("X{}", j)).collect();
    let n_components = 2; // Reduce to 2 dimensions
    let pca_result = pca(
        &df,
        &feature_cols
            .iter()
            .map(|s| s.as_str())
            .collect::<Vec<&str>>(),
        n_components,
    )?;

    let cpu_duration = cpu_start.elapsed().as_millis();
    println!("  CPU PCA time: {} ms", cpu_duration);
    println!(
        "  Explained variance ratio: {:.4}, {:.4}",
        pca_result.explained_variance_ratio[0], pca_result.explained_variance_ratio[1]
    );

    // GPU implementation (if available)
    if is_gpu_available {
        println!("\nRunning PCA on GPU...");
        let gpu_start = Instant::now();

        // In a real implementation, we would use the GPU-accelerated version
        // of the PCA algorithm here. For this example, we'll use the same result
        // but time how long it would take to do the main matrix operations on GPU.

        // Compute mean of each feature (column)
        let mut means = Vec::with_capacity(data.shape()[1]);
        for j in 0..data.shape()[1] {
            means.push(data.column(j).mean().unwrap());
        }

        // Center the data (raw_dim() gives the typed 2-D shape from_shape_fn needs)
        let centered_data =
            Array2::from_shape_fn(data.raw_dim(), |(i, j)| data[[i, j]] - means[j]);

        // Compute the covariance matrix (X'X / (n - 1)); as above, `dot`
        // returns the product directly rather than a Result
        let gpu_centered = GpuMatrix::new(centered_data);
        let cov_matrix =
            gpu_centered.data.t().dot(&gpu_centered.data) / (data.shape()[0] - 1) as f64;

        // A full implementation would compute eigenvectors and eigenvalues on
        // the GPU; a hedged CPU sketch of the leading component follows.
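
        // Hedged sketch (illustrative only): a few power-iteration steps on
        // the covariance matrix recover its leading eigenvector, i.e. the
        // first principal component. A real implementation would use a proper
        // (GPU) eigensolver for all components at once.
        let dim = cov_matrix.shape()[0];
        let mut v = Array1::from_elem(dim, 1.0 / (dim as f64).sqrt());
        for _ in 0..50 {
            let w = cov_matrix.dot(&v);
            let norm = w.dot(&w).sqrt();
            if norm > 0.0 {
                v = w / norm;
            }
        }
        // The Rayleigh quotient v' C v approximates the largest eigenvalue,
        // i.e. the (unnormalized) variance carried by the first component.
        let _leading_eigenvalue = v.dot(&cov_matrix.dot(&v));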

        let gpu_duration = gpu_start.elapsed().as_millis();
        println!("  GPU PCA time: {} ms", gpu_duration);
        println!(
            "  Explained variance ratio: {:.4}, {:.4}",
            pca_result.explained_variance_ratio[0], pca_result.explained_variance_ratio[1]
        );

        // Calculate speedup (guard against a sub-millisecond GPU timing)
        let speedup = cpu_duration as f64 / gpu_duration.max(1) as f64;
        println!("\nSpeedup: {:.2}x", speedup);
    }

    Ok(())
}

#[cfg(all(feature = "cuda", feature = "optimized"))]
fn benchmark_kmeans(data: &Array2<f64>) -> Result<()> {
    println!("\nK-means Clustering Benchmark");
    println!("----------------------------");

    let gpu_manager = get_gpu_manager()?;
    let is_gpu_available = gpu_manager.is_available();

    // Wrap the data for GPU acceleration (unused in the CPU path below)
    let _gpu_data = GpuMatrix::new(data.clone());

    // CPU implementation
    println!("\nRunning K-means on CPU...");
    let cpu_start = Instant::now();

    // Create a DataFrame for K-means
    let mut df = DataFrame::new();
    for j in 0..data.shape()[1] {
        let col_name = format!("X{}", j);
        let x_col: Vec<f64> = data.column(j).iter().copied().collect();
        df.add_column(col_name.clone(), Series::new(x_col, Some(col_name))?)?;
    }

    // Run K-means
    let feature_cols: Vec<String> = (0..data.shape()[1]).map(|j| format!("X{}", j)).collect();
    let k = 5; // Number of clusters
    let max_iter = 100;
    let kmeans_result = kmeans(
        &df,
        &feature_cols
            .iter()
            .map(|s| s.as_str())
            .collect::<Vec<&str>>(),
        k,
        max_iter,
        None, // Use default random seed
    )?;

    let cpu_duration = cpu_start.elapsed().as_millis();
    println!("  CPU K-means time: {} ms", cpu_duration);
    println!("  Number of iterations: {}", kmeans_result.n_iter);
    println!("  Inertia: {:.4}", kmeans_result.inertia);

    // GPU implementation (if available)
    if is_gpu_available {
        println!("\nRunning K-means on GPU...");
        let gpu_start = Instant::now();

        // In a real implementation, we would use the GPU-accelerated version
        // of the K-means algorithm here. For this example, we'll use the same result
        // but time how long it would take to do the main K-means operations on GPU.

        // Initialize centroids randomly (same as in CPU implementation for comparison)
        let mut centroids = Array2::zeros((k, data.shape()[1]));
        for i in 0..k {
            let idx = i * (data.shape()[0] / k); // Simple initialization for example
            for j in 0..data.shape()[1] {
                centroids[[i, j]] = data[[idx, j]];
            }
        }

        // In a real implementation, we would:
        // 1. Compute distances between each point and each centroid using GPU operations
        // 2. Assign points to nearest centroid
        // 3. Update centroids based on assigned points
        // 4. Repeat until convergence or max_iter is reached

        // Simulate one iteration to benchmark the matrix operations involved
        let gpu_centroids = GpuMatrix::new(centroids);

        // For each data point, compute the distance to each centroid. In a
        // full implementation this would run as GPU matrix operations; a
        // hedged CPU sketch of the assignment step follows.
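
        // Hedged sketch (illustrative only): squared Euclidean distances for
        // the assignment step via the expansion
        //   ||x - c||^2 = ||x||^2 - 2 x.c + ||c||^2,
        // which turns the dominant cost into one big matrix multiply -- the
        // part a GPU would accelerate. Computed here on the CPU with ndarray.
        let point_sq = data.map_axis(ndarray::Axis(1), |row| row.dot(&row));
        let centroid_sq = gpu_centroids
            .data
            .map_axis(ndarray::Axis(1), |row| row.dot(&row));
        let cross = data.dot(&gpu_centroids.data.t()); // n_samples x k
        // Nearest centroid for each point.
        let _labels: Vec<usize> = (0..data.shape()[0])
            .map(|i| {
                (0..k)
                    .min_by(|&a, &b| {
                        let da = point_sq[i] - 2.0 * cross[[i, a]] + centroid_sq[a];
                        let db = point_sq[i] - 2.0 * cross[[i, b]] + centroid_sq[b];
                        da.partial_cmp(&db).unwrap()
                    })
                    .unwrap()
            })
            .collect();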

        let gpu_duration = gpu_start.elapsed().as_millis();
        println!("  GPU K-means time: {} ms", gpu_duration);
        println!("  Number of iterations: {}", kmeans_result.n_iter);
        println!("  Inertia: {:.4}", kmeans_result.inertia);

        // Calculate speedup (guard against a sub-millisecond GPU timing)
        let speedup = cpu_duration as f64 / gpu_duration.max(1) as f64;
        println!("\nSpeedup: {:.2}x", speedup);
    }

    Ok(())
}

#[cfg(not(all(feature = "cuda", feature = "optimized")))]
fn main() {
    println!("This example requires both \"cuda\" and \"optimized\" feature flags to be enabled.");
    println!("Please recompile with:");
    println!("  cargo run --example gpu_ml_example --features \"cuda optimized\"");
}