# SciRS2-Core Examples
This document provides comprehensive examples demonstrating the capabilities of SciRS2-Core across different scientific computing domains.
## Table of Contents
1. [Linear Algebra](#linear-algebra)
2. [Signal Processing](#signal-processing)
3. [Statistical Computing](#statistical-computing)
4. [Image Processing](#image-processing)
5. [Monte Carlo Simulations](#monte-carlo-simulations)
6. [GPU Computing](#gpu-computing)
7. [Performance Optimization](#performance-optimization)
8. [Testing and Validation](#testing-and-validation)
## Linear Algebra
### High-Performance Matrix Operations
```rust
use scirs2_core::{
simd_ops::SimdUnifiedOps,
memory::create_scientific_pool,
chunking::{ChunkConfig, MatrixChunking},
testing::NumericAssertion,
};
use ndarray::{Array2, Axis};
fn matrix_operations_example() -> Result<(), Box<dyn std::error::Error>> {
// Create optimized memory pool
let mut pool = create_scientific_pool::<f64>();
// Generate test matrices
let size = 1000;
let a = Array2::<f64>::from_shape_fn((size, size), |(i, j)| {
(i as f64).sin() * (j as f64).cos()
});
let b = Array2::<f64>::from_shape_fn((size, size), |(i, j)| {
(i as f64 + j as f64) / (size as f64)
});
// Optimized matrix chunking
let (row_chunk, col_chunk_a, col_chunk_b) = MatrixChunking::matrix_multiply_chunks(
a.nrows(), a.ncols(), b.ncols()
);
println!("Optimal chunk sizes: {}x{}, {}", row_chunk, col_chunk_a, col_chunk_b);
// SIMD-accelerated row operations
let row_norms: Vec<f64> = a.axis_iter(Axis(0))
.map(|row| f64::simd_norm(&row))
.collect();
// Parallel matrix operations with chunking
let chunk_config = ChunkConfig::linear_algebra();
let processed_rows: Vec<_> = a.axis_iter(Axis(0))
.enumerate()
.map(|(i, row)| {
// Normalize each row using SIMD
let norm = row_norms[i];
if norm > 1e-10 {
f64::simd_mul(&row, &ndarray::Array1::from_elem(row.len(), 1.0 / norm).view())
} else {
row.to_owned()
}
})
.collect();
// Reconstruct normalized matrix
let mut normalized = Array2::zeros((a.nrows(), a.ncols()));
for (i, row) in processed_rows.into_iter().enumerate() {
normalized.row_mut(i).assign(&row);
}
// Verify normalization
for i in 0..normalized.nrows() {
let row_norm = f64::simd_norm(&normalized.row(i));
if row_norms[i] > 1e-10 {
row_norm.assert_approx_eq(&1.0, 1e-12);
}
}
println!("Matrix normalization completed successfully");
Ok(())
}
```
### Eigenvalue Computation with SIMD
```rust
use scirs2_core::{
simd_ops::SimdUnifiedOps,
testing::TestDataGenerator,
validation::{MatrixValidator, ProductionValidator},
};
fn eigenvalue_power_method() -> Result<f64, Box<dyn std::error::Error>> {
let mut gen = TestDataGenerator::with_seed(42);
// Generate symmetric positive definite matrix
let matrix = gen.positive_definite_matrix(100);
// Validate matrix properties
let validator = MatrixValidator::<f64>::new();
validator.validate_symmetric(&matrix)?;
validator.validate_positive_diagonal(&matrix)?;
// Power method for largest eigenvalue
let mut v = gen.normal_distribution(matrix.ncols(), 0.0, 1.0);
let max_iterations = 1000;
let tolerance = 1e-12;
for iteration in 0..max_iterations {
// Matrix-vector multiplication with SIMD
let mut v_new = ndarray::Array1::zeros(matrix.ncols());
for (i, row) in matrix.axis_iter(ndarray::Axis(0)).enumerate() {
v_new[i] = f64::simd_dot(&row, &v.view());
}
// Normalize with SIMD
let norm = f64::simd_norm(&v_new.view());
v_new = f64::simd_mul(&v_new.view(), &ndarray::Array1::from_elem(v_new.len(), 1.0 / norm).view());
// Check convergence
let diff = f64::simd_sub(&v_new.view(), &v.view());
let error = f64::simd_norm(&diff.view());
if error < tolerance {
println!("Converged after {} iterations", iteration + 1);
break;
}
v = v_new;
}
// Compute Rayleigh quotient (eigenvalue estimate)
let av = matrix.dot(&v);
let eigenvalue = f64::simd_dot(&v.view(), &av.view());
println!("Largest eigenvalue: {:.6}", eigenvalue);
Ok(eigenvalue)
}
```
## Signal Processing
### FFT-Optimized Signal Analysis
```rust
use scirs2_core::{
chunking::{ChunkConfig, ChunkingUtils},
simd_ops::SimdUnifiedOps,
constants::math,
testing::TestDataGenerator,
};
fn signal_processing_example() -> Result<(), Box<dyn std::error::Error>> {
let mut gen = TestDataGenerator::with_seed(123);
// Generate time series with multiple frequency components
let sample_rate = 1000.0; // Hz
let duration = 2.0; // seconds
let num_samples = (sample_rate * duration) as usize;
let time_series = (0..num_samples)
.map(|i| {
let t = i as f64 / sample_rate;
// Signal: 50Hz sine + 120Hz sine + noise
50.0_f64.sin() * (2.0 * math::PI * 50.0 * t).sin() +
30.0_f64 * (2.0 * math::PI * 120.0 * t).sin() +
gen.random_float(-5.0, 5.0) // noise
})
.collect::<Vec<f64>>();
let signal = ndarray::Array1::from_vec(time_series);
// Optimized chunking for signal processing (power-of-2 sizes)
let chunk_config = ChunkConfig::signal_processing();
let chunk_size = ChunkingUtils::optimal_chunk_size(signal.len(), &chunk_config);
println!("Optimal chunk size for FFT: {}", chunk_size);
// Apply windowing function (Hamming window) with SIMD
let window: ndarray::Array1<f64> = ndarray::Array1::from_shape_fn(signal.len(), |i| {
0.54 - 0.46 * (2.0 * math::PI * i as f64 / (signal.len() - 1) as f64).cos()
});
let windowed_signal = f64::simd_mul(&signal.view(), &window.view());
// Compute moving average for trend analysis
let window_size = 50;
let mut moving_avg = Vec::with_capacity(signal.len() - window_size + 1);
for i in 0..=(signal.len() - window_size) {
let window_slice = windowed_signal.slice(ndarray::s![i..i + window_size]);
let avg = f64::simd_sum(&window_slice) / window_size as f64;
moving_avg.push(avg);
}
// Compute signal energy
let signal_power = f64::simd_dot(&windowed_signal.view(), &windowed_signal.view()) / signal.len() as f64;
println!("Signal power: {:.3} dB", 10.0 * signal_power.log10());
// High-pass filtering (simple difference filter) with SIMD
let mut filtered = ndarray::Array1::zeros(signal.len() - 1);
for i in 0..filtered.len() {
filtered[i] = windowed_signal[i + 1] - windowed_signal[i];
}
println!("Signal processing completed: {} samples processed", signal.len());
Ok(())
}
```
### Real-time Signal Filtering
```rust
use scirs2_core::{
simd_ops::SimdUnifiedOps,
memory::{AdvancedBufferPool, MemoryConfig, AccessPattern},
parallel_ops::parallel_map,
};
struct RealTimeFilter {
coefficients: ndarray::Array1<f64>,
buffer: std::collections::VecDeque<f64>,
memory_pool: AdvancedBufferPool<f64>,
}
impl RealTimeFilter {
fn new(filter_order: usize) -> Self {
// Create low-pass Butterworth filter coefficients (simplified)
let coeffs = ndarray::Array1::from_shape_fn(filter_order, |i| {
(-2.0 * std::f64::consts::PI * i as f64 / filter_order as f64).exp()
});
let config = MemoryConfig {
access_pattern: AccessPattern::Streaming,
enable_prefetch: true,
..Default::default()
};
Self {
coefficients: coeffs,
buffer: std::collections::VecDeque::with_capacity(filter_order),
memory_pool: AdvancedBufferPool::with_config(config),
}
}
fn process_sample(&mut self, input: f64) -> f64 {
// Add new sample to buffer
self.buffer.push_back(input);
if self.buffer.len() > self.coefficients.len() {
self.buffer.pop_front();
}
// Convolution with SIMD
if self.buffer.len() == self.coefficients.len() {
let buffer_array = ndarray::Array1::from_vec(self.buffer.iter().copied().collect());
f64::simd_dot(&buffer_array.view(), &self.coefficients.view())
} else {
input // Pass-through until buffer is full
}
}
fn process_block(&mut self, input_block: &[f64]) -> Vec<f64> {
parallel_map(input_block, |&sample| self.process_sample(sample))
}
}
fn real_time_filtering_example() -> Result<(), Box<dyn std::error::Error>> {
let mut filter = RealTimeFilter::new(64);
// Simulate real-time data stream
let mut gen = scirs2_core::testing::TestDataGenerator::with_seed(456);
let block_size = 256;
for block_idx in 0..10 {
// Generate input block
let input_block: Vec<f64> = (0..block_size)
.map(|_| gen.random_float(-1.0, 1.0))
.collect();
// Process block
let filtered_block = filter.process_block(&input_block);
println!("Processed block {}: {} samples", block_idx, filtered_block.len());
// In real application, output would be sent to audio interface
// or next processing stage
}
Ok(())
}
```
## Statistical Computing
### High-Performance Statistical Analysis
```rust
use scirs2_core::{
simd_ops::SimdUnifiedOps,
parallel_ops::{parallel_map, parallel_reduce},
testing::{TestDataGenerator, NumericAssertion},
chunking::ChunkConfig,
};
fn statistical_analysis_example() -> Result<(), Box<dyn std::error::Error>> {
let mut gen = TestDataGenerator::with_seed(789);
// Generate large dataset with normal distribution
let sample_size = 1_000_000;
let data = gen.normal_distribution(sample_size, 50.0, 15.0);
println!("Computing statistics for {} samples", sample_size);
// Basic statistics with SIMD acceleration
let n = data.len() as f64;
let sum = f64::simd_sum(&data.view());
let mean = sum / n;
// Compute variance with SIMD
let mean_array = ndarray::Array1::from_elem(data.len(), mean);
let deviations = f64::simd_sub(&data.view(), &mean_array.view());
let squared_deviations = f64::simd_mul(&deviations.view(), &deviations.view());
let variance = f64::simd_sum(&squared_deviations.view()) / (n - 1.0);
let std_dev = variance.sqrt();
println!("Mean: {:.3}", mean);
println!("Standard Deviation: {:.3}", std_dev);
// Verify against expected values
mean.assert_approx_eq(&50.0, 1.0); // Within 1.0 due to random sampling
std_dev.assert_approx_eq(&15.0, 2.0); // Within 2.0 due to random sampling
// Parallel quantile computation
let mut sorted_data = data.to_vec();
sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap());
let quantiles = [0.25, 0.5, 0.75, 0.95, 0.99];
let quantile_values: Vec<f64> = quantiles.iter()
.map(|&q| {
let index = ((data.len() - 1) as f64 * q) as usize;
sorted_data[index]
})
.collect();
for (q, val) in quantiles.iter().zip(quantile_values.iter()) {
println!("Q{}: {:.3}", (q * 100.0) as u32, val);
}
// Parallel histogram computation
let num_bins = 50;
let min_val = sorted_data[0];
let max_val = sorted_data[sorted_data.len() - 1];
let bin_width = (max_val - min_val) / num_bins as f64;
let chunk_config = ChunkConfig::compute_intensive();
let histogram = parallel_reduce(&sorted_data, vec![0u32; num_bins], |mut hist, &value| {
let bin_index = ((value - min_val) / bin_width) as usize;
let bin_index = bin_index.min(num_bins - 1);
hist[bin_index] += 1;
hist
});
println!("Histogram computed with {} bins", num_bins);
// Outlier detection using IQR method
let q1 = quantile_values[0];
let q3 = quantile_values[2];
let iqr = q3 - q1;
let lower_bound = q1 - 1.5 * iqr;
let upper_bound = q3 + 1.5 * iqr;
let outliers: Vec<f64> = data.iter()
.filter(|&&x| x < lower_bound || x > upper_bound)
.copied()
.collect();
println!("Found {} outliers ({:.2}%)",
outliers.len(),
outliers.len() as f64 / data.len() as f64 * 100.0);
Ok(())
}
```
### Bootstrap Confidence Intervals
```rust
use scirs2_core::{
testing::TestDataGenerator,
parallel_ops::parallel_map,
simd_ops::SimdUnifiedOps,
chunking::ChunkConfig,
};
fn bootstrap_confidence_interval(
data: &[f64],
num_bootstrap: usize,
confidence_level: f64
) -> Result<(f64, f64, f64), Box<dyn std::error::Error>> {
let mut gen = TestDataGenerator::with_seed(101112);
let chunk_config = ChunkConfig::monte_carlo();
// Generate bootstrap samples
let bootstrap_means: Vec<f64> = (0..num_bootstrap)
.map(|_| {
// Resample with replacement
let bootstrap_sample: Vec<f64> = (0..data.len())
.map(|_| {
let idx = gen.random_int(0, data.len() as i32 - 1) as usize;
data[idx]
})
.collect();
// Compute mean of bootstrap sample
let sample_array = ndarray::Array1::from_vec(bootstrap_sample);
f64::simd_sum(&sample_array.view()) / sample_array.len() as f64
})
.collect();
// Sort bootstrap means
let mut sorted_means = bootstrap_means;
sorted_means.sort_by(|a, b| a.partial_cmp(b).unwrap());
// Compute confidence interval
let alpha = 1.0 - confidence_level;
let lower_index = ((alpha / 2.0) * num_bootstrap as f64) as usize;
let upper_index = ((1.0 - alpha / 2.0) * num_bootstrap as f64) as usize;
let lower_bound = sorted_means[lower_index];
let upper_bound = sorted_means[upper_index];
// Original sample mean
let original_data = ndarray::Array1::from_vec(data.to_vec());
let sample_mean = f64::simd_sum(&original_data.view()) / data.len() as f64;
Ok((sample_mean, lower_bound, upper_bound))
}
fn bootstrap_example() -> Result<(), Box<dyn std::error::Error>> {
let mut gen = TestDataGenerator::with_seed(131415);
let data = gen.normal_distribution(1000, 25.0, 5.0);
let (mean, lower, upper) = bootstrap_confidence_interval(
&data.to_vec(),
10000,
0.95
)?;
println!("Sample mean: {:.3}", mean);
println!("95% Confidence Interval: [{:.3}, {:.3}]", lower, upper);
println!("Interval width: {:.3}", upper - lower);
Ok(())
}
```
## Image Processing
### SIMD-Accelerated Image Operations
```rust
use scirs2_core::{
simd_ops::SimdUnifiedOps,
chunking::{ChunkConfig, MatrixChunking},
parallel_ops::parallel_map,
memory::create_optimized_pool,
};
struct Image {
data: ndarray::Array3<f32>, // Height x Width x Channels
width: usize,
height: usize,
channels: usize,
}
impl Image {
fn new(width: usize, height: usize, channels: usize) -> Self {
Self {
data: ndarray::Array3::zeros((height, width, channels)),
width,
height,
channels,
}
}
fn from_data(data: ndarray::Array3<f32>) -> Self {
let shape = data.shape();
Self {
width: shape[1],
height: shape[0],
channels: shape[2],
data,
}
}
// SIMD-accelerated gaussian blur
fn gaussian_blur(&self, sigma: f32) -> Result<Image, Box<dyn std::error::Error>> {
let kernel_size = (6.0 * sigma).ceil() as usize | 1; // Ensure odd size
let kernel = self.gaussian_kernel(kernel_size, sigma);
let chunk_config = ChunkConfig::image_processing();
let mut result = Image::new(self.width, self.height, self.channels);
// Process each channel separately
for c in 0..self.channels {
let channel = self.data.slice(ndarray::s![.., .., c]);
let blurred_channel = self.convolution_2d(&channel, &kernel)?;
result.data.slice_mut(ndarray::s![.., .., c]).assign(&blurred_channel);
}
Ok(result)
}
fn gaussian_kernel(&self, size: usize, sigma: f32) -> ndarray::Array2<f32> {
let center = size / 2;
let sigma_sq = sigma * sigma;
let norm_factor = 1.0 / (2.0 * std::f32::consts::PI * sigma_sq);
ndarray::Array2::from_shape_fn((size, size), |(i, j)| {
let x = i as f32 - center as f32;
let y = j as f32 - center as f32;
let exponent = -(x * x + y * y) / (2.0 * sigma_sq);
norm_factor * exponent.exp()
})
}
fn convolution_2d(
&self,
image: &ndarray::ArrayView2<f32>,
kernel: &ndarray::Array2<f32>
) -> Result<ndarray::Array2<f32>, Box<dyn std::error::Error>> {
let (img_h, img_w) = image.dim();
let (ker_h, ker_w) = kernel.dim();
let pad_h = ker_h / 2;
let pad_w = ker_w / 2;
let mut result = ndarray::Array2::zeros((img_h, img_w));
// Parallel convolution with SIMD
let rows: Vec<usize> = (pad_h..img_h - pad_h).collect();
let processed_rows: Vec<_> = parallel_map(&rows, |&row| {
let mut row_result = Vec::with_capacity(img_w - 2 * pad_w);
for col in pad_w..img_w - pad_w {
let mut sum = 0.0f32;
// Extract neighborhood
let neighborhood = image.slice(ndarray::s![
row - pad_h..row + pad_h + 1,
col - pad_w..col + pad_w + 1
]);
// SIMD-accelerated convolution
for ((n_val, k_val), _) in neighborhood.iter()
.zip(kernel.iter())
.zip(0..)
{
sum += n_val * k_val;
}
row_result.push(sum);
}
(row, row_result)
});
// Assign results back
for (row, row_data) in processed_rows {
for (col, &value) in row_data.iter().enumerate() {
result[[row, col + pad_w]] = value;
}
}
Ok(result)
}
// Edge detection using Sobel operator
fn sobel_edge_detection(&self) -> Result<Image, Box<dyn std::error::Error>> {
let sobel_x = ndarray::arr2(&[
[-1.0, 0.0, 1.0],
[-2.0, 0.0, 2.0],
[-1.0, 0.0, 1.0]
]);
let sobel_y = ndarray::arr2(&[
[-1.0, -2.0, -1.0],
[ 0.0, 0.0, 0.0],
[ 1.0, 2.0, 1.0]
]);
let mut result = Image::new(self.width, self.height, 1);
// Convert to grayscale if necessary
let grayscale = if self.channels == 3 {
let mut gray = ndarray::Array2::zeros((self.height, self.width));
for i in 0..self.height {
for j in 0..self.width {
let r = self.data[[i, j, 0]];
let g = self.data[[i, j, 1]];
let b = self.data[[i, j, 2]];
gray[[i, j]] = 0.299 * r + 0.587 * g + 0.114 * b;
}
}
gray
} else {
self.data.slice(ndarray::s![.., .., 0]).to_owned()
};
// Apply Sobel operators
let grad_x = self.convolution_2d(&grayscale.view(), &sobel_x)?;
let grad_y = self.convolution_2d(&grayscale.view(), &sobel_y)?;
// Compute gradient magnitude with SIMD
for i in 0..self.height {
for j in 0..self.width {
let gx = grad_x[[i, j]];
let gy = grad_y[[i, j]];
let magnitude = (gx * gx + gy * gy).sqrt();
result.data[[i, j, 0]] = magnitude;
}
}
Ok(result)
}
}
fn image_processing_example() -> Result<(), Box<dyn std::error::Error>> {
// Create test image with pattern
let width = 512;
let height = 512;
let channels = 3;
let test_image = ndarray::Array3::from_shape_fn((height, width, channels), |(i, j, c)| {
let x = j as f32 / width as f32;
let y = i as f32 / height as f32;
match c {
0 => ((x * 10.0).sin() * 0.5 + 0.5), // Red channel
1 => ((y * 10.0).sin() * 0.5 + 0.5), // Green channel
2 => ((x * y * 50.0).sin() * 0.5 + 0.5), // Blue channel
_ => 0.0,
}
});
let image = Image::from_data(test_image);
println!("Created test image: {}x{} with {} channels", width, height, channels);
// Apply Gaussian blur
let blurred = image.gaussian_blur(2.0)?;
println!("Applied Gaussian blur with sigma=2.0");
// Detect edges
let edges = blurred.sobel_edge_detection()?;
println!("Applied Sobel edge detection");
println!("Image processing pipeline completed successfully");
Ok(())
}
```
## Monte Carlo Simulations
### High-Performance Monte Carlo Integration
```rust
use scirs2_core::{
testing::TestDataGenerator,
parallel_ops::parallel_reduce,
chunking::ChunkConfig,
simd_ops::SimdUnifiedOps,
constants::math,
};
fn monte_carlo_pi_estimation(num_samples: usize) -> Result<f64, Box<dyn std::error::Error>> {
let chunk_config = ChunkConfig::monte_carlo();
let num_threads = std::thread::available_parallelism()?.get();
let samples_per_thread = num_samples / num_threads;
// Parallel Monte Carlo simulation
let thread_seeds: Vec<u64> = (0..num_threads).map(|i| i as u64 * 12345).collect();
let total_inside = parallel_reduce(&thread_seeds, 0u64, |acc, &seed| {
let mut gen = TestDataGenerator::with_seed(seed);
let mut inside_count = 0u64;
for _ in 0..samples_per_thread {
let x = gen.random_float(-1.0, 1.0);
let y = gen.random_float(-1.0, 1.0);
if x * x + y * y <= 1.0 {
inside_count += 1;
}
}
acc + inside_count
});
let pi_estimate = 4.0 * total_inside as f64 / num_samples as f64;
println!("Monte Carlo π estimation:");
println!("Samples: {}", num_samples);
println!("Estimated π: {:.6}", pi_estimate);
println!("Actual π: {:.6}", math::PI);
println!("Error: {:.6}", (pi_estimate - math::PI).abs());
Ok(pi_estimate)
}
// Multi-dimensional integration using Monte Carlo
fn monte_carlo_integration<F>(
func: F,
bounds: &[(f64, f64)],
num_samples: usize
) -> Result<f64, Box<dyn std::error::Error>>
where
F: Fn(&[f64]) -> f64 + Send + Sync,
{
let dimensions = bounds.len();
let chunk_config = ChunkConfig::monte_carlo();
let num_threads = std::thread::available_parallelism()?.get();
let samples_per_thread = num_samples / num_threads;
// Calculate volume of integration domain
let volume: f64 = bounds.iter()
.map(|(min, max)| max - min)
.product();
let thread_seeds: Vec<u64> = (0..num_threads).map(|i| i as u64 * 54321).collect();
let sum = parallel_reduce(&thread_seeds, 0.0f64, |acc, &seed| {
let mut gen = TestDataGenerator::with_seed(seed);
let mut thread_sum = 0.0;
for _ in 0..samples_per_thread {
let point: Vec<f64> = bounds.iter()
.map(|(min, max)| gen.random_float(*min, *max))
.collect();
thread_sum += func(&point);
}
acc + thread_sum
});
let result = volume * sum / num_samples as f64;
Ok(result)
}
fn monte_carlo_examples() -> Result<(), Box<dyn std::error::Error>> {
// π estimation
monte_carlo_pi_estimation(10_000_000)?;
// Multi-dimensional integration: ∫∫∫ x²y²z² dx dy dz over [0,1]³
let integral_result = monte_carlo_integration(
|point| point[0].powi(2) * point[1].powi(2) * point[2].powi(2),
&[(0.0, 1.0), (0.0, 1.0), (0.0, 1.0)],
1_000_000
)?;
let analytical_result = 1.0 / 27.0; // (1/3)³
println!("\nMulti-dimensional integration:");
println!("Estimated: {:.6}", integral_result);
println!("Analytical: {:.6}", analytical_result);
println!("Error: {:.6}", (integral_result - analytical_result).abs());
// Option pricing using Black-Scholes Monte Carlo
let option_price = black_scholes_monte_carlo(
100.0, // Current stock price
105.0, // Strike price
0.25, // Time to expiration (years)
0.05, // Risk-free rate
0.2, // Volatility
1_000_000 // Number of simulations
)?;
println!("\nOption pricing (Monte Carlo):");
println!("Call option price: ${:.4}", option_price);
Ok(())
}
fn black_scholes_monte_carlo(
s0: f64, // Current stock price
strike: f64, // Strike price
t: f64, // Time to expiration
r: f64, // Risk-free rate
sigma: f64, // Volatility
num_simulations: usize
) -> Result<f64, Box<dyn std::error::Error>> {
let chunk_config = ChunkConfig::monte_carlo();
let num_threads = std::thread::available_parallelism()?.get();
let sims_per_thread = num_simulations / num_threads;
let thread_seeds: Vec<u64> = (0..num_threads).map(|i| i as u64 * 98765).collect();
let payoff_sum = parallel_reduce(&thread_seeds, 0.0f64, |acc, &seed| {
let mut gen = TestDataGenerator::with_seed(seed);
let mut thread_payoff_sum = 0.0;
for _ in 0..sims_per_thread {
// Generate random normal variable (Box-Muller transform)
let u1 = gen.random_float(0.0, 1.0);
let u2 = gen.random_float(0.0, 1.0);
let z = (-2.0 * u1.ln()).sqrt() * (2.0 * math::PI * u2).cos();
// Stock price at expiration using geometric Brownian motion
let st = s0 * ((r - 0.5 * sigma * sigma) * t + sigma * t.sqrt() * z).exp();
// Call option payoff
let payoff = (st - strike).max(0.0);
thread_payoff_sum += payoff;
}
acc + thread_payoff_sum
});
// Discount to present value
let option_price = (payoff_sum / num_simulations as f64) * (-r * t).exp();
Ok(option_price)
}
```
## GPU Computing
### CUDA Acceleration Example
```rust
use scirs2_core::gpu::{
GpuContext, GpuBackend, GpuBuffer, ElementwiseAddKernel, GemvKernel
};
fn gpu_computing_example() -> Result<(), Box<dyn std::error::Error>> {
// Initialize GPU context
let context = GpuContext::new(GpuBackend::CUDA, 0)?;
println!("GPU context initialized: {:?}", context.backend);
// Large arrays for GPU processing
let size = 1_000_000;
let a: Vec<f32> = (0..size).map(|i| i as f32).collect();
let b: Vec<f32> = (0..size).map(|i| (i * 2) as f32).collect();
// Create GPU buffers
let buffer_a = GpuBuffer::from_slice(&context, &a)?;
let buffer_b = GpuBuffer::from_slice(&context, &b)?;
let buffer_result = GpuBuffer::zeros(&context, size)?;
println!("Created GPU buffers for {} elements", size);
// Execute element-wise addition kernel
let add_kernel = ElementwiseAddKernel::new();
add_kernel.launch(&context, &[&buffer_a, &buffer_b, &buffer_result])?;
// Copy result back to host
let result = buffer_result.to_vec()?;
// Verify results
for i in 0..std::cmp::min(10, size) {
let expected = a[i] + b[i];
assert!((result[i] - expected).abs() < 1e-6);
}
println!("Element-wise addition completed successfully on GPU");
// Matrix-vector multiplication example
let matrix_size = 2048;
let matrix: Vec<f32> = (0..matrix_size * matrix_size)
.map(|i| (i as f32).sin())
.collect();
let vector: Vec<f32> = (0..matrix_size)
.map(|i| (i as f32).cos())
.collect();
let matrix_buffer = GpuBuffer::from_slice(&context, &matrix)?;
let vector_buffer = GpuBuffer::from_slice(&context, &vector)?;
let mv_result_buffer = GpuBuffer::zeros(&context, matrix_size)?;
// Execute GEMV kernel (General Matrix-Vector multiplication)
let gemv_kernel = GemvKernel::new();
gemv_kernel.launch(&context, &[&matrix_buffer, &vector_buffer, &mv_result_buffer])?;
let mv_result = mv_result_buffer.to_vec()?;
println!("Matrix-vector multiplication completed: {}x{} matrix", matrix_size, matrix_size);
Ok(())
}
// Hybrid CPU/GPU computation
fn hybrid_cpu_gpu_example() -> Result<(), Box<dyn std::error::Error>> {
use scirs2_core::{
chunking::ChunkConfig,
simd_ops::SimdUnifiedOps,
parallel_ops::parallel_map,
};
let total_size = 10_000_000;
let gpu_threshold = 100_000; // Use GPU for chunks larger than this
// Generate test data
let data: Vec<f32> = (0..total_size)
.map(|i| (i as f32 / 1000.0).sin())
.collect();
let chunk_config = ChunkConfig::gpu_hybrid();
let chunks: Vec<&[f32]> = data.chunks(gpu_threshold).collect();
// Process chunks with appropriate backend
let results: Vec<Vec<f32>> = parallel_map(&chunks, |chunk| {
if chunk.len() >= gpu_threshold {
// Use GPU for large chunks
gpu_process_chunk(chunk).unwrap_or_else(|_| {
// Fallback to CPU if GPU fails
cpu_process_chunk(chunk)
})
} else {
// Use CPU for small chunks
cpu_process_chunk(chunk)
}
});
// Combine results
let final_result: Vec<f32> = results.into_iter().flatten().collect();
println!("Hybrid CPU/GPU processing completed: {} elements", final_result.len());
Ok(())
}
fn gpu_process_chunk(chunk: &[f32]) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
let context = GpuContext::new(GpuBackend::CUDA, 0)?;
let input_buffer = GpuBuffer::from_slice(&context, chunk)?;
let output_buffer = GpuBuffer::zeros(&context, chunk.len())?;
// Apply square kernel
let square_kernel = ElementwiseSquareKernel::new();
square_kernel.launch(&context, &[&input_buffer, &output_buffer])?;
output_buffer.to_vec()
}
fn cpu_process_chunk(chunk: &[f32]) -> Vec<f32> {
// SIMD-accelerated processing on CPU
let chunk_array = ndarray::Array1::from_slice(chunk);
let result = f32::simd_mul(&chunk_array.view(), &chunk_array.view());
result.to_vec()
}
```
## Performance Optimization
### Comprehensive Performance Analysis
```rust
use scirs2_core::{
testing::BenchmarkSuite,
memory::{create_scientific_pool, MemoryReport},
chunking::{ChunkPerformanceMonitor, ChunkMeasurement},
simd_ops::SimdUnifiedOps,
};
use std::time::{Duration, Instant};
fn performance_analysis_example() -> Result<(), Box<dyn std::error::Error>> {
let mut benchmark_suite = BenchmarkSuite::new()
.with_regression_threshold(1.1);
// Set up test data
let sizes = [1000, 10000, 100000, 1000000];
let mut memory_pool = create_scientific_pool::<f64>();
let mut chunk_monitor = ChunkPerformanceMonitor::new();
for &size in &sizes {
let data_a: Vec<f64> = (0..size).map(|i| i as f64).collect();
let data_b: Vec<f64> = (0..size).map(|i| (i * 2) as f64).collect();
let array_a = ndarray::Array1::from_vec(data_a);
let array_b = ndarray::Array1::from_vec(data_b);
// Benchmark scalar operations
benchmark_suite.add_benchmark(&format!("scalar_add_{}", size), {
let a = array_a.clone();
let b = array_b.clone();
move || {
let start = Instant::now();
let _result = &a + &b;
start.elapsed()
}
});
// Benchmark SIMD operations
benchmark_suite.add_benchmark(&format!("simd_add_{}", size), {
let a = array_a.clone();
let b = array_b.clone();
move || {
let start = Instant::now();
let _result = f64::simd_add(&a.view(), &b.view());
start.elapsed()
}
});
// Benchmark memory pool operations
benchmark_suite.add_benchmark(&format!("memory_pool_{}", size), {
let mut pool = create_scientific_pool::<f64>();
move || {
let start = Instant::now();
let buffer = pool.acquire_vec_advanced(size);
pool.release_vec_advanced(buffer);
start.elapsed()
}
});
}
// Run benchmarks
let results = benchmark_suite.run_all();
results.print_results();
// Analyze memory performance
let memory_report = memory_pool.memory_report();
print_memory_analysis(&memory_report);
// Record chunk performance measurements
for &size in &sizes {
let measurement = ChunkMeasurement {
chunk_size: size / 4,
data_size: size,
execution_time: Duration::from_nanos(size as u64 * 10), // Simulated
throughput: size as f64 / (size as f64 * 10e-9), // GB/s
operation_type: "vector_add".to_string(),
};
chunk_monitor.record_measurement(measurement);
}
let chunk_stats = chunk_monitor.get_statistics();
print_chunk_analysis(&chunk_stats);
// Performance scaling analysis
analyze_performance_scaling(&results)?;
Ok(())
}
fn print_memory_analysis(report: &MemoryReport) {
println!("\n=== Memory Performance Analysis ===");
println!("Current usage: {:.2} MB", report.current_usage as f64 / 1_048_576.0);
println!("Peak usage: {:.2} MB", report.peak_usage as f64 / 1_048_576.0);
println!("Pool efficiency: {:.1}%", report.pool_efficiency * 100.0);
println!("Memory pressure: {:?}", report.pressure_level);
println!("Fragmentation: {:.1}%", report.fragmentation_ratio * 100.0);
}
fn print_chunk_analysis(stats: &scirs2_core::chunking::ChunkStatistics) {
println!("\n=== Chunking Performance Analysis ===");
println!("Total measurements: {}", stats.total_measurements);
println!("Average throughput: {:.2} GB/s", stats.avg_throughput);
println!("Peak throughput: {:.2} GB/s", stats.max_throughput);
println!("Minimum throughput: {:.2} GB/s", stats.min_throughput);
println!("Optimized operations: {}", stats.optimal_operations);
}
fn analyze_performance_scaling(
results: &scirs2_core::testing::BenchmarkResults
) -> Result<(), Box<dyn std::error::Error>> {
println!("\n=== Performance Scaling Analysis ===");
let sizes = [1000, 10000, 100000, 1000000];
// Analyze SIMD speedup
for &size in &sizes {
let scalar_time = results.get_result(&format!("scalar_add_{}", size));
let simd_time = results.get_result(&format!("simd_add_{}", size));
if let (Some(scalar), Some(simd)) = (scalar_time, simd_time) {
let speedup = scalar.as_secs_f64() / simd.as_secs_f64();
println!("Size {}: SIMD speedup = {:.2}x", size, speedup);
}
}
// Analyze memory throughput
for &size in &sizes {
let memory_time = results.get_result(&format!("memory_pool_{}", size));
if let Some(time) = memory_time {
let throughput = size as f64 * std::mem::size_of::<f64>() as f64 / time.as_secs_f64() / 1e9;
println!("Size {}: Memory throughput = {:.2} GB/s", size, throughput);
}
}
Ok(())
}
// Cache performance analysis
fn cache_performance_analysis() -> Result<(), Box<dyn std::error::Error>> {
use scirs2_core::chunking::{ChunkConfig, CacheAwareness};
println!("\n=== Cache Performance Analysis ===");
let data_size = 1_000_000;
let test_data: Vec<f64> = (0..data_size).map(|i| i as f64).collect();
let array = ndarray::Array1::from_vec(test_data);
// Test different cache awareness levels
let cache_levels = [
CacheAwareness::None,
CacheAwareness::L1,
CacheAwareness::L2,
CacheAwareness::L3,
CacheAwareness::Full,
];
for cache_level in &cache_levels {
let config = ChunkConfig::default()
.with_cache_awareness(*cache_level);
let start = Instant::now();
// Simulate cache-aware processing
let chunk_size = scirs2_core::chunking::ChunkingUtils::optimal_chunk_size(data_size, &config);
let _result = f64::simd_sum(&array.view());
let duration = start.elapsed();
println!("Cache level {:?}: {:.3} ms (chunk size: {})",
cache_level, duration.as_secs_f64() * 1000.0, chunk_size);
}
Ok(())
}
```
This comprehensive examples document demonstrates the full capabilities of SciRS2-Core across different scientific computing domains, showcasing performance optimization techniques, GPU acceleration, advanced memory management, and robust testing methodologies.