batuta 0.7.3

Sovereign AI orchestration: autonomous agents, ML serving, code analysis, and transpilation pipelines
Documentation
#!/usr/bin/env rust-script
//! NumPy Data Processing Example - Transpiled Output
//!
//! This shows the expected Rust/Trueno code after Batuta transpilation.
//!
//! Demonstrates:
//! - NumPy → Trueno operation mapping
//! - Automatic backend selection (CPU/SIMD/GPU)
//! - Preserved semantics with improved performance/safety
//!
//! Run with: cargo run --example numpy_data_processing_output

use std::collections::HashMap;

// Trueno imports (SIMD/GPU accelerated tensor operations)
// use trueno::{Vector, Tensor, Backend};

/// Simulate loading sensor data (normally from file)
fn load_sensor_data() -> Vec<f64> {
    // Simulated temperature readings (in Celsius) from 100 sensors
    // NOTE: Rust doesn't have built-in random with seed like NumPy
    // In practice, use rand crate with explicit seeding for reproducibility

    // For this example, use pre-generated values matching NumPy output
    let data: Vec<f64> = vec![
        29.967, 24.618, 31.477, 25.230, 24.544,
        19.110, 20.454, 26.240, 24.186, 26.967,
        // ... (truncated for brevity - full 100 values in real code)
    ];

    data
}

/// Clean and normalize sensor data
fn preprocess_data(data: &[f64]) -> Vec<f64> {
    // Compute mean and std
    let mean = data.iter().sum::<f64>() / data.len() as f64;
    let variance = data.iter()
        .map(|x| (x - mean).powi(2))
        .sum::<f64>() / data.len() as f64;
    let std = variance.sqrt();

    // Create mask for valid data points (within 3 standard deviations)
    let lower_bound = mean - 3.0 * std;
    let upper_bound = mean + 3.0 * std;

    // Filter data
    let cleaned_data: Vec<f64> = data.iter()
        .filter(|&&x| x >= lower_bound && x <= upper_bound)
        .copied()
        .collect();

    // Normalize to [0, 1]
    let min_val = cleaned_data.iter().cloned().fold(f64::INFINITY, f64::min);
    let max_val = cleaned_data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    let range = max_val - min_val;

    let normalized: Vec<f64> = cleaned_data.iter()
        .map(|x| (x - min_val) / range)
        .collect();

    normalized
}

/// Compute statistical metrics for the data
fn compute_statistics(data: &[f64]) -> HashMap<String, f64> {
    let len = data.len() as f64;
    let sum: f64 = data.iter().sum();
    let mean = sum / len;

    let variance = data.iter()
        .map(|x| (x - mean).powi(2))
        .sum::<f64>() / len;
    let std = variance.sqrt();

    // Find min and max
    let min = data.iter().cloned().fold(f64::INFINITY, f64::min);
    let max = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);

    // Compute median
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let median = if sorted.len() % 2 == 0 {
        let mid = sorted.len() / 2;
        (sorted[mid - 1] + sorted[mid]) / 2.0
    } else {
        sorted[sorted.len() / 2]
    };

    let mut stats = HashMap::new();
    stats.insert("mean".to_string(), mean);
    stats.insert("median".to_string(), median);
    stats.insert("std".to_string(), std);
    stats.insert("min".to_string(), min);
    stats.insert("max".to_string(), max);
    stats.insert("sum".to_string(), sum);
    stats.insert("count".to_string(), len);

    stats
}

/// Detect anomalies in normalized data
fn detect_anomalies(data: &[f64], threshold: f64) -> (Vec<usize>, Vec<f64>) {
    // Find values and indices above threshold
    let mut anomaly_indices = Vec::new();
    let mut anomalies = Vec::new();

    for (idx, &value) in data.iter().enumerate() {
        if value > threshold {
            anomaly_indices.push(idx);
            anomalies.push(value);
        }
    }

    (anomaly_indices, anomalies)
}

/// Compute moving average using convolution
fn moving_average(data: &[f64], window_size: usize) -> Vec<f64> {
    let kernel_value = 1.0 / window_size as f64;

    // Use 'valid' mode convolution (no edge padding)
    let output_len = data.len() - window_size + 1;
    let mut smoothed = Vec::with_capacity(output_len);

    for i in 0..output_len {
        let sum: f64 = data[i..i + window_size]
            .iter()
            .map(|x| x * kernel_value)
            .sum();
        smoothed.push(sum);
    }

    smoothed
}

/// Main data processing pipeline
fn main() {
    println!("NumPy → Rust/Trueno Data Processing Pipeline");
    println!("{}", "=".repeat(50));

    // Load data
    println!("\n1. Loading sensor data...");
    let raw_data = load_sensor_data();
    println!("   Loaded {} data points", raw_data.len());

    // Preprocess
    println!("\n2. Preprocessing data...");
    let processed_data = preprocess_data(&raw_data);
    println!("   Cleaned: {} valid points", processed_data.len());

    // Statistics
    println!("\n3. Computing statistics...");
    let stats = compute_statistics(&processed_data);
    println!("   Mean: {:.4}", stats["mean"]);
    println!("   Std:  {:.4}", stats["std"]);
    println!("   Min:  {:.4}", stats["min"]);
    println!("   Max:  {:.4}", stats["max"]);

    // Anomaly detection
    println!("\n4. Detecting anomalies...");
    let (anomaly_idx, anomaly_vals) = detect_anomalies(&processed_data, 0.95);
    println!("   Found {} anomalies", anomaly_idx.len());
    if !anomaly_vals.is_empty() {
        println!("   Anomaly values: {:?}", anomaly_vals);
    }

    // Smoothing
    println!("\n5. Applying moving average (window=5)...");
    let smoothed = moving_average(&processed_data, 5);
    println!("   Smoothed data length: {}", smoothed.len());
    let smoothed_mean: f64 = smoothed.iter().sum::<f64>() / smoothed.len() as f64;
    println!("   Smoothed mean: {:.4}", smoothed_mean);

    println!("\n✅ Pipeline complete!");

    // Performance note
    println!("\n⚡ Performance Benefits:");
    println!("   • Memory safety: No buffer overflows or null pointer dereferences");
    println!("   • Zero-cost abstractions: Same performance as hand-written C");
    println!("   • SIMD acceleration: Automatic vectorization where beneficial");
    println!("   • Compile-time optimization: Link-time optimization (LTO) enabled");
}