ruchy 4.1.2

A systems scripting language that transpiles to idiomatic Rust with extreme quality engineering
Documentation
// Data Science Workflow Example for Ruchy Notebooks
// This file demonstrates typical data science operations

// Data loading: a small in-memory sample (the integers 1 through 10) stands in
// for a real dataset. No preprocessing is applied here; the array is only
// echoed so the later results can be checked against it.
let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
println("Original data: " + data.to_string())

// Statistical operations
// Arithmetic mean of `numbers`.
//
// The running total starts as the float 0.0 so the final division is a
// floating-point division even when every element is an integer
// (mean([1, 2, 3, 4]) is 2.5 rather than 2).
// NOTE(review): this assumes `/` on two integers truncates, as it does in the
// Rust this language transpiles to - confirm against the interpreter.
//
// An empty collection has no mean; 0.0 is returned instead of dividing by zero.
fun mean(numbers) {
    let count = numbers.length()
    if count == 0 {
        return 0.0
    }

    let mut total = 0.0
    for value in numbers {
        total = total + value
    }
    total / count
}

// Population variance of `numbers`: the sum of squared deviations from the
// mean, divided by the number of elements (N, not N - 1).
//
// The accumulator starts as the float 0.0 so the final division is a
// floating-point division even for integer input.
// An empty collection returns 0.0 instead of dividing by zero.
fun variance(numbers) {
    let count = numbers.length()
    if count == 0 {
        return 0.0
    }

    let avg = mean(numbers)
    let mut sum_sq_diff = 0.0
    for num in numbers {
        let diff = num - avg
        sum_sq_diff = sum_sq_diff + (diff * diff)
    }
    sum_sq_diff / count
}

// Calculate statistics for the sample data and print them.
// `data_mean` is bound to a name because the final summary prints it again.
let data_mean = mean(data)
let data_variance = variance(data)

println("Mean: " + data_mean.to_string())
println("Variance: " + data_variance.to_string())

// Data transformation
// Z-score normalization: maps every element x to (x - mean) / std_dev, where
// std_dev is the square root of variance(numbers).
//
// Degenerate inputs are handled explicitly:
//   - an empty collection yields an empty array;
//   - when the standard deviation is zero there is no spread to scale by, so
//     the values are only centered (x - mean) instead of being divided by zero.
fun normalize(numbers) {
    if numbers.length() == 0 {
        return []
    }

    let avg = mean(numbers)
    let std_dev = variance(numbers).sqrt()
    if std_dev > 0.0 {
        numbers.map(|x| (x - avg) / std_dev)
    } else {
        numbers.map(|x| x - avg)
    }
}

// Apply normalize() to the sample data and print the transformed array.
let normalized_data = normalize(data)
println("Normalized data: " + normalized_data.to_string())

// Simple linear regression example
// Ordinary least-squares fit of a line y = slope * x + intercept.
//
// Parameters:
//   x_values - the independent variable samples
//   y_values - the dependent variable samples; indexed over the length of
//              x_values, so it must be at least as long as x_values
// Returns an object with the fields `slope` and `intercept`.
//
// The cross-product accumulators start as the float 0.0 so slope and
// intercept are computed with floating-point division even for integer data.
//
// When the denominator is zero (no points, one point, or every x identical)
// the slope is undefined; the fit then falls back to the horizontal line
// through the mean of y (slope 0.0), or to 0.0 / 0.0 for empty input.
fun linear_regression(x_values, y_values) {
    let n = x_values.length()
    let sum_x = x_values.sum()
    let sum_y = y_values.sum()
    let mut sum_xy = 0.0
    let mut sum_x_sq = 0.0

    for i in 0..n {
        sum_xy = sum_xy + (x_values[i] * y_values[i])
        sum_x_sq = sum_x_sq + (x_values[i] * x_values[i])
    }

    // n * sum(x^2) - (sum x)^2 is zero exactly when x has no spread.
    let denominator = n * sum_x_sq - sum_x * sum_x
    let mut slope = 0.0
    let mut intercept = 0.0
    if denominator != 0.0 {
        slope = (n * sum_xy - sum_x * sum_y) / denominator
    }
    if n > 0 {
        intercept = (sum_y - slope * sum_x) / n
    }

    {slope: slope, intercept: intercept}
}

// Example data for regression: five points where each y is exactly twice its x.
let x_data = [1, 2, 3, 4, 5]
let y_data = [2, 4, 6, 8, 10]  // Perfect linear relationship (y = 2x)

// Fit the line and print both fields of the returned model.
// `regression_result` is reused later for the prediction and the summary.
let regression_result = linear_regression(x_data, y_data)
println("Regression - Slope: " + regression_result.slope.to_string())
println("Regression - Intercept: " + regression_result.intercept.to_string())

// Prediction function
// Evaluate a fitted line at `x`.
// `model` must expose the fields `slope` and `intercept`; the result is
// slope * x + intercept.
fun predict(x, model) {
    let scaled = model.slope * x
    scaled + model.intercept
}

// Evaluate the fitted model at x = 6, one step past the largest x in x_data.
let prediction = predict(6, regression_result)
println("Prediction for x=6: " + prediction.to_string())

// Data quality checks
// Summarize basic data-quality indicators for `numbers`.
//
// Returns an object with:
//   missing_values     - count of elements below the missing-value threshold
//   outliers           - count of non-missing elements whose |z-score| > 3
//   total_records      - number of elements
//   data_quality_score - fraction of records that are neither missing nor
//                        outliers, as a float in [0.0, 1.0]
//
// Edge cases:
//   - empty input reports zero counts and a score of 0.0 (nothing usable)
//     instead of dividing by zero;
//   - when the standard deviation is zero no element can be an outlier, so
//     the z-score scan is skipped rather than dividing by zero.
fun check_data_quality(numbers) {
    // Missing values are encoded as very large negative numbers; anything
    // below this threshold is treated as a missing-value marker. Null
    // entries are not detected here.
    let missing_threshold = -999999
    let total = numbers.length()

    let mut missing_count = 0
    for num in numbers {
        if num < missing_threshold {
            missing_count = missing_count + 1
        }
    }

    let mut outlier_count = 0
    let mut score = 0.0
    if total > 0 {
        // Outliers via the simple z-score method.
        // NOTE(review): mean and variance are still computed over every
        // element, including missing-value markers, which skews them when
        // markers are present - consider filtering those out first.
        let avg = mean(numbers)
        let std_dev = variance(numbers).sqrt()
        if std_dev > 0.0 {
            for num in numbers {
                // Skip records already counted as missing so that no record
                // is subtracted from the score twice.
                if num >= missing_threshold {
                    let z_score = (num - avg) / std_dev
                    if z_score.abs() > 3 {
                        outlier_count = outlier_count + 1
                    }
                }
            }
        }

        // Multiply by 1.0 first so the ratio is a floating-point division;
        // with integer operands the result would collapse to 0 or 1.
        score = (total - missing_count - outlier_count) * 1.0 / total
    }

    {
        missing_values: missing_count,
        outliers: outlier_count,
        total_records: total,
        data_quality_score: score
    }
}

// Run the quality checks on the sample data and print each field of the
// returned report. `quality_report` is read again by the final summary.
let quality_report = check_data_quality(data)
println("Data Quality Report:")
println("  Missing values: " + quality_report.missing_values.to_string())
println("  Outliers: " + quality_report.outliers.to_string()) 
println("  Total records: " + quality_report.total_records.to_string())
println("  Quality score: " + quality_report.data_quality_score.to_string())

// Final summary: recap the dataset size, mean, quality score (as a
// percentage) and the fitted line, using values computed earlier in the file.
// The emoji and bullet characters are literal UTF-8 text inside the strings.
println("\n🎉 Data Science Workflow Complete!")
println("📊 Analysis Summary:")
println("   • Dataset size: " + data.length().to_string() + " records")
println("   • Mean value: " + data_mean.to_string())
println("   • Data quality: " + (quality_report.data_quality_score * 100).to_string() + "%")
println("   • Linear model: y = " + regression_result.slope.to_string() + "x + " + regression_result.intercept.to_string())