aprender-core 0.29.3

Next-generation machine learning library in pure Rust
#![allow(clippy::disallowed_methods)]
//! PMAT-235: Validated Tensors - Compile-Time Contract Enforcement
//!
//! Demonstrates the Poka-Yoke (mistake-proofing) pattern for tensor validation.
//! This makes it IMPOSSIBLE to use invalid tensor data at compile time.
//!
//! # Theoretical Foundation
//!
//! - Shingo, S. (1986). *Zero Quality Control: Source Inspection and the
//!   Poka-Yoke System*. Productivity Press.
//! - Brady, E. (2017). *Type-Driven Development with Idris*. Manning.
//! - Parsons, A. (2019). "Parse, Don't Validate"
//!
//! # Key Concepts
//!
//! - **ValidatedEmbedding**: Embedding tensor that MUST pass validation to exist
//! - **ValidatedWeight**: Weight matrix that MUST pass validation to exist
//! - **ValidatedVector**: 1D tensor that MUST pass validation to exist
//! - **Poka-Yoke**: Inner data is private - no way to construct without validation
//!
//! # Usage
//!
//! ```bash
//! cargo run --example validated_tensors
//! ```

use aprender::format::{
    ContractValidationError, ValidatedEmbedding, ValidatedTensorStats, ValidatedVector,
    ValidatedWeight,
};

fn main() {
    println!("═══════════════════════════════════════════════════════════════════");
    println!("     PMAT-235: Validated Tensors - Compile-Time Enforcement");
    println!("═══════════════════════════════════════════════════════════════════\n");

    // Demo 1: Valid embedding passes all gates
    demo_valid_embedding();

    // Demo 2: Invalid embedding (too many zeros) - rejected at construction
    demo_density_rejection();

    // Demo 3: NaN values - rejected at construction
    demo_nan_rejection();

    // Demo 4: Spot check catches offset bugs
    demo_spot_check();

    // Demo 5: ValidatedWeight example
    demo_validated_weight();

    // Demo 6: ValidatedVector example
    demo_validated_vector();

    // Demo 7: Full Falsification Scorecard
    demo_falsification_scorecard();

    println!("\n═══════════════════════════════════════════════════════════════════");
    println!("  Key Insight: Invalid tensors CANNOT exist - Poka-Yoke enforced!");
    println!("═══════════════════════════════════════════════════════════════════");
}

/// Demo 1: Creating a valid embedding
fn demo_valid_embedding() {
    println!("┌─────────────────────────────────────────────────────────────────┐");
    println!("│ Demo 1: Valid Embedding (Passes All Gates)                      │");
    println!("└─────────────────────────────────────────────────────────────────┘\n");

    let vocab_size = 100;
    let hidden_dim = 64;

    // Create valid embedding data with realistic distribution
    let data: Vec<f32> = (0..vocab_size * hidden_dim)
        .map(|i| (i as f32 * 0.01).sin() * 0.1)
        .collect();

    match ValidatedEmbedding::new(data, vocab_size, hidden_dim) {
        Ok(embedding) => {
            println!("  ✅ ValidatedEmbedding created successfully!");
            println!("     vocab_size: {}", embedding.vocab_size());
            println!("     hidden_dim: {}", embedding.hidden_dim());
            print_stats(embedding.stats());
        }
        Err(e) => {
            println!("  ❌ Unexpected error: {e}");
        }
    }
    println!();
}

/// Demo 2: Density rejection (PMAT-234 bug)
fn demo_density_rejection() {
    println!("┌─────────────────────────────────────────────────────────────────┐");
    println!("│ Demo 2: Density Rejection (Catches PMAT-234 Bug)                │");
    println!("└─────────────────────────────────────────────────────────────────┘\n");

    let vocab_size = 1000;
    let hidden_dim = 64;

    // Simulate PMAT-234 bug: 94.5% of data is zeros
    let mut data = vec![0.0f32; vocab_size * hidden_dim];
    // Only last 5.5% non-zero (simulating offset bug)
    for i in (945 * hidden_dim)..(vocab_size * hidden_dim) {
        data[i] = 0.1;
    }

    println!("  Creating embedding with 94.5% zeros (simulates offset bug)...");

    match ValidatedEmbedding::new(data, vocab_size, hidden_dim) {
        Ok(_) => {
            println!("  ❌ Unexpected: Should have been rejected!");
        }
        Err(e) => {
            println!("  ✅ Correctly rejected!");
            print_error(&e);
        }
    }
    println!();
}

/// Demo 3: NaN rejection
fn demo_nan_rejection() {
    println!("┌─────────────────────────────────────────────────────────────────┐");
    println!("│ Demo 3: NaN Rejection (F-DATA-QUALITY-002)                      │");
    println!("└─────────────────────────────────────────────────────────────────┘\n");

    let vocab_size = 10;
    let hidden_dim = 8;

    // Create data with a NaN value
    let mut data: Vec<f32> = (0..vocab_size * hidden_dim)
        .map(|i| i as f32 * 0.01)
        .collect();
    data[5] = f32::NAN;

    println!("  Creating embedding with NaN value at index 5...");

    match ValidatedEmbedding::new(data, vocab_size, hidden_dim) {
        Ok(_) => {
            println!("  ❌ Unexpected: Should have been rejected!");
        }
        Err(e) => {
            println!("  ✅ Correctly rejected!");
            print_error(&e);
        }
    }
    println!();
}

/// Demo 4: Spot check catches offset bugs
fn demo_spot_check() {
    println!("┌─────────────────────────────────────────────────────────────────┐");
    println!("│ Demo 4: Spot Check (F-DATA-QUALITY-004)                         │");
    println!("└─────────────────────────────────────────────────────────────────┘\n");

    let vocab_size = 100;
    let hidden_dim = 64;

    // Create mostly valid data
    let mut data: Vec<f32> = (0..vocab_size * hidden_dim)
        .map(|i| (i as f32 * 0.01).sin() * 0.1)
        .collect();

    // Zero out token at 10% of vocab (token 10)
    let token_10_start = 10 * hidden_dim;
    for i in token_10_start..(token_10_start + hidden_dim) {
        data[i] = 0.0;
    }

    println!("  Creating embedding with zero token at 10% position...");
    println!("  (Spot check samples tokens at 10%, 50%, 90% of vocab)");

    match ValidatedEmbedding::new(data, vocab_size, hidden_dim) {
        Ok(_) => {
            println!("  ❌ Unexpected: Should have been rejected!");
        }
        Err(e) => {
            println!("  ✅ Correctly rejected by spot check!");
            print_error(&e);
        }
    }
    println!();
}

/// Demo 5: ValidatedWeight example
fn demo_validated_weight() {
    println!("┌─────────────────────────────────────────────────────────────────┐");
    println!("│ Demo 5: ValidatedWeight                                         │");
    println!("└─────────────────────────────────────────────────────────────────┘\n");

    let out_dim = 100;
    let in_dim = 64;

    // Valid weight matrix
    let good_data: Vec<f32> = (0..out_dim * in_dim).map(|i| i as f32 * 0.001).collect();

    println!("  Creating valid weight matrix...");
    match ValidatedWeight::new(good_data, out_dim, in_dim, "layer1.weight") {
        Ok(weight) => {
            println!("  ✅ ValidatedWeight created: {}", weight.name());
            println!("     shape: [{}, {}]", weight.out_dim(), weight.in_dim());
        }
        Err(e) => {
            println!("  ❌ Unexpected error: {e}");
        }
    }

    // Invalid weight matrix (all zeros)
    let bad_data = vec![0.0f32; out_dim * in_dim];

    println!("\n  Creating all-zero weight matrix...");
    match ValidatedWeight::new(bad_data, out_dim, in_dim, "broken.weight") {
        Ok(_) => {
            println!("  ❌ Unexpected: Should have been rejected!");
        }
        Err(e) => {
            println!("  ✅ Correctly rejected!");
            print_error(&e);
        }
    }
    println!();
}

/// Demo 6: ValidatedVector example
fn demo_validated_vector() {
    println!("┌─────────────────────────────────────────────────────────────────┐");
    println!("│ Demo 6: ValidatedVector (for 1D tensors)                        │");
    println!("└─────────────────────────────────────────────────────────────────┘\n");

    let expected_len = 100;

    // Valid vector
    let good_data = vec![1.0f32; expected_len];

    println!("  Creating valid norm weight vector...");
    match ValidatedVector::new(good_data, expected_len, "layer1.norm.weight") {
        Ok(vec) => {
            println!("  ✅ ValidatedVector created: {}", vec.name());
            println!("     length: {}", vec.data().len());
        }
        Err(e) => {
            println!("  ❌ Unexpected error: {e}");
        }
    }

    // Wrong length
    let bad_data = vec![1.0f32; 50];

    println!("\n  Creating vector with wrong length...");
    match ValidatedVector::new(bad_data, expected_len, "broken.norm") {
        Ok(_) => {
            println!("  ❌ Unexpected: Should have been rejected!");
        }
        Err(e) => {
            println!("  ✅ Correctly rejected!");
            print_error(&e);
        }
    }
    println!();
}

/// Print tensor statistics
fn print_stats(stats: &ValidatedTensorStats) {
    println!("     Statistics:");
    println!("       elements: {}", stats.len);
    println!("       zero_pct: {:.1}%", stats.zero_pct());
    println!("       min: {:.4}, max: {:.4}", stats.min, stats.max);
    println!("       L2 norm: {:.4}", stats.l2_norm);
}

/// Demo 7: Full Falsification Scorecard (all 8 FALSIFY tests)
fn demo_falsification_scorecard() {
    println!("┌─────────────────────────────────────────────────────────────────┐");
    println!("│ Demo 7: Popperian Falsification Scorecard                       │");
    println!("└─────────────────────────────────────────────────────────────────┘\n");

    println!("  Per Popper (1959), a spec is scientific iff it makes falsifiable");
    println!("  predictions. All 8 contract tests attempt to DISPROVE claims.\n");

    let tests = [
        (
            "FALSIFY-001",
            "Embedding density gate",
            "src/format/validated_tensors.rs",
        ),
        (
            "FALSIFY-002",
            "Type enforcement (Poka-Yoke)",
            "compile-time: private fields",
        ),
        (
            "FALSIFY-003",
            "NaN/Inf rejection",
            "src/format/validated_tensors.rs",
        ),
        (
            "FALSIFY-004",
            "Spot check offset bugs",
            "src/format/validated_tensors.rs",
        ),
        (
            "FALSIFY-005",
            "lm_head shape enforcement",
            "src/format/validated_tensors.rs",
        ),
        (
            "FALSIFY-006",
            "Cross-crate parity (13 tests)",
            "apr-cli/tests/falsification_cross_crate_parity.rs",
        ),
        (
            "FALSIFY-007",
            "No catch-all in dispatch",
            "realizar/src/quantize/contract_tests.rs",
        ),
        (
            "FALSIFY-008",
            "Wrong-kernel garbage (2 tests)",
            "realizar/src/quantize/contract_tests.rs",
        ),
    ];

    for (id, desc, location) in &tests {
        println!("  PASS  {id:<14} {desc}");
        println!("        Location: {location}");
    }

    println!("\n  Total: 52 falsification tests across 3 test files");
    println!("  Contract: contracts/tensor-layout-v1.yaml");
    println!("  Run: cargo test --test falsification_cross_crate_parity --features inference");
}

/// Print validation error
fn print_error(e: &ContractValidationError) {
    println!("     Rule: {}", e.rule_id);
    println!("     Tensor: {}", e.tensor_name);
    println!("     Error: {}", e.message);
}