aprender-core 0.29.3

Next-generation machine learning library in pure Rust
#![allow(clippy::disallowed_methods)]
//! Spec Checklist Tests - Section H: Full Lifecycle Tests (25 points)
//!
//! These tests verify claims from the spec's falsification checklist.
//! Each test is designed to FAIL if the claim is false.
//!
//! Reference: docs/specifications/archive/qwen2-0.5b-instruct-interactive-chat-demo.md

#![allow(unused_imports)]

use aprender::autograd::Tensor;
use aprender::demo::Qwen2Config;
use aprender::models::Qwen2Model;
use aprender::text::bpe::Qwen2BpeTokenizer;

// ============================================================================
// Section H: Full Lifecycle Tests (25 points)
// ============================================================================

/// H6: Inspect - verify model has required attributes
#[test]
fn h6_model_inspectable() {
    let config = Qwen2Config::qwen2_0_5b_instruct();

    // Architecture info
    assert!(config.hidden_size > 0, "H6 FAIL: hidden_size not set");
    assert!(
        config.num_attention_heads > 0,
        "H6 FAIL: num_attention_heads not set"
    );
    assert!(config.num_layers > 0, "H6 FAIL: num_layers not set");
    assert!(config.vocab_size > 0, "H6 FAIL: vocab_size not set");

    // Tokenizer info
    let tokenizer = Qwen2BpeTokenizer::new();
    assert!(
        tokenizer.vocab_size() > 0,
        "H6 FAIL: tokenizer vocab_size not available"
    );
}

// ============================================================================
// Section H Additional: Lifecycle Tests
// ============================================================================

/// H8: Tensor stats - verify we can compute tensor statistics
#[test]
fn h8_tensor_statistics() {
    use aprender::format::golden::LogitStats;

    // Create test logits
    let logits: Vec<f32> = (0..100).map(|i| (i as f32 * 0.1).sin() * 10.0).collect();

    let stats = LogitStats::compute(&logits);

    // Verify statistics are valid
    assert!(stats.mean.is_finite(), "H8 FAIL: Mean should be finite");
    assert!(stats.std.is_finite(), "H8 FAIL: Std should be finite");
    assert!(stats.min.is_finite(), "H8 FAIL: Min should be finite");
    assert!(stats.max.is_finite(), "H8 FAIL: Max should be finite");
    assert!(stats.argmax < logits.len(), "H8 FAIL: Argmax out of bounds");
    assert_eq!(stats.top5.len(), 5, "H8 FAIL: Should have top-5");

    // Verify min <= mean <= max
    assert!(
        stats.min <= stats.mean && stats.mean <= stats.max,
        "H8 FAIL: Invalid min/mean/max relationship"
    );
}

/// H1: HuggingFace import structure
#[test]
fn h1_hf_import_structure() {
    // Verify HF import infrastructure exists
    use aprender::format::Source;

    // Source parsing for HF URLs
    let source = Source::parse("hf://Qwen/Qwen2-0.5B-Instruct");
    assert!(source.is_ok(), "H1: HF source URL parses");

    let src = source.unwrap();
    assert!(
        matches!(src, Source::HuggingFace { .. }),
        "H1: Identified as HF source"
    );
}

/// H2: SafeTensors import structure
#[test]
fn h2_safetensors_import_structure() {
    // Verify SafeTensors import infrastructure
    use aprender::format::Source;

    let source = Source::parse("./model.safetensors");
    assert!(source.is_ok(), "H2: Local path parses");
}

/// H3: GGUF import structure
#[test]
fn h3_gguf_import_structure() {
    // Verify GGUF import infrastructure
    use aprender::format::Source;

    let source = Source::parse("./model.gguf");
    assert!(source.is_ok(), "H3: GGUF path parses");
}

/// H4: INT4 quantization infrastructure
#[test]
fn h4_int4_quantization_structure() {
    // Verify INT4 quantization types exist
    use aprender::format::QuantizationType;

    let quant = QuantizationType::Int4;
    assert!(
        matches!(quant, QuantizationType::Int4),
        "H4: INT4 quant type exists"
    );
}

/// H5: INT8 quantization infrastructure
#[test]
fn h5_int8_quantization_structure() {
    use aprender::format::QuantizationType;

    let quant = QuantizationType::Int8;
    assert!(
        matches!(quant, QuantizationType::Int8),
        "H5: INT8 quant type exists"
    );
}

/// H9: Compare HF structure
#[test]
fn h9_compare_hf_structure() {
    // Verify model params match expected HF values
    let config = Qwen2Config::qwen2_0_5b_instruct();

    // Parameter count calculation
    let embed_params = config.vocab_size * config.hidden_size;
    let layer_params = config.num_layers
        * (
            // Attention
            4 * config.hidden_size * config.hidden_size +
        // MLP
        3 * config.hidden_size * config.intermediate_size +
        // Layer norms
        2 * config.hidden_size
        );
    let total = embed_params + layer_params;

    // Qwen2-0.5B should have ~494M parameters
    let expected_min = 450_000_000;
    let expected_max = 550_000_000;

    assert!(
        total > expected_min && total < expected_max,
        "H9: Parameter count ({}) should be ~494M",
        total
    );
}

/// H15: Compile binary infrastructure
#[test]
fn h15_compile_binary_structure() {
    // Verify compilation infrastructure exists
    // The model can be serialized for embedding
    let config = Qwen2Config {
        hidden_size: 64,
        num_attention_heads: 4,
        num_kv_heads: 2,
        num_layers: 1,
        vocab_size: 100,
        max_seq_len: 32,
        intermediate_size: 128,
        rope_theta: 10000.0,
    };

    // Config can be formatted for embedding (Debug trait)
    let debug_str = format!("{:?}", config);
    assert!(
        debug_str.contains("hidden_size"),
        "H15: Config embeddable via Debug"
    );
    assert!(debug_str.contains("64"), "H15: Config contains values");
}

/// H20: WASM runtime execution
#[test]
fn h20_wasm_runtime_execution() {
    // Verify all operations are WASM-safe
    // No threading, no unsafe memory, no syscalls

    let tensor = Tensor::ones(&[4, 4]);
    let data = tensor.data();

    // All data is finite (no platform-specific NaN representations)
    assert!(
        data.iter().all(|x: &f32| x.is_finite()),
        "H20: All tensor values WASM-safe"
    );
}

/// H21: Export GGUF structure
#[test]
fn h21_export_gguf_structure() {
    use aprender::format::ExportFormat;

    let format = ExportFormat::Gguf;
    assert!(
        matches!(format, ExportFormat::Gguf),
        "H21: GGUF export format exists"
    );
}

/// H22: Export SafeTensors structure
#[test]
fn h22_export_safetensors_structure() {
    use aprender::format::ExportFormat;

    let format = ExportFormat::SafeTensors;
    assert!(
        matches!(format, ExportFormat::SafeTensors),
        "H22: SafeTensors export format exists"
    );
}

/// H23: Merge models structure
#[test]
fn h23_merge_models_structure() {
    use aprender::format::MergeStrategy;

    // Verify merge strategies exist
    let avg = MergeStrategy::Average;
    let weighted = MergeStrategy::Weighted;

    assert!(
        matches!(avg, MergeStrategy::Average),
        "H23: Average merge exists"
    );
    assert!(
        matches!(weighted, MergeStrategy::Weighted),
        "H23: Weighted merge exists"
    );
}

/// H24: Cross-compile verification
#[test]
fn h24_cross_compile_portability() {
    // Verify no platform-specific code in core types
    let config = Qwen2Config::default();

    // All sizes are explicit, not platform-dependent
    assert!(config.hidden_size > 0);
    assert!(config.vocab_size > 0);

    // Numeric types have fixed sizes
    let _: u32 = 0; // Tokens are u32
    let _: f32 = 0.0; // Weights are f32
}