realizar 0.8.5 - Docs.rs


    // =========================================================================
    // Fixture Creation Tests (verify fixtures work)
    // =========================================================================

    #[test]
    fn test_gguf_fixture_creates_valid_file() {
        let fixture = ModelFixture::gguf("test_gguf", ModelConfig::tiny());
        assert!(fixture.path().exists());
        let bytes = fixture.read_bytes().expect("read bytes");
        // GGUF magic: 0x46554747 = "GGUF"
        assert_eq!(&bytes[0..4], &[0x47, 0x47, 0x55, 0x46]);
    }

    #[test]
    fn test_apr_fixture_creates_valid_file() {
        let fixture = ModelFixture::apr("test_apr", ModelConfig::tiny());
        assert!(fixture.path().exists());
        let bytes = fixture.read_bytes().expect("read bytes");
        // APR magic: "APR\0"
        assert_eq!(&bytes[0..4], b"APR\x00");
    }

    #[test]
    fn test_safetensors_fixture_creates_valid_file() {
        let fixture = ModelFixture::safetensors("test_st", ModelConfig::tiny());
        assert!(fixture.path().exists());
        let bytes = fixture.read_bytes().expect("read bytes");
        // SafeTensors starts with header length (u64 LE)
        let header_len = u64::from_le_bytes(bytes[0..8].try_into().expect("type conversion"));
        assert!(header_len > 0 && header_len < 1_000_000);
    }

    // =========================================================================
    // Falsification Logic Tests
    // =========================================================================

    #[test]
    fn test_falsify_nan_detection() {
        let result = ForwardResult {
            logits: vec![1.0, f32::NAN, 3.0],
            format: ModelFormat::Gguf,
            device: Device::Cpu,
            tokens_processed: 1,
        };

        let checks = falsify(&result);
        let nan_check = checks.iter().find(|c| c.id == "F012").expect("nan_check");
        assert!(!nan_check.passed, "Should detect NaN");
    }

    #[test]
    fn test_falsify_inf_detection() {
        let result = ForwardResult {
            logits: vec![1.0, f32::INFINITY, 3.0],
            format: ModelFormat::Gguf,
            device: Device::Cpu,
            tokens_processed: 1,
        };

        let checks = falsify(&result);
        let inf_check = checks.iter().find(|c| c.id == "F013").expect("inf_check");
        assert!(!inf_check.passed, "Should detect Inf");
    }

    #[test]
    fn test_falsify_valid_output() {
        let result = ForwardResult {
            logits: vec![1.0, 2.0, 3.0],
            format: ModelFormat::Gguf,
            device: Device::Cpu,
            tokens_processed: 1,
        };

        let checks = falsify(&result);
        for check in &checks {
            assert!(check.passed, "{} should pass: {}", check.id, check.details);
        }
    }

    #[test]
    fn test_parity_argmax_match() {
        let a = ForwardResult {
            logits: vec![1.0, 5.0, 3.0],
            format: ModelFormat::Gguf,
            device: Device::Cpu,
            tokens_processed: 1,
        };
        let b = ForwardResult {
            logits: vec![1.1, 5.1, 3.1],
            format: ModelFormat::Apr,
            device: Device::Cpu,
            tokens_processed: 1,
        };

        let checks = falsify_parity(&a, &b);
        let argmax_check = checks.iter().find(|c| c.id == "F015").expect("argmax_check");
        assert!(argmax_check.passed, "Argmax should match (both = 1)");
    }

    #[test]
    fn test_parity_argmax_mismatch() {
        let a = ForwardResult {
            logits: vec![1.0, 5.0, 3.0],
            format: ModelFormat::Gguf,
            device: Device::Cpu,
            tokens_processed: 1,
        };
        let b = ForwardResult {
            logits: vec![10.0, 5.0, 3.0],
            format: ModelFormat::Apr,
            device: Device::Cpu,
            tokens_processed: 1,
        };

        let checks = falsify_parity(&a, &b);
        let argmax_check = checks.iter().find(|c| c.id == "F015").expect("argmax_check");
        assert!(!argmax_check.passed, "Argmax should NOT match (1 vs 0)");
    }

    // =========================================================================
    // T001-T006: Format×Device Matrix Tests (Synthetic Fixtures)
    // NOTE: These test fixture generation, NOT inference. See T100+ for real model tests.
    // =========================================================================

    #[test]
    fn t001_gguf_cpu_forward_synthetic() {
        let fixture = ModelFixture::gguf("t001", ModelConfig::tiny());

        match forward_gguf_cpu(&fixture, TEST_TOKENS) {
            Ok(result) => {
                eprintln!(
                    "[T001] GGUF:CPU (synthetic) produced {} logits",
                    result.logits.len()
                );
                let checks = falsify(&result);
                for check in &checks {
                    if !check.passed {
                        eprintln!("[T001] FALSIFIED {}: {}", check.id, check.details);
                    }
                }
            },
            Err(e) => {
                eprintln!("[T001] GGUF:CPU (synthetic) FIXTURE BUG: {}", e);
                // Expected - synthetic fixture has known issues
            },
        }
    }

    // =========================================================================
    // T100+: Real Model Falsification Tests (Popperian)
    // These use known-good artifacts to test the inference engine itself.
    // =========================================================================

    /// T100: GGUF inference on real Qwen2-0.5B-Instruct Q4_0 model
    ///
    /// This is the TRUE falsification test. If this fails, the inference engine is broken.
    /// If this passes but T001 fails, the fixture generator is broken (not the engine).
    #[test]
    fn t100_gguf_cpu_real_qwen2() {
        use std::path::Path;

        // Use artifacts path - run scripts/sync-models.sh to populate
        let project_root = Path::new(env!("CARGO_MANIFEST_DIR"));
        let model_path = project_root.join("artifacts/models/qwen2-0.5b-q4_0.gguf");

        if !model_path.exists() {
            eprintln!("[T100] SKIPPED: Model not found. Run: ./scripts/sync-models.sh");
            return;
        }

        // Use a simple token sequence
        let tokens: &[u32] = &[151643, 872, 198]; // <|im_start|>user\n in Qwen2 tokenizer

        match forward_gguf_cpu_path(&model_path, tokens) {
            Ok(result) => {
                let sum: f32 = result.logits.iter().sum();
                let max = result
                    .logits
                    .iter()
                    .cloned()
                    .fold(f32::NEG_INFINITY, f32::max);
                let min = result.logits.iter().cloned().fold(f32::INFINITY, f32::min);
                let argmax = result.argmax();

                eprintln!(
                    "[T100] GGUF:CPU (real Qwen2) produced {} logits",
                    result.logits.len()
                );
                eprintln!(
                    "[T100] sum={:.4}, min={:.4}, max={:.4}, argmax={:?}",
                    sum, min, max, argmax
                );

                let checks = falsify(&result);
                let mut all_passed = true;
                for check in &checks {
                    if !check.passed {
                        eprintln!("[T100] FALSIFIED {}: {}", check.id, check.details);
                        all_passed = false;
                    }
                }

                if all_passed {
                    eprintln!("[T100] ✓ CORROBORATED: GGUF inference engine works on real model");
                }

                // Hard assertion - real model MUST produce valid output
                assert!(
                    !result.has_nan(),
                    "Real model produced NaN - inference engine FALSIFIED"
                );
                assert!(
                    !result.has_inf(),
                    "Real model produced Inf - inference engine FALSIFIED"
                );
                assert!(
                    !result.logits.is_empty(),
                    "Real model produced empty logits - FALSIFIED"
                );
            },
            Err(e) => {
                panic!(
                    "[T100] GGUF:CPU (real Qwen2) FAILED: {} - INFERENCE ENGINE FALSIFIED",
                    e
                );
            },
        }
    }

    #[test]
    fn t003_apr_cpu_forward() {
        let fixture = ModelFixture::apr("t003", ModelConfig::tiny());

        match forward_apr_cpu(&fixture, TEST_TOKENS) {
            Ok(result) => {
                eprintln!("[T003] APR:CPU produced {} logits", result.logits.len());
                let checks = falsify(&result);
                for check in &checks {
                    if !check.passed {
                        eprintln!("[T003] FALSIFIED {}: {}", check.id, check.details);
                    }
                }
            },
            Err(e) => {
                eprintln!("[T003] APR:CPU FAILED TO LOAD/RUN: {}", e);
            },
        }
    }

    /// T200: SafeTensors inference on real Qwen2-0.5B model
    ///
    /// Tests the SafeTensors inference path with a real model from artifacts.
    #[test]
    fn t200_safetensors_cpu_real_qwen2() {
        use std::path::Path;

        // Use artifacts path - run scripts/sync-models.sh to populate
        let project_root = Path::new(env!("CARGO_MANIFEST_DIR"));
        let model_path = project_root.join("artifacts/models/qwen2-0.5b.safetensors");

        if !model_path.exists() {
            eprintln!("[T200] SKIPPED: Model not found. Run: ./scripts/sync-models.sh");
            return;
        }

        // Follow symlink to get the actual safetensors file in HuggingFace cache
        // Structure: artifacts/model.safetensors -> HF/snapshots/xxx/model.safetensors
        // The converter expects the file path, and finds config.json as sibling
        let st_file = std::fs::read_link(&model_path).unwrap_or_else(|_| model_path.clone());

        // Verify config.json exists as sibling
        let config_path = st_file.parent().map(|p| p.join("config.json"));
        if config_path.as_ref().is_none_or(|p| !p.exists()) {
            eprintln!(
                "[T200] SKIPPED: config.json not found as sibling of {}",
                st_file.display()
            );
            return;
        }

        // Use same tokens as T100 for comparison
        let tokens: &[u32] = &[151643, 872, 198]; // <|im_start|>user\n in Qwen2 tokenizer

        match crate::safetensors_infer::SafetensorsToAprConverter::convert(&st_file) {
            Ok(transformer) => match transformer.forward(tokens) {
                Ok(logits) => {
                    let sum: f32 = logits.iter().sum();
                    let max = logits.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
                    let min = logits.iter().cloned().fold(f32::INFINITY, f32::min);
                    let argmax = logits
                        .iter()
                        .enumerate()
                        .max_by(|(_, a), (_, b)| {
                            a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
                        })
                        .map(|(idx, _)| idx);

                    eprintln!(
                        "[T200] SafeTensors:CPU (real Qwen2) produced {} logits",
                        logits.len()
                    );
                    eprintln!(
                        "[T200] sum={:.4}, min={:.4}, max={:.4}, argmax={:?}",
                        sum, min, max, argmax
                    );

                    let has_nan = logits.iter().any(|x| x.is_nan());
                    let has_inf = logits.iter().any(|x| x.is_infinite());

                    assert!(!has_nan, "Real SafeTensors model produced NaN - FALSIFIED");
                    assert!(!has_inf, "Real SafeTensors model produced Inf - FALSIFIED");
                    assert!(
                        !logits.is_empty(),
                        "Real SafeTensors model produced empty logits - FALSIFIED"
                    );

                    eprintln!("[T200] ✓ CORROBORATED: SafeTensors inference works on real model");
                },
                Err(e) => {
                    panic!("[T200] SafeTensors forward FAILED: {} - FALSIFIED", e);
                },
            },
            Err(e) => {
                panic!("[T200] SafeTensors load FAILED: {} - FALSIFIED", e);
            },
        }
    }

    /// T201: APR inference (PMAT-111: Now EMPIRICAL via synthetic fixture)
    ///
    /// This test validates the APR loader and forward pass using:
    /// 1. Real APR model at `artifacts/models/qwen2-0.5b.apr` if available
    /// 2. Synthetic fixture as fallback (zero weights → produces garbage, but RUNS)
    ///
    /// Status: EMPIRICAL (tests APR loader schema resilience + forward pass)
    #[test]
    fn t201_apr_cpu_real_model() {
        use std::path::Path;

        // Try real APR model first
        let project_root = Path::new(env!("CARGO_MANIFEST_DIR"));
        let model_path = project_root.join("artifacts/models/qwen2-0.5b.apr");

        if model_path.exists() {
            // Real model path - test with real weights
            eprintln!("[T201] Found real APR model at {}", model_path.display());
            let tokens: &[u32] = &[151643, 872, 198];

            match forward_apr_cpu_path(&model_path, tokens) {
                Ok(result) => {
                    eprintln!("[T201] APR:CPU produced {} logits", result.logits.len());
                    assert!(!result.has_nan(), "Real APR model produced NaN - FALSIFIED");
                    assert!(!result.has_inf(), "Real APR model produced Inf - FALSIFIED");
                    assert!(
                        !result.logits.is_empty(),
                        "Real APR model produced empty logits - FALSIFIED"
                    );

                    let argmax = result.argmax();
                    eprintln!("[T201] argmax = {:?}", argmax);

                    if argmax == Some(262) {
                        eprintln!(
                            "[T201] ✓ CORROBORATED: APR matches GGUF/SafeTensors (argmax=262)"
                        );
                        eprintln!("[T201] APR has FULL PARITY with other formats!");
                    } else {
                        eprintln!(
                            "[T201] ✓ CORROBORATED: APR inference runs (argmax != 262, no parity)"
                        );
                    }
                    return;
                },
                Err(e) => {
                    panic!("[T201] APR:CPU FAILED on real model: {} - FALSIFIED", e);
                },
            }
        }

        // Fallback: Use synthetic fixture (PMAT-111 fix)
        eprintln!("[T201] Real APR model not found, using synthetic fixture");
        eprintln!("[T201] Testing APR loader + forward with zero weights (expect garbage output)");

        // Use tiny config for fast test
        let config = ModelConfig::tiny();
        let fixture = ModelFixture::apr("t201_synthetic", config);

        match forward_apr_cpu(&fixture, TEST_TOKENS) {
            Ok(result) => {
                eprintln!(
                    "[T201] APR:CPU (synthetic) produced {} logits",
                    result.logits.len()
                );

                // With zero weights, output should be all zeros (no NaN/Inf)
                assert!(
                    !result.has_nan(),
                    "Synthetic APR model produced NaN - FALSIFIED"
                );
                assert!(
                    !result.has_inf(),
                    "Synthetic APR model produced Inf - FALSIFIED"
                );
                assert!(
                    !result.logits.is_empty(),
                    "Synthetic APR model produced empty logits - FALSIFIED"
                );

                eprintln!("[T201] ✓ CORROBORATED: APR loader + forward RUNS");
                eprintln!("[T201] Status: EMPIRICAL (APR is now testable)");
                eprintln!(
                    "[T201] Note: Output is garbage (zero weights), but pipeline is verified"
                );
            },
            Err(e) => {
                panic!(
                    "[T201] APR:CPU FAILED on synthetic fixture: {} - APR LOADER FALSIFIED",
                    e
                );
            },
        }
    }