aprender-core 0.31.2

Next-generation machine learning library in pure Rust
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
#![allow(clippy::disallowed_methods)]
//! Spec Checklist Tests - Section 19: High-Performance APR Inference
//!
//! These tests verify claims from the spec's falsification checklist.
//! Each test is designed to FAIL if the claim is false.
//!
//! Reference: docs/specifications/archive/qwen2-0.5b-instruct-interactive-chat-demo.md

#![allow(unused_imports)]

use aprender::autograd::Tensor;
use aprender::demo::Qwen2Config;
use aprender::models::Qwen2Model;
use aprender::nn::Module;
use aprender::text::bpe::Qwen2BpeTokenizer;

// ============================================================================
// Section 19: High-Performance APR Inference (TinyLlama & QwenCoder)
// Spec: docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md
// Focus: Verify apr-cli serving capabilities and performance targets
// ============================================================================

/// Z1: TinyLlama-1.1B imports to APR
/// Falsification: `apr import` fails or produces invalid APR file
#[test]
fn z1_tinyllama_imports_to_apr() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify TinyLlama import documented as fixed
    assert!(
        spec.contains("TinyLlama") && spec.contains("import"),
        "Z1: Spec must document TinyLlama import capability"
    );

    // Verify RMSNorm validation fix documented
    assert!(
        spec.contains("RMSNorm") && spec.contains("validation"),
        "Z1: Spec must document RMSNorm validation fix"
    );

    // Check import command exists
    let import_cmd = "crates/apr-cli/src/commands/import.rs";
    if let Ok(content) = std::fs::read_to_string(import_cmd) {
        assert!(
            content.contains("safetensors") || content.contains("import"),
            "Z1: Import command must handle safetensors"
        );
    }
}

/// Z2: Qwen2.5-Coder-0.5B imports to APR
/// Falsification: `apr import` fails or produces invalid APR file
#[test]
fn z2_qwencoder_imports_to_apr() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify Qwen2.5-Coder import documented
    assert!(
        spec.contains("Qwen2.5-Coder") || spec.contains("QwenCoder"),
        "Z2: Spec must document Qwen2.5-Coder import"
    );

    // Verify --arch qwen2 support documented
    assert!(
        spec.contains("--arch qwen2") || spec.contains("Architecture::Qwen2"),
        "Z2: Spec must document --arch qwen2 support"
    );

    // Check format module has Qwen2 architecture mapping
    let format_path = "src/format/mod.rs";
    if let Ok(content) = std::fs::read_to_string(format_path) {
        // Should have architecture enum or mapping
        assert!(
            content.contains("Qwen2") || content.contains("Architecture"),
            "Z2: Format module should support Qwen2 architecture"
        );
    }
}

/// Z3: TinyLlama Serving (HTTP)
/// Falsification: `apr serve tinyllama.apr` fails to handle concurrent requests
#[test]
fn z3_tinyllama_serving_http() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify serving documented
    assert!(
        spec.contains("apr serve") || spec.contains("Serving"),
        "Z3: Spec must document apr serve command"
    );

    // Verify APR v1 magic compatibility documented
    assert!(
        spec.contains("v1_compat") || spec.contains("APR2") || spec.contains("APRN"),
        "Z3: Spec must document APR v1/v2 magic compatibility"
    );

    // Check serve command exists
    let serve_cmd = "crates/apr-cli/src/commands/serve.rs";
    if std::fs::metadata(serve_cmd).is_ok() {
        let content = std::fs::read_to_string(serve_cmd).expect("serve.rs exists");
        assert!(
            content.contains("async") || content.contains("tokio") || content.contains("axum"),
            "Z3: Serve command must use async runtime"
        );
    }
}

/// Z4: QwenCoder Serving (HTTP)
/// Falsification: `apr serve qwencoder.apr` fails code completion request
#[test]
fn z4_qwencoder_serving_http() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify code completion use case documented
    assert!(
        spec.contains("code completion")
            || spec.contains("Code generation")
            || spec.contains("IDE"),
        "Z4: Spec must document code completion use case"
    );

    // Verify QwenCoder serving documented
    assert!(
        spec.contains("Qwen") && (spec.contains("serve") || spec.contains("Serving")),
        "Z4: Spec must document Qwen serving capability"
    );
}

/// Z5: TinyLlama CPU Performance
/// Falsification: Decode < 60 tok/s (Av. Desktop)
#[test]
fn z5_tinyllama_cpu_performance() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify TinyLlama performance target documented
    assert!(
        spec.contains("tok/s") && spec.contains("TinyLlama"),
        "Z5: Spec must document TinyLlama performance targets"
    );

    // Verify --fast flag for realizar path documented
    assert!(
        spec.contains("--fast") || spec.contains("realizar"),
        "Z5: Spec must document --fast flag for optimized inference"
    );

    // Verify performance exceeds threshold
    // From spec: "206.4 tok/s on TinyLlama (4x threshold)"
    assert!(
        spec.contains("206") || spec.contains("185") || spec.contains("352"),
        "Z5: Spec must document achieved performance > 60 tok/s"
    );
}

/// Z6: QwenCoder CPU Performance
/// Falsification: Decode < 70 tok/s (Av. Desktop)
#[test]
fn z6_qwencoder_cpu_performance() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify Qwen performance documented
    assert!(
        spec.contains("tok/s") && (spec.contains("Qwen") || spec.contains("QwenCoder")),
        "Z6: Spec must document Qwen performance targets"
    );

    // Verify realizar-based inference path
    assert!(
        spec.contains("realizar") && spec.contains("inference"),
        "Z6: Spec must document realizar-based inference"
    );
}

/// Z7: Server Latency (TTFT)
/// Falsification: TTFT > 50ms (local)
#[test]
fn z7_server_latency_ttft() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify TTFT (Time To First Token) latency requirement documented
    assert!(
        spec.contains("TTFT") || spec.contains("latency") || spec.contains("50ms"),
        "Z7: Spec must document TTFT latency requirement"
    );

    // Verify server latency target
    assert!(
        spec.contains("Server") || spec.contains("serve"),
        "Z7: Spec must document server latency expectations"
    );
}

/// Z8: QwenCoder Accuracy
/// Falsification: Generated code fails basic syntax check
#[test]
fn z8_qwencoder_accuracy() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify code generation quality documented
    assert!(
        spec.contains("Code") && (spec.contains("generation") || spec.contains("completion")),
        "Z8: Spec must document code generation capability"
    );

    // Verify quality expectations
    assert!(
        spec.contains("syntax") || spec.contains("quality") || spec.contains("accuracy"),
        "Z8: Spec must document quality expectations"
    );
}

/// Z9: High-Load Stability
/// Falsification: Server crashes under 50 concurrent connections
#[test]
fn z9_high_load_stability() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify concurrency/stability documented
    assert!(
        spec.contains("concurrent") || spec.contains("stability") || spec.contains("load"),
        "Z9: Spec must document concurrency/stability requirements"
    );

    // Check if serve command has concurrency handling
    let serve_cmd = "crates/apr-cli/src/commands/serve.rs";
    if let Ok(content) = std::fs::read_to_string(serve_cmd) {
        // Should use async or have connection handling
        assert!(
            content.contains("async") || content.contains("spawn") || content.contains("tokio"),
            "Z9: Serve command must support concurrent connections"
        );
    }
}

/// Z10: Zero-Overhead Serving
/// Falsification: Serving tokens/sec within 5% of `apr bench`
#[test]
fn z10_zero_overhead_serving() {
    let spec_path = "docs/specifications/archive/apr-whisper-and-cookbook-support-eoy-2025.md";
    let spec = std::fs::read_to_string(spec_path).expect("Specification should exist");

    // Verify overhead expectations documented
    assert!(
        spec.contains("overhead") || spec.contains("bench") || spec.contains("5%"),
        "Z10: Spec must document serving overhead expectations"
    );

    // Verify benchmark comparison methodology
    assert!(
        spec.contains("apr bench") || spec.contains("benchmark"),
        "Z10: Spec must document benchmark comparison"
    );
}

// ============================================================================
// Section 19 Import Infrastructure Tests
// Verify complete import pipeline code without requiring model files
// ============================================================================

/// Verify Qwen2-0.5B-Instruct import infrastructure is complete
#[test]
fn z_import_qwen2_0_5b_instruct_infra() {
    use aprender::format::{Architecture, ImportOptions, Source, ValidationConfig};

    // 1. Source parsing for HuggingFace URL
    let source = Source::parse("hf://Qwen/Qwen2-0.5B-Instruct").unwrap();
    match source {
        Source::HuggingFace { org, repo, .. } => {
            assert_eq!(org, "Qwen", "HF org should be Qwen");
            assert_eq!(repo, "Qwen2-0.5B-Instruct", "HF repo should match");
        }
        _ => panic!("Should parse as HuggingFace source"),
    }

    // 2. Architecture support
    // PMAT-099: Preserve model. prefix for AprTransformer compatibility
    let arch = Architecture::Qwen2;
    let mapped = arch.map_name("model.embed_tokens.weight");
    assert_eq!(
        mapped, "model.embed_tokens.weight",
        "Qwen2 preserves model. prefix"
    );

    // 3. Import options
    let options = ImportOptions {
        architecture: Architecture::Qwen2,
        validation: ValidationConfig::Strict,
        quantize: None,
        compress: None,
        strict: false,
        cache: true,
        tokenizer_path: None,
        allow_no_config: false,
    };
    assert_eq!(options.architecture, Architecture::Qwen2);

    // 4. Config verification
    let config = Qwen2Config::qwen2_0_5b_instruct();
    assert_eq!(config.hidden_size, 896);
    assert_eq!(config.vocab_size, 151936);
}

/// Verify Qwen2.5-Coder-0.5B import infrastructure is complete
#[test]
fn z_import_qwen25_coder_0_5b_infra() {
    use aprender::format::{Architecture, ImportOptions, Source, ValidationConfig};

    // 1. Source parsing for HuggingFace URL
    let source = Source::parse("hf://Qwen/Qwen2.5-Coder-0.5B-Instruct").unwrap();
    match source {
        Source::HuggingFace { org, repo, .. } => {
            assert_eq!(org, "Qwen", "HF org should be Qwen");
            assert_eq!(repo, "Qwen2.5-Coder-0.5B-Instruct", "HF repo should match");
        }
        _ => panic!("Should parse as HuggingFace source"),
    }

    // 2. Architecture support (same as Qwen2)
    let arch = Architecture::Qwen2;
    let mapped = arch.map_name("model.layers.0.mlp.gate_proj.weight");
    assert!(
        mapped.contains("mlp.gate_proj.weight"),
        "Qwen2 maps MLP names"
    );

    // 3. Import options with quantization
    let options = ImportOptions {
        architecture: Architecture::Qwen2,
        validation: ValidationConfig::Basic,
        quantize: Some(aprender::format::converter::QuantizationType::Int4),
        compress: None,
        strict: false,
        cache: false,
        tokenizer_path: None,
        allow_no_config: false,
    };
    assert!(options.quantize.is_some(), "INT4 quantization supported");

    // 4. Config verification (shares architecture with Qwen2-0.5B)
    let config = Qwen2Config::qwen25_coder_0_5b_instruct();
    assert_eq!(config.hidden_size, 896, "Same hidden_size as Qwen2-0.5B");
    assert_eq!(config.num_layers, 24, "Same num_layers as Qwen2-0.5B");
}

/// Verify all Qwen2 tensor name mappings
#[test]
fn z_import_qwen2_tensor_mappings() {
    use aprender::format::Architecture;

    let arch = Architecture::Qwen2;

    // Test all expected tensor patterns
    // PMAT-099: Preserve model. prefix for AprTransformer compatibility
    // The model. prefix is now preserved during import for architecture compatibility
    let patterns = [
        ("model.embed_tokens.weight", "model.embed_tokens.weight"),
        (
            "model.layers.0.self_attn.q_proj.weight",
            "model.layers.0.self_attn.q_proj.weight",
        ),
        (
            "model.layers.0.self_attn.k_proj.weight",
            "model.layers.0.self_attn.k_proj.weight",
        ),
        (
            "model.layers.0.self_attn.v_proj.weight",
            "model.layers.0.self_attn.v_proj.weight",
        ),
        (
            "model.layers.0.self_attn.o_proj.weight",
            "model.layers.0.self_attn.o_proj.weight",
        ),
        (
            "model.layers.0.mlp.gate_proj.weight",
            "model.layers.0.mlp.gate_proj.weight",
        ),
        (
            "model.layers.0.mlp.up_proj.weight",
            "model.layers.0.mlp.up_proj.weight",
        ),
        (
            "model.layers.0.mlp.down_proj.weight",
            "model.layers.0.mlp.down_proj.weight",
        ),
        (
            "model.layers.0.input_layernorm.weight",
            "model.layers.0.input_layernorm.weight",
        ),
        (
            "model.layers.0.post_attention_layernorm.weight",
            "model.layers.0.post_attention_layernorm.weight",
        ),
        ("model.norm.weight", "model.norm.weight"),
        ("lm_head.weight", "lm_head.weight"),
    ];

    for (input, expected) in patterns {
        let mapped = arch.map_name(input);
        assert_eq!(mapped, expected, "Mapping for {input} should match");
    }
}

// ============================================================================
// Section 19 Integration Tests: End-to-End Validation
// These tests require model files and are marked #[ignore] for CI
// Run with: cargo test --test spec_checklist_tests z_ -- --ignored
// ============================================================================

/// Z1-E2E: End-to-end TinyLlama import test
/// Requires: TinyLlama model file
#[test]
#[ignore = "Requires TinyLlama model file - run manually with model"]
fn z1_e2e_tinyllama_import() {
    // This would test actual import:
    // apr import path/to/tinyllama.safetensors -o tinyllama.apr
    // Then verify the APR file is valid
    println!("Z1-E2E: Would test TinyLlama import with real model file");
}

/// Z4-E2E: End-to-end QwenCoder serving test
/// Requires: QwenCoder model file and running server
#[test]
#[ignore = "Requires QwenCoder model and server - run manually"]
fn z4_e2e_qwencoder_serving() {
    // This would test:
    // 1. apr serve qwencoder.apr &
    // 2. curl POST with code completion request
    // 3. Verify response has valid code
    println!("Z4-E2E: Would test QwenCoder serving with real model");
}

/// Z7-E2E: End-to-end TTFT latency test
/// Requires: Running server with model
#[test]
#[ignore = "Requires running server - run manually"]
fn z7_e2e_ttft_latency() {
    // This would test:
    // 1. Start server
    // 2. Measure time from request to first token
    // 3. Assert < 50ms
    println!("Z7-E2E: Would measure TTFT latency");
}

/// Z9-E2E: End-to-end high-load stability test
/// Requires: Running server with model
#[test]
#[ignore = "Requires running server - stress test manually"]
fn z9_e2e_high_load_stability() {
    // This would test:
    // 1. Start server
    // 2. Launch 50 concurrent connections
    // 3. Verify no crashes
    println!("Z9-E2E: Would test 50 concurrent connections");
}

/// Z10-E2E: End-to-end overhead comparison
/// Requires: Model file for both bench and serve
#[test]
#[ignore = "Requires model file - run manually"]
fn z10_e2e_overhead_comparison() {
    // This would test:
    // 1. Run apr bench model.apr -> get tok/s
    // 2. Run apr serve, measure serving tok/s
    // 3. Assert serving >= 95% of bench
    println!("Z10-E2E: Would compare bench vs serve performance");
}