aprender-core 0.29.2

Next-generation machine learning library in pure Rust
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481

/// Build GGUF architecture metadata from APR model metadata.
///
/// Emits the `general.*` keys plus the architecture-prefixed dimension keys
/// (`{arch}.context_length`, `{arch}.embedding_length`, …) that GGUF readers
/// expect. Panics (per C-07 Design-by-Contract) if required dimensions are
/// absent from the APR metadata, since exporting with guessed dimensions
/// would produce a corrupt GGUF.
fn build_gguf_arch_metadata(
    apr_metadata: &crate::format::v2::AprV2Metadata,
) -> Vec<(String, crate::format::gguf::GgufValue)> {
    use crate::format::gguf::GgufValue;

    let arch = resolve_architecture(apr_metadata);
    // C-07 (Meyer DbC): Require dimensions from model metadata — no silent LLaMA-7B defaults.
    // These fields are always populated during import/conversion; a missing one means the
    // APR file is malformed.
    let hidden_size = apr_metadata
        .hidden_size
        .expect("C-07: hidden_size required for GGUF export (missing in APR metadata)");
    let num_layers = apr_metadata
        .num_layers
        .expect("C-07: num_layers required for GGUF export (missing in APR metadata)");
    let num_heads = apr_metadata
        .num_heads
        .expect("C-07: num_heads required for GGUF export (missing in APR metadata)");
    // Grouped-query attention: fall back to MHA (kv heads == query heads).
    let num_kv_heads = apr_metadata.num_kv_heads.unwrap_or(num_heads);
    let vocab_size = apr_metadata
        .vocab_size
        .expect("C-07: vocab_size required for GGUF export (missing in APR metadata)");
    let intermediate_size = apr_metadata
        .intermediate_size
        .expect("C-07: intermediate_size required for GGUF export (missing in APR metadata)");
    let max_pos = apr_metadata.max_position_embeddings.unwrap_or(0);
    // N-01 (Meyer DbC): rope_theta from metadata, or architecture-specific default.
    let rope_theta = apr_metadata
        .rope_theta
        .unwrap_or_else(|| super::export::default_rope_theta_for_architecture(arch));
    let rms_norm_eps = apr_metadata.rms_norm_eps.unwrap_or(1e-6);
    // checked_div: yields 0 when num_heads is 0 (same guard as an explicit branch).
    let head_dim = hidden_size.checked_div(num_heads).unwrap_or(0);
    let model_name = apr_metadata
        .name
        .clone()
        .unwrap_or_else(|| "model".to_string());

    let mut meta: Vec<(String, GgufValue)> = Vec::with_capacity(14);
    meta.push((
        "general.architecture".to_string(),
        GgufValue::String(arch.to_string()),
    ));
    meta.push(("general.name".to_string(), GgufValue::String(model_name)));
    meta.push((
        "general.quantization_version".to_string(),
        GgufValue::Uint32(2),
    ));
    meta.push(("general.file_type".to_string(), GgufValue::Uint32(0)));
    meta.push((
        format!("{arch}.context_length"),
        GgufValue::Uint32(max_pos as u32),
    ));
    meta.push((
        format!("{arch}.embedding_length"),
        GgufValue::Uint32(hidden_size as u32),
    ));
    meta.push((
        format!("{arch}.block_count"),
        GgufValue::Uint32(num_layers as u32),
    ));
    meta.push((
        format!("{arch}.feed_forward_length"),
        GgufValue::Uint32(intermediate_size as u32),
    ));
    meta.push((
        format!("{arch}.attention.head_count"),
        GgufValue::Uint32(num_heads as u32),
    ));
    meta.push((
        format!("{arch}.attention.head_count_kv"),
        GgufValue::Uint32(num_kv_heads as u32),
    ));

    // GH-277: GPT-2 uses standard LayerNorm, not RMSNorm — the key name differs,
    // the epsilon value is the same field either way.
    let norm_eps_key = if arch == "gpt2" {
        format!("{arch}.attention.layer_norm_epsilon")
    } else {
        format!("{arch}.attention.layer_norm_rms_epsilon")
    };
    meta.push((norm_eps_key, GgufValue::Float32(rms_norm_eps)));

    // GH-277: Only emit RoPE keys for architectures that use RoPE.
    if uses_rope(arch) {
        meta.push((
            format!("{arch}.rope.dimension_count"),
            GgufValue::Uint32(head_dim as u32),
        ));
        meta.push((
            format!("{arch}.rope.freq_base"),
            GgufValue::Float32(rope_theta),
        ));
    }

    meta.push((
        format!("{arch}.vocab_size"),
        GgufValue::Uint32(vocab_size as u32),
    ));

    meta
}

/// Push a string array from APR custom fields to GGUF entries.
///
/// Looks up `src_key` in the custom-field map; if it holds a JSON array,
/// collects its string elements (non-strings are silently dropped) and —
/// only when at least one string survives — appends an `ArrayString`
/// entry under `gguf_key`. Missing or empty arrays add nothing.
fn push_string_array(
    entries: &mut Vec<(String, crate::format::gguf::GgufValue)>,
    custom: &std::collections::HashMap<String, serde_json::Value>,
    src_key: &str,
    gguf_key: &str,
) {
    if let Some(values) = custom.get(src_key).and_then(|v| v.as_array()) {
        let collected: Vec<String> = values
            .iter()
            .filter_map(|v| v.as_str())
            .map(str::to_owned)
            .collect();
        if !collected.is_empty() {
            entries.push((
                gguf_key.to_string(),
                crate::format::gguf::GgufValue::ArrayString(collected),
            ));
        }
    }
}

/// Push a u32 value from APR custom fields to GGUF entries.
///
/// Reads `src_key` as a JSON u64. Values that fit in `u32` are appended
/// under `gguf_key`; out-of-range values are skipped with a warning instead
/// of being truncated — the previous `as u32` cast silently wrapped values
/// above `u32::MAX`, which would emit a corrupt token id into the GGUF
/// metadata. Missing or non-integer fields add nothing.
fn push_u32_field(
    entries: &mut Vec<(String, crate::format::gguf::GgufValue)>,
    custom: &std::collections::HashMap<String, serde_json::Value>,
    src_key: &str,
    gguf_key: &str,
) {
    let Some(raw) = custom.get(src_key).and_then(|v| v.as_u64()) else {
        return;
    };
    match u32::try_from(raw) {
        Ok(val) => entries.push((
            gguf_key.to_string(),
            crate::format::gguf::GgufValue::Uint32(val),
        )),
        // Skip rather than wrap: behaves as if the field were absent.
        Err(_) => eprintln!("Warning: skipping '{src_key}': value {raw} exceeds u32 range"),
    }
}

/// Push an i32 array from APR custom fields to GGUF entries.
///
/// Looks up `src_key` in the custom-field map; if it holds a JSON array,
/// collects its integer elements as i32 (non-integers are silently dropped)
/// and — only when at least one survives — appends an `ArrayInt32` entry
/// under `gguf_key`. Missing or empty arrays add nothing.
fn push_i32_array(
    entries: &mut Vec<(String, crate::format::gguf::GgufValue)>,
    custom: &std::collections::HashMap<String, serde_json::Value>,
    src_key: &str,
    gguf_key: &str,
) {
    let Some(values) = custom.get(src_key).and_then(|v| v.as_array()) else {
        return;
    };
    let ids: Vec<i32> = values
        .iter()
        .filter_map(|v| v.as_i64())
        .map(|n| n as i32)
        .collect();
    if ids.is_empty() {
        return;
    }
    entries.push((
        gguf_key.to_string(),
        crate::format::gguf::GgufValue::ArrayInt32(ids),
    ));
}

/// Extract tokenizer metadata from APR custom fields for GGUF export (GH-253).
///
/// Produces `tokenizer.ggml.*` entries (model type, pre-tokenizer, vocab,
/// merges, special token ids, token types, add-BOS flag) and the optional
/// `tokenizer.chat_template`. Entry order is preserved as written so the
/// exported GGUF key layout stays stable.
fn extract_apr_tokenizer_for_gguf(
    apr_metadata: &crate::format::v2::AprV2Metadata,
) -> Vec<(String, crate::format::gguf::GgufValue)> {
    use crate::format::gguf::GgufValue;

    let custom = &apr_metadata.custom;
    let arch = resolve_architecture(apr_metadata);
    let mut entries = Vec::new();

    // Tokenizer model type: "gpt2" for byte-level BPE (Qwen, GPT-2), "llama" for SentencePiece.
    // GH-253-3: APR stores raw model_type from GGUF which may be "bpe" — map to "gpt2".
    let raw_model_type = custom
        .get("tokenizer.model")
        .and_then(|v| v.as_str())
        .unwrap_or("gpt2");
    let model_type = if raw_model_type == "bpe" {
        "gpt2"
    } else {
        raw_model_type
    };
    entries.push((
        "tokenizer.ggml.model".to_string(),
        GgufValue::String(model_type.to_string()),
    ));

    // GH-277: Use pre-tokenizer type mapping, preferring round-trip preserved value.
    let model_name = apr_metadata.name.as_deref().unwrap_or("");
    let pre_type = custom
        .get("tokenizer.pre_type")
        .and_then(|v| v.as_str())
        .unwrap_or_else(|| resolve_pre_tokenizer_type(arch, model_name));
    entries.push((
        "tokenizer.ggml.pre".to_string(),
        GgufValue::String(pre_type.to_string()),
    ));

    // Vocabulary, merges, special-token ids, and token types — order matters.
    push_string_array(&mut entries, custom, "tokenizer.vocabulary", "tokenizer.ggml.tokens");
    push_string_array(&mut entries, custom, "tokenizer.merges", "tokenizer.ggml.merges");
    push_u32_field(&mut entries, custom, "tokenizer.bos_token_id", "tokenizer.ggml.bos_token_id");
    push_u32_field(&mut entries, custom, "tokenizer.eos_token_id", "tokenizer.ggml.eos_token_id");
    push_i32_array(&mut entries, custom, "tokenizer.token_type", "tokenizer.ggml.token_type");
    push_u32_field(
        &mut entries,
        custom,
        "tokenizer.padding_token_id",
        "tokenizer.ggml.padding_token_id",
    );

    // GH-253-1: add_bos_token flag (only emitted when explicitly stored).
    if let Some(add_bos) = custom
        .get("tokenizer.add_bos_token")
        .and_then(|v| v.as_bool())
    {
        entries.push((
            "tokenizer.ggml.add_bos_token".to_string(),
            GgufValue::Bool(add_bos),
        ));
    }

    // GH-253-1: Chat template (Jinja2) — prefer the first-class metadata field,
    // falling back to the custom-field copy.
    let chat_tmpl = apr_metadata.chat_template.as_deref().or_else(|| {
        custom
            .get("tokenizer.chat_template")
            .and_then(|v| v.as_str())
    });
    if let Some(tmpl) = chat_tmpl {
        entries.push((
            "tokenizer.chat_template".to_string(),
            GgufValue::String(tmpl.to_string()),
        ));
    }

    entries
}

/// GH-246: Export to MLX format (Apple Silicon).
///
/// MLX models are stored as a directory containing:
/// - `model.safetensors` — weights in SafeTensors format
/// - `config.json` — model configuration (HuggingFace-compatible)
/// - `tokenizer.json` — tokenizer (optional, from APR metadata)
///
/// This reuses the SafeTensors export path since MLX uses SafeTensors as its
/// underlying weight format. The key difference is the directory structure.
///
/// # Errors
/// Returns `FormatError` if the output directory, weights file, or
/// config.json cannot be written. A tokenizer.json write failure is only
/// a warning (best-effort).
fn export_mlx(
    tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    input_path: &Path,
    output_path: &Path,
    options: &ExportOptions,
) -> Result<()> {
    // Shared constructor for format errors so each step stays one-liner-ish.
    let fmt_err = |message: String| AprenderError::FormatError { message };

    // Output path is the directory.
    fs::create_dir_all(output_path)
        .map_err(|e| fmt_err(format!("Failed to create MLX output directory: {e}")))?;

    // Write model.safetensors — with user metadata when any is present.
    let weights_path = output_path.join("model.safetensors");
    let user_metadata = extract_user_metadata(input_path);
    if user_metadata.is_empty() {
        save_safetensors(&weights_path, tensors)
            .map_err(|e| fmt_err(format!("Failed to write MLX weights: {e}")))?;
    } else {
        save_safetensors_with_metadata(&weights_path, tensors, &user_metadata)
            .map_err(|e| fmt_err(format!("Failed to write MLX weights: {e}")))?;
    }

    // Write config.json (inferred from tensor shapes).
    let config = infer_model_config(tensors);
    fs::write(output_path.join("config.json"), config)
        .map_err(|e| fmt_err(format!("Failed to write MLX config.json: {e}")))?;

    // Write tokenizer.json if available — best-effort, failure is non-fatal.
    if options.include_tokenizer {
        let tokenizer_json = infer_tokenizer_json(input_path);
        if !tokenizer_json.is_empty() {
            if let Err(e) = fs::write(output_path.join("tokenizer.json"), &tokenizer_json) {
                eprintln!("[GH-246] Warning: Failed to write tokenizer.json: {e}");
            }
        }
    }

    Ok(())
}

/// PMAT-252: Raw block passthrough for APR→GGUF export.
///
/// Reads raw tensor bytes directly from APR file (Q4K super-blocks, F32 vectors,
/// etc.) and writes them to GGUF without any dequantization/requantization.
/// This is LOSSLESS for quantized data — zero quality degradation.
///
/// The key insight: APR and GGUF both store Q4K blocks in the same binary format
/// (256-element super-blocks, 144 bytes each). The only differences are:
/// 1. Tensor names (HF convention in APR → GGML convention in GGUF)
/// 2. Shape representation (APR [rows, cols] → GGUF [ne0=cols, ne1=rows])
/// 3. File-level metadata (APR header → GGUF KV pairs)
///
/// # Errors
/// Returns `FormatError` if the APR file cannot be read or parsed, if a
/// mapped tensor's entry or data is missing, if a tensor has a dtype with no
/// GGUF equivalent, or if metadata validation / GGUF writing fails.
///
/// # Panics
/// Panics (C-07 DbC) if required model dimensions are absent from APR metadata.
fn export_apr_to_gguf_raw(input: &Path, output: &Path) -> Result<ExportReport> {
    use crate::format::gguf::{export_tensors_to_gguf, GgmlType, GgufTensor};
    use crate::format::v2::{AprV2Reader, TensorDType};
    use std::fs::File;
    use std::io::BufWriter;

    // Whole file is read into memory; its length is reported as original_size.
    let data = fs::read(input).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to read APR file: {e}"),
    })?;
    let original_size = data.len();

    let reader = AprV2Reader::from_bytes(&data).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to parse APR file: {e:?}"),
    })?;

    let apr_metadata = reader.metadata().clone();

    let arch = resolve_architecture(&apr_metadata);
    // C-07 (Meyer DbC): Required dimensions — no silent LLaMA-7B defaults.
    // These are only used for the diagnostic log below; build_gguf_arch_metadata
    // re-reads them itself with the same contract.
    let num_layers = apr_metadata
        .num_layers
        .expect("C-07: num_layers required for GGUF export");
    let num_heads = apr_metadata
        .num_heads
        .expect("C-07: num_heads required for GGUF export");
    let num_kv_heads = apr_metadata.num_kv_heads.unwrap_or(num_heads);
    let hidden_size = apr_metadata
        .hidden_size
        .expect("C-07: hidden_size required for GGUF export");

    // Build metadata from architecture config + tokenizer custom fields
    let mut metadata = build_gguf_arch_metadata(&apr_metadata);
    metadata.extend(extract_apr_tokenizer_for_gguf(&apr_metadata));

    // GH-253-4: Validate metadata completeness before writing
    let validated = ValidatedGgufMetadata::validate(metadata)?;

    eprintln!(
        "[PMAT-252] Writing {} metadata keys (arch={}, layers={}, heads={}/{}kv, hidden={})",
        validated.as_slice().len(),
        arch,
        num_layers,
        num_heads,
        num_kv_heads,
        hidden_size
    );

    // GH-277: Build contract-driven tensor name mapper
    let mapper = build_gguf_mapper(arch);

    // Build GGUF tensors with raw byte passthrough
    let tensor_names = reader.tensor_names();
    let mut gguf_tensors = Vec::with_capacity(tensor_names.len());

    for name in &tensor_names {
        // GH-277: Use contract-driven mapping; skip tensors that return None
        let Some(gguf_name) = mapper.map_name(name) else {
            eprintln!("[GH-277] Skipping tensor '{}' (not in GGUF contract)", name);
            continue;
        };

        // Both the index entry (dtype/shape) and the raw byte payload must exist.
        let entry = reader
            .get_tensor(name)
            .ok_or_else(|| AprenderError::FormatError {
                message: format!("Tensor '{}' missing from index", name),
            })?;
        let raw_bytes = reader
            .get_tensor_data(name)
            .ok_or_else(|| AprenderError::FormatError {
                message: format!("Tensor '{}' data not found", name),
            })?;

        // Map APR dtype → GGUF dtype (same discriminant values)
        // GH-439 (poka-yoke): Exhaustive match — no silent fallbacks.
        // Adding a new TensorDType variant forces a compile error here.
        let gguf_dtype = match entry.dtype {
            TensorDType::F32 => GgmlType::F32,
            TensorDType::F16 => GgmlType::F16,
            TensorDType::Q4K => GgmlType::Q4K,
            TensorDType::Q6K => GgmlType::Q6K,
            TensorDType::AprQ8 => GgmlType::Q8_0,
            TensorDType::BF16 | TensorDType::F64 | TensorDType::I32
            | TensorDType::I64 | TensorDType::I8 | TensorDType::U8
            | TensorDType::AprQ4 => {
                return Err(AprenderError::FormatError {
                    message: format!(
                        "Tensor '{}' has dtype {:?} which has no GGUF equivalent. \
                         Convert to F32/F16 first with `apr convert`.",
                        name, entry.dtype
                    ),
                });
            }
        };

        // Reverse shape for GGUF: [rows, cols] → [ne0=cols, ne1=rows]
        // Non-2D shapes are passed through unchanged.
        let gguf_shape = if entry.shape.len() == 2 {
            vec![entry.shape[1] as u64, entry.shape[0] as u64]
        } else {
            entry.shape.iter().map(|&d| d as u64).collect()
        };

        eprintln!(
            "[PMAT-252] '{}': {} bytes (dtype={:?})",
            gguf_name,
            raw_bytes.len(),
            entry.dtype
        );

        gguf_tensors.push(GgufTensor {
            name: gguf_name,
            shape: gguf_shape,
            dtype: gguf_dtype,
            data: raw_bytes.to_vec(),
        });
    }

    // GH-277: Add fused tensors (e.g., QKV fusion for GPT-2)
    let fused = build_fused_tensors_raw(&mapper, &reader);
    gguf_tensors.extend(fused);

    // Write to file
    let file = File::create(output).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to create output file: {e}"),
    })?;
    let mut writer = BufWriter::new(file);

    export_tensors_to_gguf(&mut writer, &gguf_tensors, validated.as_slice())?;

    // Best-effort size readback: 0 if metadata cannot be read after the write.
    let exported_size = fs::metadata(output).map(|m| m.len() as usize).unwrap_or(0);

    Ok(ExportReport {
        original_size,
        exported_size,
        tensor_count: gguf_tensors.len(),
        format: ExportFormat::Gguf,
        // NOTE(review): quantization is reported as Q4K unconditionally, even
        // when the file contained only F32/F16 tensors — confirm whether the
        // report should reflect the actual dominant dtype.
        quantization: Some(QuantizationType::Q4K),
    })
}

/// Legacy mapper for test compatibility.
/// Uses the fallback legacy mapper (same behavior as old hardcoded function);
/// names without a mapping pass through unchanged.
#[cfg(test)]
fn hf_to_gguf_name(name: &str) -> String {
    match build_legacy_mapper().map_name(name) {
        Some(mapped) => mapped,
        None => name.to_string(),
    }
}