apr-cli 0.4.16

CLI tool for APR model inspection, debugging, and operations
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
//! Import command implementation
//!
//! Implements APR-SPEC §13: Import/Convert Pipeline
//!
//! Downloads models from HuggingFace, converts to APR format with inline validation.

use crate::error::{CliError, Result};
use crate::output;
use aprender::format::{apr_import, Architecture, ImportOptions, Source, ValidationConfig};
use colored::Colorize;
use std::path::{Path, PathBuf};

/// Run the import command
///
/// Orchestrates the full import pipeline: rejects PyTorch checkpoints
/// (GH-267), optionally enforces SafeTensors-only provenance (F-GT-001),
/// derives a default output path from the source (GH-169), dispatches
/// local GGUF sources to the Q4K-preserving converter when the
/// `inference` feature is enabled (PMAT-103), and finally runs
/// `apr_import` with the assembled `ImportOptions`.
///
/// # Arguments
/// * `source` - Model source: local path, URL, or `hf://org/repo` spec
/// * `output` - Output `.apr` path; derived from `source` when `None`
/// * `arch` - Architecture override; `None` or `"auto"` autodetects
/// * `quantize` - Optional quantization name (`int8`, `int4`, `fp16`, `q4k`)
/// * `strict` - Enables strict validation and strict import mode
/// * `preserve_q4k` - Legacy flag. NOTE(review): the warning below states
///   this is now the default for GGUF imports, yet the Q4K converter path
///   still runs only when the flag is set — confirm intended behavior
/// * `tokenizer` - Optional path to an external tokenizer file
/// * `enforce_provenance` - Reject GGUF sources entirely (F-GT-001)
/// * `allow_no_config` - Permit imports that lack a model config
///
/// # Errors
/// Returns `CliError::ValidationFailed` for invalid sources/options or a
/// failed import pipeline.
#[provable_contracts_macros::contract("apr-cli-safety-v1", equation = "offline_guard")]
pub(crate) fn run(
    source: &str,
    output: Option<&Path>,
    arch: Option<&str>,
    quantize: Option<&str>,
    strict: bool,
    preserve_q4k: bool,
    tokenizer: Option<&PathBuf>,
    enforce_provenance: bool,
    allow_no_config: bool,
) -> Result<()> {
    contract_pre_format_conversion_roundtrip!();
    // GH-267: Detect PyTorch model.bin format and give helpful error
    reject_pytorch_format(source)?;

    check_provenance(source, enforce_provenance)?;

    // GH-169: Derive output path from source if not provided
    let output_path = match output {
        Some(p) => p.to_path_buf(),
        None => derive_output_path(source)?,
    };
    // Shadow the Option<&Path> parameter with the resolved concrete path.
    let output = output_path.as_path();

    // GH-582: --preserve-q4k is now the default for GGUF imports
    if preserve_q4k {
        eprintln!(
            "  {} --preserve-q4k is now the default for GGUF imports (PMAT-103). Flag has no additional effect.",
            output::badge_warn("NOTE")
        );
    }

    // PMAT-103: If preserve_q4k is set and source is a local GGUF file,
    // use realizar's Q4K converter to preserve quantization
    #[cfg(feature = "inference")]
    if preserve_q4k {
        let source_path = std::path::Path::new(source);
        if source_path.exists()
            && source_path
                .extension()
                .is_some_and(|ext| ext.eq_ignore_ascii_case("gguf"))
        {
            // Early return: the Q4K path bypasses the standard pipeline.
            return run_q4k_import(source_path, output);
        }
    }

    // BUG-IMPORT-001 FIX: Warn if preserve_q4k is used but feature not enabled
    #[cfg(not(feature = "inference"))]
    if preserve_q4k {
        eprintln!(
            "  {} --preserve-q4k requires the 'inference' feature. \
             Falling back to standard import (Q4K will be dequantized to F32).",
            output::badge_warn("WARN")
        );
    }

    // Parse and display source info
    let parsed_source = Source::parse(source)
        .map_err(|e| CliError::ValidationFailed(format!("Invalid source: {e}")))?;

    output::header("APR Import Pipeline");

    let source_desc = describe_source(&parsed_source);

    println!(
        "{}",
        output::kv_table(&[
            ("Source", source_desc),
            ("Output", output.display().to_string()),
        ])
    );
    println!();

    // Build import options
    let architecture = parse_architecture(arch)?;
    let options = ImportOptions {
        architecture,
        // Strict mode upgrades validation from Basic to Strict.
        validation: if strict {
            ValidationConfig::Strict
        } else {
            ValidationConfig::Basic
        },
        quantize: parse_quantize(quantize)?,
        compress: None,
        strict,
        cache: true,
        tokenizer_path: tokenizer.cloned(),
        allow_no_config,
    };

    print_import_config(&options);

    // Run import pipeline
    output::pipeline_stage("Importing", output::StageStatus::Running);
    print_import_result(apr_import(source, output, options))
}

/// F-GT-001: Enforce provenance chain — reject pre-baked GGUF imports.
///
/// When `enforce` is true, any source that looks like a GGUF artifact
/// (a `.gguf` file or a `-GGUF`-suffixed HuggingFace repo, any case) is
/// rejected so SafeTensors remains the single canonical source format.
///
/// # Errors
/// Returns `CliError::ValidationFailed` when `enforce` is set and the
/// source appears to be GGUF.
fn check_provenance(source: &str, enforce: bool) -> Result<()> {
    if !enforce {
        return Ok(());
    }
    // Case-insensitive detection: the previous checks only matched
    // "-GGUF" / "-gguf" literally and missed mixed-case markers like
    // "-Gguf"; lowering once covers every casing of both patterns.
    let lowered = source.to_ascii_lowercase();
    let is_gguf = lowered.ends_with(".gguf") || lowered.contains("-gguf");
    if is_gguf {
        return Err(CliError::ValidationFailed(
            "F-GT-001: --enforce-provenance rejects GGUF imports. \
             Use SafeTensors as the canonical source format for single-provenance testing. \
             See Section 0 of qwen2.5-coder-showcase-demo.md for rationale."
                .to_string(),
        ));
    }
    Ok(())
}

/// Describe a parsed source for display.
///
/// HuggingFace sources render as `hf://org/repo[/file]`; local paths and
/// URLs render verbatim.
fn describe_source(source: &Source) -> String {
    match source {
        Source::HuggingFace { org, repo, file } => match file {
            // Direct match avoids the redundant clone a map_or(base.clone(), …)
            // form would force when no file is present.
            Some(f) => format!("hf://{org}/{repo}/{f}"),
            None => format!("hf://{org}/{repo}"),
        },
        Source::Local(path) => path.display().to_string(),
        Source::Url(url) => url.clone(),
    }
}

/// Parse architecture string into Architecture enum.
///
/// `None` or `"auto"` selects automatic detection. Known aliases map to
/// their canonical architecture; recognized-but-unsupported names get a
/// tracking-issue error; anything else gets a list of supported values.
fn parse_architecture(arch: Option<&str>) -> Result<Architecture> {
    // Absent flag means autodetect; unwrap once and match on the name.
    let Some(name) = arch else {
        return Ok(Architecture::Auto);
    };
    match name {
        "auto" => Ok(Architecture::Auto),
        "whisper" => Ok(Architecture::Whisper),
        "llama" => Ok(Architecture::Llama),
        "bert" => Ok(Architecture::Bert),
        "qwen2" => Ok(Architecture::Qwen2),
        "qwen3" => Ok(Architecture::Qwen3),
        "qwen3_5" | "qwen3.5" => Ok(Architecture::Qwen3_5),
        "gpt2" | "starcoder" | "bigcode" => Ok(Architecture::Gpt2),
        "gpt-neox" | "gpt_neox" | "pythia" => Ok(Architecture::GptNeoX),
        "opt" | "galactica" => Ok(Architecture::Opt),
        "phi" | "phi3" | "phi4" => Ok(Architecture::Phi),
        // The Gemma family shares the Llama code path.
        "gemma" | "gemma2" | "gemma3" => Ok(Architecture::Llama),
        unsupported @ ("falcon" | "mamba" | "t5") => Err(CliError::ValidationFailed(format!(
            "Architecture '{unsupported}' is not yet supported. Tracking: https://github.com/anthropics/aprender/issues"
        ))),
        other => Err(CliError::ValidationFailed(format!(
            "Unknown architecture: {other}. Supported: whisper, llama, bert, qwen2, qwen3, qwen3_5, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto"
        ))),
    }
}

/// Print import configuration.
///
/// Always shows architecture and validation level; quantization only
/// when one was requested.
fn print_import_config(options: &ImportOptions) {
    let mut rows: Vec<(&str, String)> = Vec::with_capacity(3);
    rows.push(("Architecture", format!("{:?}", options.architecture)));
    rows.push(("Validation", format!("{:?}", options.validation)));
    if let Some(q) = &options.quantize {
        rows.push(("Quantization", format!("{q:?}")));
    }
    println!("{}", output::kv_table(&rows));
    println!();
}

/// Print import result with validation report.
///
/// On success, shows the validation score and grade plus a pass/warn
/// badge (pass threshold: 95). On failure, shows a fail badge and
/// propagates the error as `CliError::ValidationFailed`.
fn print_import_result(
    result: std::result::Result<aprender::format::ValidationReport, aprender::error::AprenderError>,
) -> Result<()> {
    // Handle the failure path first so the happy path reads straight down.
    let report = match result {
        Ok(r) => r,
        Err(e) => {
            println!();
            println!("  {}", output::badge_fail("Import failed"));
            return Err(CliError::ValidationFailed(e.to_string()));
        }
    };

    println!();
    output::subheader("Validation Report");
    let grade = report.grade();
    println!(
        "{}",
        output::kv_table(&[
            ("Score", format!("{}/100", report.total_score)),
            ("Grade", output::grade_color(grade).to_string()),
        ])
    );
    println!();

    // Below 95 the import still completes, but with a warning badge.
    let badge = if report.passed(95) {
        output::badge_pass("Import successful")
    } else {
        output::badge_warn("Import completed with warnings")
    };
    println!("  {badge}");

    Ok(())
}

/// Map a user-supplied quantization name to a `QuantizationType`.
///
/// `None` means no quantization was requested; unknown names produce a
/// validation error listing the supported values.
fn parse_quantize(
    quantize: Option<&str>,
) -> Result<Option<aprender::format::converter::QuantizationType>> {
    use aprender::format::converter::QuantizationType;

    let Some(name) = quantize else {
        return Ok(None);
    };
    let parsed = match name {
        "int8" => QuantizationType::Int8,
        "int4" => QuantizationType::Int4,
        "fp16" => QuantizationType::Fp16,
        "q4k" | "q4_k" => QuantizationType::Q4K,
        other => {
            return Err(CliError::ValidationFailed(format!(
                "Unknown quantization: {other}. Supported: int8, int4, fp16, q4k"
            )))
        }
    };
    Ok(Some(parsed))
}

/// PMAT-103: Import GGUF file to APR with Q4K quantization preserved
///
/// This uses realizar's `GgufToAprQ4KConverter` to create an APR file
/// that preserves raw Q4K bytes for fused kernel inference.
#[cfg(feature = "inference")]
fn run_q4k_import(source: &Path, output: &Path) -> Result<()> {
    use humansize::{format_size, BINARY};
    use realizar::convert::GgufToAprQ4KConverter;

    output::header("APR Q4K Import (Fused Kernel)");
    let overview = [
        ("Source", format!("{} (GGUF)", source.display())),
        ("Output", format!("{} (APR with Q4K)", output.display())),
    ];
    println!("{}", output::kv_table(&overview));
    println!();
    output::pipeline_stage("Preserving Q4K quantization", output::StageStatus::Running);

    // Use realizar's Q4K converter; bail out on failure before reporting.
    let stats = match GgufToAprQ4KConverter::convert(source, output) {
        Ok(s) => s,
        Err(e) => {
            println!();
            println!("  {}", output::badge_fail("Q4K import failed"));
            return Err(CliError::ValidationFailed(e.to_string()));
        }
    };

    println!();
    output::subheader("Q4K Import Report");
    let report = [
        ("Total tensors", stats.tensor_count.to_string()),
        ("Q4K tensors", stats.q4k_tensor_count.to_string()),
        ("Total bytes", format_size(stats.total_bytes as u64, BINARY)),
        ("Architecture", stats.architecture.clone()),
        ("Layers", stats.num_layers.to_string()),
        ("Hidden size", stats.hidden_size.to_string()),
    ];
    println!("{}", output::kv_table(&report));
    println!();
    println!("  {}", output::badge_pass("Q4K import successful"));
    println!(
        "{}",
        "  Model ready for fused kernel inference (30+ tok/s CPU target)".dimmed()
    );
    Ok(())
}

/// Derive output .apr filename from source (GH-169)
///
/// Examples:
/// - hf://Qwen/Qwen2.5-Coder-1.5B-Instruct → Qwen2.5-Coder-1.5B-Instruct.apr
/// - hf://Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/model.gguf → model.apr
/// - /path/to/model.gguf → model.apr
/// - /path/to/model.safetensors → model.apr
fn derive_output_path(source: &str) -> Result<PathBuf> {
    // If the source string doesn't parse as a structured Source, fall back
    // to treating it as a bare path and using its file stem.
    let Ok(parsed) = Source::parse(source) else {
        let stem = Path::new(source)
            .file_stem()
            .and_then(|s| s.to_str())
            .ok_or_else(|| {
                CliError::ValidationFailed(
                    "Cannot derive output name from source. Please specify --output.".into(),
                )
            })?;
        return Ok(PathBuf::from(format!("{stem}.apr")));
    };

    let base = match parsed {
        Source::HuggingFace { org: _, repo, file } => match file {
            // A specific file was requested: name the output after it,
            // falling back to the repo name if the stem isn't valid UTF-8.
            Some(f) => Path::new(&f)
                .file_stem()
                .and_then(|s| s.to_str())
                .unwrap_or(&repo)
                .to_string(),
            // No file given: the repo name is the best stand-in.
            None => repo,
        },
        Source::Local(path) => path
            .file_stem()
            .and_then(|s| s.to_str())
            .ok_or_else(|| {
                CliError::ValidationFailed("Cannot derive output name from source".into())
            })?
            .to_string(),
        Source::Url(url) => {
            // Last path segment of the URL, defaulting to "model".
            let filename = url.rsplit('/').next().unwrap_or("model");
            Path::new(filename)
                .file_stem()
                .and_then(|s| s.to_str())
                .unwrap_or("model")
                .to_string()
        }
    };
    Ok(PathBuf::from(format!("{base}.apr")))
}

/// GH-267: Detect PyTorch model.bin files and give actionable conversion advice.
///
/// PyTorch checkpoints use Python pickle (magic: 0x80 0x02..0x05) or ZIP
/// (magic: PK\x03\x04). Neither can be parsed safely in pure Rust.
fn reject_pytorch_format(source: &str) -> Result<()> {
    let path = Path::new(source);

    // Only .bin / .pt extensions are suspect; everything else passes.
    let suspect_extension = path
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("bin") || ext.eq_ignore_ascii_case("pt"));
    if !suspect_extension {
        return Ok(());
    }

    if !path.exists() {
        // Remote .bin/.pt file — reject based on extension alone.
        return Err(pytorch_conversion_error(source));
    }

    // Local file: confirm via magic bytes before rejecting.
    match read_magic_bytes(path) {
        Ok(magic) if is_pytorch_magic(&magic) => Err(pytorch_conversion_error(source)),
        // Exists but isn't PyTorch (or is unreadable) — could be some other
        // binary format, so let the import pipeline handle it.
        _ => Ok(()),
    }
}

/// Read the first four bytes of `path` — enough to identify pickle/ZIP magic.
///
/// # Errors
/// Propagates I/O errors from opening the file or from `read_exact` when
/// the file is shorter than four bytes.
fn read_magic_bytes(path: &Path) -> std::io::Result<[u8; 4]> {
    use std::io::Read;
    let mut magic = [0u8; 4];
    std::fs::File::open(path)?.read_exact(&mut magic)?;
    Ok(magic)
}

/// Check if magic bytes indicate PyTorch format (pickle or ZIP).
///
/// Matches either a ZIP archive header (torch.save with
/// `_use_new_zipfile_serialization=True`, the default since PyTorch 1.6)
/// or a Python pickle protocol 2-5 prefix (older torch.save).
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_pytorch_magic(magic: &[u8; 4]) -> bool {
    let is_zip = magic == b"PK\x03\x04";
    let is_pickle = magic[0] == 0x80 && (2..=5).contains(&magic[1]);
    is_zip || is_pickle
}

/// Build the GH-267 rejection error for a PyTorch checkpoint source.
///
/// The message is user-facing remediation advice: two concrete ways to
/// convert a `.bin`/`.pt` checkpoint to SafeTensors before re-importing.
/// The embedded Python one-liner uses `weights_only=True` to avoid
/// arbitrary-code-execution during unpickling.
fn pytorch_conversion_error(source: &str) -> CliError {
    CliError::ValidationFailed(format!(
        "GH-267: '{source}' appears to be a PyTorch checkpoint (model.bin / .pt).\n\
         \n\
         PyTorch checkpoints use Python pickle format which cannot be parsed in pure Rust.\n\
         Convert to SafeTensors first using one of these methods:\n\
         \n\
         Method 1 (recommended): HuggingFace CLI\n\
           pip install huggingface-hub\n\
           huggingface-cli convert {source} --to safetensors\n\
         \n\
         Method 2: Python one-liner\n\
           pip install torch safetensors\n\
           python -c \"import torch; from safetensors.torch import save_file; \\\n\
             sd = torch.load('{source}', weights_only=True); \\\n\
             save_file(sd, '{source}'.replace('.bin', '.safetensors'))\"\n\
         \n\
         Then import the resulting .safetensors file:\n\
           apr import model.safetensors -o model.apr"
    ))
}

#[cfg(test)]
#[path = "import_tests.rs"]
mod tests;