// apr_cli/commands_enum.rs
1
2#[derive(Subcommand, Debug)]
3pub enum Commands {
4 /// Run model directly (auto-download, cache, execute)
5 Run {
6 /// Model source: local path, hf://org/repo, or URL
7 #[arg(value_name = "SOURCE")]
8 source: String,
9 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10 #[arg(value_name = "PROMPT")]
11 positional_prompt: Option<String>,
12 /// Input file (audio, text, etc.)
13 #[arg(short, long)]
14 input: Option<PathBuf>,
15 /// Text prompt for generation (for LLM models)
16 #[arg(short, long)]
17 prompt: Option<String>,
18 /// Maximum tokens to generate (default: 32)
19 #[arg(short = 'n', long, default_value = "32")]
20 max_tokens: usize,
21 /// Enable streaming output
22 #[arg(long)]
23 stream: bool,
24 /// Language code (for ASR models)
25 #[arg(short, long)]
26 language: Option<String>,
27 /// Task (transcribe, translate)
28 #[arg(short, long)]
29 task: Option<String>,
30 /// Output format (text, json, srt, vtt)
31 #[arg(short = 'f', long, default_value = "text")]
32 format: String,
33 /// Disable GPU acceleration (force CPU-only inference)
34 #[arg(long, alias = "cpu", conflicts_with = "gpu")]
35 no_gpu: bool,
36 /// Force GPU acceleration
37 #[arg(long, conflicts_with = "no_gpu")]
38 gpu: bool,
39 /// Offline mode: block all network access (Sovereign AI compliance)
40 #[arg(long)]
41 offline: bool,
42 /// Benchmark mode: output performance metrics (tok/s, latency)
43 #[arg(long)]
44 benchmark: bool,
45 /// Enable inference tracing (APR-TRACE-001)
46 #[arg(long)]
47 trace: bool,
48 /// Trace specific steps only (comma-separated)
49 #[arg(long, value_delimiter = ',')]
50 trace_steps: Option<Vec<String>>,
51 /// Verbose tracing (show tensor values)
52 #[arg(long)]
53 trace_verbose: bool,
54 /// Save trace output to JSON file
55 #[arg(long, value_name = "FILE")]
56 trace_output: Option<PathBuf>,
57 /// Trace detail level (none, basic, layer, payload, chrome)
58 /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
59 /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
60 #[arg(long, value_name = "LEVEL", default_value = "basic")]
61 trace_level: String,
62 /// Shorthand for --trace --trace-level payload (tensor value inspection)
63 #[arg(long)]
64 trace_payload: bool,
65 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
66 #[arg(long)]
67 profile: bool,
68 /// Apply chat template for Instruct models (GAP-UX-001)
69 ///
70 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
71 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
72 #[arg(long)]
73 chat: bool,
74 /// Sampling temperature (0.0 = greedy, default: 0.0)
75 #[arg(long, default_value = "0.0")]
76 temperature: f32,
77 /// Top-k sampling (default: 1 = greedy)
78 #[arg(long, default_value = "1")]
79 top_k: usize,
80 /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
81 /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
82 #[arg(long)]
83 top_p: Option<f32>,
84 /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
85 /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
86 #[arg(long, default_value = "299792458")]
87 seed: u64,
88 /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
89 /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
90 #[arg(long, default_value = "1.0")]
91 repeat_penalty: f32,
92 /// Context window for repetition penalty (number of recent tokens to check)
93 /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
94 #[arg(long, default_value = "64")]
95 repeat_last_n: usize,
96 /// Process prompt tokens one-by-one instead of batched prefill.
97 /// Useful for debugging prefill correctness (comparing per-token attention).
98 /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
99 #[arg(long)]
100 split_prompt: bool,
101 /// Batch mode: read prompts from JSONL, output results as JSONL.
102 /// Model loads once, processes all prompts sequentially.
103 /// Each input line: {"prompt": "...", "task_id": "..."}
104 /// Chat template is applied automatically.
105 #[arg(long, value_name = "FILE")]
106 batch_jsonl: Option<PathBuf>,
107 /// Show verbose output (model loading, backend info)
108 #[arg(short, long)]
109 verbose: bool,
110 /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
111 #[arg(long, value_name = "BACKEND")]
112 backend: Option<String>,
113 },
114 /// Inference server (plan/run)
115 Serve {
116 #[command(subcommand)]
117 command: ServeCommands,
118 },
119 /// Inspect model metadata, vocab, and structure
120 Inspect {
121 /// Path to .apr model file
122 #[arg(value_name = "FILE")]
123 file: PathBuf,
124 /// Show vocabulary details
125 #[arg(long)]
126 vocab: bool,
127 /// Show filter/security details
128 #[arg(long)]
129 filters: bool,
130 /// Show weight statistics
131 #[arg(long)]
132 weights: bool,
133 /// Output as JSON
134 #[arg(long)]
135 json: bool,
136 },
137 /// Simple debugging output ("drama" mode available)
138 Debug {
139 /// Path to .apr model file
140 #[arg(value_name = "FILE")]
141 file: PathBuf,
142 /// Theatrical "drama" mode output
143 #[arg(long)]
144 drama: bool,
145 /// Show hex dump
146 #[arg(long)]
147 hex: bool,
148 /// Extract ASCII strings
149 #[arg(long)]
150 strings: bool,
151 /// Limit output lines
152 #[arg(long, default_value = "256")]
153 limit: usize,
154 },
155 /// Validate model integrity and quality
156 Validate {
157 /// Path to .apr model file
158 #[arg(value_name = "FILE")]
159 file: PathBuf,
160 /// Show 100-point quality assessment
161 #[arg(long)]
162 quality: bool,
163 /// Strict validation (fail on warnings)
164 #[arg(long)]
165 strict: bool,
166 /// Minimum score to pass (0-100)
167 #[arg(long)]
168 min_score: Option<u8>,
169 },
170 /// Validate a publish manifest (FALSIFY-PM-001..006).
171 ///
172 /// Contract: `contracts/publish-manifest-v1.yaml`
173 /// Spec: SPEC-SHIP-TWO-001 §12.3 AC-EX-004
174 ValidateManifest {
175 /// Path to manifest YAML
176 #[arg(value_name = "MANIFEST")]
177 file: PathBuf,
178 /// Optional local .apr artifact to discharge FALSIFY-PM-002 (sha256 match)
179 #[arg(long, value_name = "APR_FILE")]
180 artifact: Option<PathBuf>,
181 /// Discharge FALSIFY-PM-003 via network: HTTP HEAD + streaming sha256.
182 /// Default is DEFERRED (offline-safe). Ignored when --offline is set.
183 /// Closes F-PUBLISH-EXTRA-001::dogfood_ex05 (no Python in ex-05).
184 #[arg(long)]
185 live: bool,
186 },
187 /// Compare two models
188 Diff {
189 /// First model file
190 #[arg(value_name = "FILE1")]
191 file1: PathBuf,
192 /// Second model file
193 #[arg(value_name = "FILE2")]
194 file2: PathBuf,
195 /// Show weight-level differences
196 #[arg(long)]
197 weights: bool,
198 /// Compare actual tensor values with statistical analysis
199 #[arg(long)]
200 values: bool,
201 /// Filter tensors by name pattern (for --values)
202 #[arg(long)]
203 filter: Option<String>,
204 /// Maximum number of tensors to compare (for --values)
205 #[arg(long, default_value = "10")]
206 limit: usize,
207 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
208 #[arg(long)]
209 transpose_aware: bool,
210 /// Output as JSON
211 #[arg(long)]
212 json: bool,
213 },
214 /// List tensor names and shapes
215 Tensors {
216 /// Path to .apr model file
217 #[arg(value_name = "FILE")]
218 file: PathBuf,
219 /// Show tensor statistics (mean, std, min, max)
220 #[arg(long)]
221 stats: bool,
222 /// Filter tensors by name pattern
223 #[arg(long)]
224 filter: Option<String>,
225 /// Limit number of tensors shown (0 = unlimited)
226 #[arg(long, default_value = "0")]
227 limit: usize,
228 /// Output as JSON
229 #[arg(long)]
230 json: bool,
231 },
232 /// Layer-by-layer trace analysis
233 Trace {
234 /// Path to .apr model file
235 #[arg(value_name = "FILE")]
236 file: PathBuf,
237 /// Filter layers by name pattern
238 #[arg(long)]
239 layer: Option<String>,
240 /// Compare with reference model
241 #[arg(long)]
242 reference: Option<PathBuf>,
243 /// Output as JSON
244 #[arg(long)]
245 json: bool,
246 /// Verbose output with per-layer stats
247 #[arg(short, long)]
248 verbose: bool,
249 /// Trace payload through model
250 #[arg(long)]
251 payload: bool,
252 /// Diff mode
253 #[arg(long)]
254 diff: bool,
255 /// Interactive mode
256 #[arg(long)]
257 interactive: bool,
258 /// Save per-stage F32 tensors during trace for SHIP-007 layer-0
259 /// element-wise diff. Comma-separated stage names from
260 /// `apr-cli-trace-save-tensor-v1.yaml` (e.g.
261 /// `embedding,qkv_matmul,attention`). Pass `all` to save every
262 /// stage. Output goes to `--save-tensor-dir` if provided,
263 /// else `<file_dir>/trace-tensors/<run_id>/`.
264 #[arg(long, value_name = "STAGES")]
265 save_tensor: Option<String>,
266 /// Output directory for `--save-tensor` (default: sibling
267 /// `trace-tensors/<run_id>/`).
268 #[arg(long, value_name = "DIR")]
269 save_tensor_dir: Option<PathBuf>,
270 /// Layer-id range for `--save-tensor` (default: 0..1, i.e.
271 /// layer 0 only). Format: `START..END` (Rust range syntax,
272 /// END exclusive).
273 #[arg(long, value_name = "RANGE", default_value = "0..1")]
274 save_tensor_layers: String,
275 },
276 /// Check for best practices and conventions
277 Lint {
278 /// Path to .apr model file
279 #[arg(value_name = "FILE")]
280 file: PathBuf,
281 },
282 /// Explain errors, architecture, tensors, and kernel dispatch
283 Explain {
284 /// Error code, model file path, or family name (auto-detected)
285 #[arg(value_name = "CODE_OR_FILE")]
286 code_or_file: Option<String>,
287 /// Path to .apr model file (optional context for --tensor)
288 #[arg(short, long)]
289 file: Option<PathBuf>,
290 /// Explain a specific tensor
291 #[arg(long)]
292 tensor: Option<String>,
293 /// Explain kernel dispatch pipeline for architecture
294 #[arg(long)]
295 kernel: bool,
296 /// Output as JSON
297 #[arg(long)]
298 json: bool,
299 /// Show kernel contract details and proof obligations
300 #[arg(short, long)]
301 verbose: bool,
302 /// Show per-kernel proof status from contract tests
303 #[arg(long)]
304 proof_status: bool,
305 },
306 /// Manage canary tests for regression
307 Canary {
308 #[command(subcommand)]
309 command: CanaryCommands,
310 },
311 /// Export model to other formats
312 Export {
313 /// Path to .apr model file
314 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
315 file: Option<PathBuf>,
316 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
317 #[arg(long, default_value = "safetensors")]
318 format: String,
319 /// Output file/directory path
320 #[arg(short, long)]
321 output: Option<PathBuf>,
322 /// Apply quantization during export (int8, int4, fp16)
323 #[arg(long)]
324 quantize: Option<String>,
325 /// List all supported export formats
326 #[arg(long)]
327 list_formats: bool,
328 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
329 #[arg(long)]
330 batch: Option<String>,
331 /// Output in JSON format
332 #[arg(long)]
333 json: bool,
334 /// Plan mode (validate inputs, show export plan, no execution)
335 #[arg(long)]
336 plan: bool,
337 },
338 /// Import from external formats (hf://org/repo, local files, URLs)
339 Import {
340 /// Source: hf://org/repo, local file, or URL
341 #[arg(value_name = "SOURCE")]
342 source: String,
343 /// Output .apr file path (default: derived from source name)
344 #[arg(short, long)]
345 output: Option<PathBuf>,
346 /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
347 #[arg(long, default_value = "auto")]
348 arch: String,
349 /// Quantization (int8, int4, fp16)
350 #[arg(long)]
351 quantize: Option<String>,
352 /// Strict mode: reject unverified architectures and fail on validation errors
353 #[arg(long)]
354 strict: bool,
355 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
356 /// Uses realizar's Q4K converter instead of dequantizing to F32
357 #[arg(long)]
358 preserve_q4k: bool,
359 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
360 /// Required if the GGUF has no embedded tokenizer vocabulary.
361 #[arg(long)]
362 tokenizer: Option<PathBuf>,
363 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
364 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
365 #[arg(long)]
366 enforce_provenance: bool,
367 /// GH-223: Allow import without config.json (default: error).
368 /// Without config.json, hyperparameters like rope_theta are inferred from
369 /// tensor shapes and may be wrong, producing garbage output.
370 #[arg(long)]
371 allow_no_config: bool,
372 },
373 /// Download and cache model OR HuggingFace dataset (Ollama-like UX)
374 Pull {
375 /// Model reference (alias, hf:// URI, or org/repo) OR "dataset"
376 /// asset-type discriminator. When this value is the literal
377 /// string "dataset", the next positional `repo` is the
378 /// HuggingFace dataset repo and dataset-pull semantics apply.
379 #[arg(value_name = "MODEL_OR_ASSET_TYPE")]
380 model_ref: String,
381 /// Dataset repository (used only when model_ref == "dataset").
382 /// Per `apr-cli-pull-dataset-v1.yaml`.
383 #[arg(value_name = "REPO")]
384 repo: Option<String>,
385 /// Force re-download even if cached
386 #[arg(long)]
387 force: bool,
388 /// CRUX-A-01: resolve short name to canonical URL and exit without
389 /// performing any network I/O.
390 #[arg(long)]
391 dry_run: bool,
392 /// CRUX-A-03: pin to a specific branch, tag, or git SHA on the remote
393 /// (HuggingFace Hub). Defaults to "main" when omitted.
394 #[arg(long, value_name = "REV")]
395 revision: Option<String>,
396 /// CRUX-A-20: offline mode — forbid any outbound network I/O.
397 /// Equivalent to APR_OFFLINE=1 or HF_HUB_OFFLINE=1 in the environment.
398 #[arg(long)]
399 offline: bool,
400 /// (dataset mode) Glob pattern for shard selection. May be passed
401 /// multiple times; matches are unioned. fnmatch-compatible
402 /// (`*`, `?`, `[a-z]`). No-match is fail-fast.
403 #[arg(long, value_name = "GLOB")]
404 include: Vec<String>,
405 /// (dataset mode) Output directory. Default:
406 /// `~/.cache/aprender/datasets/<repo>/`.
407 #[arg(short = 'o', long)]
408 output: Option<PathBuf>,
409 },
410 /// Registry operations (CRUX-A-01): inspect alias map, etc.
411 Registry {
412 #[command(subcommand)]
413 command: crate::commands::registry::RegistryCommands,
414 },
415 /// List cached models
416 #[command(name = "list", alias = "ls")]
417 List,
418 /// Remove model from cache
419 #[command(name = "rm", alias = "remove")]
420 Rm {
421 /// Model reference to remove
422 #[arg(value_name = "MODEL")]
423 model_ref: String,
424 },
425 /// Convert/optimize model
426 Convert {
427 /// Path to .apr model file
428 #[arg(value_name = "FILE")]
429 file: PathBuf,
430 /// Quantize to format (int8, int4, fp16, q4k)
431 #[arg(long)]
432 quantize: Option<String>,
433 /// Compress output (none, zstd, zstd-max, lz4)
434 #[arg(long)]
435 compress: Option<String>,
436 /// Output file path
437 #[arg(short, long)]
438 output: PathBuf,
439 /// Force overwrite existing files
440 #[arg(short, long)]
441 force: bool,
442 },
443 /// Stamp provenance fields (license, data_source, data_license) onto an existing .apr file
444 ///
445 /// SHIP-009 full-discharge enabler — patches the three provenance fields on
446 /// a pre-built APR v2 artifact (e.g., the shipped MODEL-1 teacher whose
447 /// fields are all (missing) because it was built before GATE-APR-PROV-001..003
448 /// shipped). Tensor bytes and header flags are preserved verbatim.
449 Stamp {
450 /// Path to input .apr model file
451 #[arg(value_name = "FILE")]
452 file: PathBuf,
453 /// SPDX license identifier (e.g., Apache-2.0)
454 #[arg(long)]
455 license: Option<String>,
456 /// Training-data source (e.g., huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)
457 #[arg(long = "data-source")]
458 data_source: Option<String>,
459 /// SPDX license for data_source (e.g., Apache-2.0)
460 #[arg(long = "data-license")]
461 data_license: Option<String>,
462 /// Output file path
463 #[arg(short, long)]
464 output: PathBuf,
465 /// Force overwrite existing files
466 #[arg(short, long)]
467 force: bool,
468 },
469 /// Compile model into standalone executable (APR-SPEC §4.16)
470 Compile {
471 /// Input .apr model file
472 #[arg(value_name = "FILE", required_unless_present = "list_targets")]
473 file: Option<PathBuf>,
474 /// Output binary path (default: derived from model name)
475 #[arg(short, long)]
476 output: Option<PathBuf>,
477 /// Target triple (e.g., x86_64-unknown-linux-musl)
478 #[arg(long)]
479 target: Option<String>,
480 /// Quantize weights before embedding (int8, int4, fp16)
481 #[arg(long)]
482 quantize: Option<String>,
483 /// Release mode (optimized)
484 #[arg(long)]
485 release: bool,
486 /// Strip debug symbols
487 #[arg(long)]
488 strip: bool,
489 /// Enable LTO (Link-Time Optimization)
490 #[arg(long)]
491 lto: bool,
492 /// List available compilation targets
493 #[arg(long)]
494 list_targets: bool,
495 },
496 /// Merge multiple models
497 Merge {
498 /// Model files to merge
499 #[arg(value_name = "FILES", num_args = 2..)]
500 files: Vec<PathBuf>,
501 /// Merge strategy (average, weighted, slerp, ties, dare)
502 #[arg(long, default_value = "average")]
503 strategy: String,
504 /// Output file path (optional in --plan mode)
505 #[arg(short, long, required_unless_present = "plan")]
506 output: Option<PathBuf>,
507 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
508 #[arg(long, value_delimiter = ',')]
509 weights: Option<Vec<f32>>,
510 /// Base model for TIES/DARE (task vectors computed as delta from base)
511 #[arg(long)]
512 base_model: Option<PathBuf>,
513 /// DARE drop probability (default: 0.9)
514 #[arg(long, default_value = "0.9")]
515 drop_rate: f32,
516 /// TIES trim density threshold (default: 0.2)
517 #[arg(long, default_value = "0.2")]
518 density: f32,
519 /// RNG seed for DARE (default: 42)
520 #[arg(long, default_value = "42")]
521 seed: u64,
522 /// Plan mode (validate inputs, show merge plan, no execution)
523 #[arg(long)]
524 plan: bool,
525 },
526 /// Quantize model weights (GH-243)
527 Quantize {
528 /// Input model file
529 #[arg(value_name = "FILE")]
530 file: PathBuf,
531 /// Quantization scheme: int8, int4, fp16, q4k
532 #[arg(long, short = 's', default_value = "int4")]
533 scheme: String,
534 /// Output file path (required unless --plan)
535 #[arg(short, long)]
536 output: Option<PathBuf>,
537 /// Output format override (apr, gguf, safetensors)
538 #[arg(long)]
539 format: Option<String>,
540 /// Batch quantization (comma-separated schemes)
541 #[arg(long)]
542 batch: Option<String>,
543 /// Plan mode (estimate only, no execution)
544 #[arg(long)]
545 plan: bool,
546 /// Force overwrite existing files
547 #[arg(short, long)]
548 force: bool,
549 },
550 /// Model optimization commands (fine-tune, prune, distill)
551 #[command(flatten)]
552 ModelOps(ModelOpsCommands),
553 /// Start the MCP (Model Context Protocol) server over stdio
554 ///
555 /// Exposes `apr` as MCP tools for Claude Code, Cursor, Cline, and other
556 /// MCP clients. Configure via `.mcp.json` with `{"command":"apr","args":["mcp"]}`.
557 Mcp {},
558 /// Interactive terminal UI
559 Tui {
560 /// Path to .apr model file
561 #[arg(value_name = "FILE")]
562 file: Option<PathBuf>,
563 },
564 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
565 Check {
566 /// Path to model file
567 #[arg(value_name = "FILE")]
568 file: PathBuf,
569 /// Disable GPU acceleration
570 #[arg(long)]
571 no_gpu: bool,
572 /// Output as JSON
573 #[arg(long)]
574 json: bool,
575 },
576 /// GPU status and VRAM reservation management (GPU-SHARE-001)
577 #[cfg(feature = "training")]
578 Gpu {
579 /// Show reservations as JSON
580 #[arg(long)]
581 json: bool,
582 },
583 /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
584 #[cfg(feature = "code")]
585 Code {
586 /// Path to local GGUF/APR model file (prefers .apr format)
587 #[arg(long)]
588 model: Option<PathBuf>,
589
590 /// Project directory (loads APR.md/CLAUDE.md from this path)
591 #[arg(long, default_value = ".")]
592 project: PathBuf,
593
594 /// Resume previous session (optionally by ID)
595 #[arg(long)]
596 resume: Option<Option<String>>,
597
598 /// Agent manifest (advanced — overrides defaults)
599 #[arg(long)]
600 manifest: Option<PathBuf>,
601
602 /// Initial prompt (non-interactive: print response and exit)
603 #[arg(short, long)]
604 print: bool,
605
606 /// Prompt text (positional, for -p mode)
607 #[arg(trailing_var_arg = true)]
608 prompt: Vec<String>,
609
610 /// Max turns before stopping
611 #[arg(long, default_value = "50")]
612 max_turns: u32,
613
614 /// Emit a `ccpa-trace.jsonl` describing the run to this path.
615 /// Format mirrors the schema at
616 /// <https://github.com/paiml/claude-code-parity-apr/blob/main/contracts/claude-code-parity-apr-v1.yaml>
617 /// (`§ trace_schema`). Used by `ccpa measure` to score apr-code
618 /// against canonical Claude Code reference fixtures.
619 #[arg(long)]
620 emit_trace: Option<PathBuf>,
621 },
622 /// Extended analysis, profiling, QA, and visualization commands
623 #[command(flatten)]
624 Extended(ExtendedCommands),
625
626 /// Monorepo management (publish, shims, audit, archive) [dev-only]
627 #[cfg(feature = "dev")]
628 #[command(subcommand)]
629 Mono(crate::commands::mono::MonoCommands),
630}