// apr_cli/commands_enum.rs

2#[derive(Subcommand, Debug)]
3pub enum Commands {
4    /// Run model directly (auto-download, cache, execute)
5    Run {
6        /// Model source: local path, hf://org/repo, or URL
7        #[arg(value_name = "SOURCE")]
8        source: String,
9        /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10        #[arg(value_name = "PROMPT")]
11        positional_prompt: Option<String>,
12        /// Input file (audio, text, etc.)
13        #[arg(short, long)]
14        input: Option<PathBuf>,
15        /// Text prompt for generation (for LLM models)
16        #[arg(short, long)]
17        prompt: Option<String>,
18        /// Maximum tokens to generate (default: 32)
19        #[arg(short = 'n', long, default_value = "32")]
20        max_tokens: usize,
21        /// Enable streaming output
22        #[arg(long)]
23        stream: bool,
24        /// Language code (for ASR models)
25        #[arg(short, long)]
26        language: Option<String>,
27        /// Task (transcribe, translate)
28        #[arg(short, long)]
29        task: Option<String>,
30        /// Output format (text, json, srt, vtt)
31        #[arg(short = 'f', long, default_value = "text")]
32        format: String,
33        /// Disable GPU acceleration (force CPU-only inference)
34        #[arg(long, alias = "cpu", conflicts_with = "gpu")]
35        no_gpu: bool,
36        /// Force GPU acceleration
37        #[arg(long, conflicts_with = "no_gpu")]
38        gpu: bool,
39        /// Offline mode: block all network access (Sovereign AI compliance)
40        #[arg(long)]
41        offline: bool,
42        /// Benchmark mode: output performance metrics (tok/s, latency)
43        #[arg(long)]
44        benchmark: bool,
45        /// Enable inference tracing (APR-TRACE-001)
46        #[arg(long)]
47        trace: bool,
48        /// Trace specific steps only (comma-separated)
49        #[arg(long, value_delimiter = ',')]
50        trace_steps: Option<Vec<String>>,
51        /// Verbose tracing (show tensor values)
52        #[arg(long)]
53        trace_verbose: bool,
54        /// Save trace output to JSON file
55        #[arg(long, value_name = "FILE")]
56        trace_output: Option<PathBuf>,
57        /// Trace detail level (none, basic, layer, payload, chrome)
58        /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
59        /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
60        #[arg(long, value_name = "LEVEL", default_value = "basic")]
61        trace_level: String,
62        /// Shorthand for --trace --trace-level payload (tensor value inspection)
63        #[arg(long)]
64        trace_payload: bool,
65        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
66        #[arg(long)]
67        profile: bool,
68        /// Apply chat template for Instruct models (GAP-UX-001)
69        ///
70        /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
71        /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
72        #[arg(long)]
73        chat: bool,
74        /// Sampling temperature (0.0 = greedy, default: 0.0)
75        #[arg(long, default_value = "0.0")]
76        temperature: f32,
77        /// Top-k sampling (default: 1 = greedy)
78        #[arg(long, default_value = "1")]
79        top_k: usize,
80        /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
81        /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
82        #[arg(long)]
83        top_p: Option<f32>,
84        /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
85        /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
86        #[arg(long, default_value = "299792458")]
87        seed: u64,
88        /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
89        /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
90        #[arg(long, default_value = "1.0")]
91        repeat_penalty: f32,
92        /// Context window for repetition penalty (number of recent tokens to check)
93        /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
94        #[arg(long, default_value = "64")]
95        repeat_last_n: usize,
96        /// Process prompt tokens one-by-one instead of batched prefill.
97        /// Useful for debugging prefill correctness (comparing per-token attention).
98        /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
99        #[arg(long)]
100        split_prompt: bool,
101        /// Batch mode: read prompts from JSONL, output results as JSONL.
102        /// Model loads once, processes all prompts sequentially.
103        /// Each input line: {"prompt": "...", "task_id": "..."}
104        /// Chat template is applied automatically.
105        #[arg(long, value_name = "FILE")]
106        batch_jsonl: Option<PathBuf>,
107        /// Show verbose output (model loading, backend info)
108        #[arg(short, long)]
109        verbose: bool,
110        /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
111        #[arg(long, value_name = "BACKEND")]
112        backend: Option<String>,
113    },
114    /// Inference server (plan/run)
115    Serve {
116        #[command(subcommand)]
117        command: ServeCommands,
118    },
119    /// Inspect model metadata, vocab, and structure
120    Inspect {
121        /// Path to .apr model file
122        #[arg(value_name = "FILE")]
123        file: PathBuf,
124        /// Show vocabulary details
125        #[arg(long)]
126        vocab: bool,
127        /// Show filter/security details
128        #[arg(long)]
129        filters: bool,
130        /// Show weight statistics
131        #[arg(long)]
132        weights: bool,
133        /// Output as JSON
134        #[arg(long)]
135        json: bool,
136    },
137    /// Simple debugging output ("drama" mode available)
138    Debug {
139        /// Path to .apr model file
140        #[arg(value_name = "FILE")]
141        file: PathBuf,
142        /// Theatrical "drama" mode output
143        #[arg(long)]
144        drama: bool,
145        /// Show hex dump
146        #[arg(long)]
147        hex: bool,
148        /// Extract ASCII strings
149        #[arg(long)]
150        strings: bool,
151        /// Limit output lines
152        #[arg(long, default_value = "256")]
153        limit: usize,
154    },
155    /// Validate model integrity and quality
156    Validate {
157        /// Path to .apr model file
158        #[arg(value_name = "FILE")]
159        file: PathBuf,
160        /// Show 100-point quality assessment
161        #[arg(long)]
162        quality: bool,
163        /// Strict validation (fail on warnings)
164        #[arg(long)]
165        strict: bool,
166        /// Minimum score to pass (0-100)
167        #[arg(long)]
168        min_score: Option<u8>,
169    },
170    /// Validate a publish manifest (FALSIFY-PM-001..006).
171    ///
172    /// Contract: `contracts/publish-manifest-v1.yaml`
173    /// Spec:     SPEC-SHIP-TWO-001 §12.3 AC-EX-004
174    ValidateManifest {
175        /// Path to manifest YAML
176        #[arg(value_name = "MANIFEST")]
177        file: PathBuf,
178        /// Optional local .apr artifact to discharge FALSIFY-PM-002 (sha256 match)
179        #[arg(long, value_name = "APR_FILE")]
180        artifact: Option<PathBuf>,
181        /// Discharge FALSIFY-PM-003 via network: HTTP HEAD + streaming sha256.
182        /// Default is DEFERRED (offline-safe). Ignored when --offline is set.
183        /// Closes F-PUBLISH-EXTRA-001::dogfood_ex05 (no Python in ex-05).
184        #[arg(long)]
185        live: bool,
186    },
187    /// Compare two models
188    Diff {
189        /// First model file
190        #[arg(value_name = "FILE1")]
191        file1: PathBuf,
192        /// Second model file
193        #[arg(value_name = "FILE2")]
194        file2: PathBuf,
195        /// Show weight-level differences
196        #[arg(long)]
197        weights: bool,
198        /// Compare actual tensor values with statistical analysis
199        #[arg(long)]
200        values: bool,
201        /// Filter tensors by name pattern (for --values)
202        #[arg(long)]
203        filter: Option<String>,
204        /// Maximum number of tensors to compare (for --values)
205        #[arg(long, default_value = "10")]
206        limit: usize,
207        /// Account for transpose when comparing (GGUF col-major vs APR row-major)
208        #[arg(long)]
209        transpose_aware: bool,
210        /// Output as JSON
211        #[arg(long)]
212        json: bool,
213    },
214    /// List tensor names and shapes
215    Tensors {
216        /// Path to .apr model file
217        #[arg(value_name = "FILE")]
218        file: PathBuf,
219        /// Show tensor statistics (mean, std, min, max)
220        #[arg(long)]
221        stats: bool,
222        /// Filter tensors by name pattern
223        #[arg(long)]
224        filter: Option<String>,
225        /// Limit number of tensors shown (0 = unlimited)
226        #[arg(long, default_value = "0")]
227        limit: usize,
228        /// Output as JSON
229        #[arg(long)]
230        json: bool,
231    },
232    /// Layer-by-layer trace analysis
233    Trace {
234        /// Path to .apr model file
235        #[arg(value_name = "FILE")]
236        file: PathBuf,
237        /// Filter layers by name pattern
238        #[arg(long)]
239        layer: Option<String>,
240        /// Compare with reference model
241        #[arg(long)]
242        reference: Option<PathBuf>,
243        /// Output as JSON
244        #[arg(long)]
245        json: bool,
246        /// Verbose output with per-layer stats
247        #[arg(short, long)]
248        verbose: bool,
249        /// Trace payload through model
250        #[arg(long)]
251        payload: bool,
252        /// Diff mode
253        #[arg(long)]
254        diff: bool,
255        /// Interactive mode
256        #[arg(long)]
257        interactive: bool,
258        /// Save per-stage F32 tensors during trace for SHIP-007 layer-0
259        /// element-wise diff. Comma-separated stage names from
260        /// `apr-cli-trace-save-tensor-v1.yaml` (e.g.
261        /// `embedding,qkv_matmul,attention`). Pass `all` to save every
262        /// stage. Output goes to `--save-tensor-dir` if provided,
263        /// else `<file_dir>/trace-tensors/<run_id>/`.
264        #[arg(long, value_name = "STAGES")]
265        save_tensor: Option<String>,
266        /// Output directory for `--save-tensor` (default: sibling
267        /// `trace-tensors/<run_id>/`).
268        #[arg(long, value_name = "DIR")]
269        save_tensor_dir: Option<PathBuf>,
270        /// Layer-id range for `--save-tensor` (default: 0..1, i.e.
271        /// layer 0 only). Format: `START..END` (Rust range syntax,
272        /// END exclusive).
273        #[arg(long, value_name = "RANGE", default_value = "0..1")]
274        save_tensor_layers: String,
275    },
276    /// Check for best practices and conventions
277    Lint {
278        /// Path to .apr model file
279        #[arg(value_name = "FILE")]
280        file: PathBuf,
281    },
282    /// Explain errors, architecture, tensors, and kernel dispatch
283    Explain {
284        /// Error code, model file path, or family name (auto-detected)
285        #[arg(value_name = "CODE_OR_FILE")]
286        code_or_file: Option<String>,
287        /// Path to .apr model file (optional context for --tensor)
288        #[arg(short, long)]
289        file: Option<PathBuf>,
290        /// Explain a specific tensor
291        #[arg(long)]
292        tensor: Option<String>,
293        /// Explain kernel dispatch pipeline for architecture
294        #[arg(long)]
295        kernel: bool,
296        /// Output as JSON
297        #[arg(long)]
298        json: bool,
299        /// Show kernel contract details and proof obligations
300        #[arg(short, long)]
301        verbose: bool,
302        /// Show per-kernel proof status from contract tests
303        #[arg(long)]
304        proof_status: bool,
305    },
306    /// Manage canary tests for regression
307    Canary {
308        #[command(subcommand)]
309        command: CanaryCommands,
310    },
311    /// Export model to other formats
312    Export {
313        /// Path to .apr model file
314        #[arg(value_name = "FILE", required_unless_present = "list_formats")]
315        file: Option<PathBuf>,
316        /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
317        #[arg(long, default_value = "safetensors")]
318        format: String,
319        /// Output file/directory path
320        #[arg(short, long)]
321        output: Option<PathBuf>,
322        /// Apply quantization during export (int8, int4, fp16)
323        #[arg(long)]
324        quantize: Option<String>,
325        /// List all supported export formats
326        #[arg(long)]
327        list_formats: bool,
328        /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
329        #[arg(long)]
330        batch: Option<String>,
331        /// Output in JSON format
332        #[arg(long)]
333        json: bool,
334        /// Plan mode (validate inputs, show export plan, no execution)
335        #[arg(long)]
336        plan: bool,
337    },
338    /// Import from external formats (hf://org/repo, local files, URLs)
339    Import {
340        /// Source: hf://org/repo, local file, or URL
341        #[arg(value_name = "SOURCE")]
342        source: String,
343        /// Output .apr file path (default: derived from source name)
344        #[arg(short, long)]
345        output: Option<PathBuf>,
346        /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
347        #[arg(long, default_value = "auto")]
348        arch: String,
349        /// Quantization (int8, int4, fp16)
350        #[arg(long)]
351        quantize: Option<String>,
352        /// Strict mode: reject unverified architectures and fail on validation errors
353        #[arg(long)]
354        strict: bool,
355        /// Preserve Q4K quantization for fused kernel inference (GGUF only)
356        /// Uses realizar's Q4K converter instead of dequantizing to F32
357        #[arg(long)]
358        preserve_q4k: bool,
359        /// PMAT-232: External tokenizer.json for weights-only GGUF files.
360        /// Required if the GGUF has no embedded tokenizer vocabulary.
361        #[arg(long)]
362        tokenizer: Option<PathBuf>,
363        /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
364        /// (only SafeTensors sources allowed). Ensures single-provenance testing.
365        #[arg(long)]
366        enforce_provenance: bool,
367        /// GH-223: Allow import without config.json (default: error).
368        /// Without config.json, hyperparameters like rope_theta are inferred from
369        /// tensor shapes and may be wrong, producing garbage output.
370        #[arg(long)]
371        allow_no_config: bool,
372    },
373    /// Download and cache model OR HuggingFace dataset (Ollama-like UX)
374    Pull {
375        /// Model reference (alias, hf:// URI, or org/repo) OR "dataset"
376        /// asset-type discriminator. When this value is the literal
377        /// string "dataset", the next positional `repo` is the
378        /// HuggingFace dataset repo and dataset-pull semantics apply.
379        #[arg(value_name = "MODEL_OR_ASSET_TYPE")]
380        model_ref: String,
381        /// Dataset repository (used only when model_ref == "dataset").
382        /// Per `apr-cli-pull-dataset-v1.yaml`.
383        #[arg(value_name = "REPO")]
384        repo: Option<String>,
385        /// Force re-download even if cached
386        #[arg(long)]
387        force: bool,
388        /// CRUX-A-01: resolve short name to canonical URL and exit without
389        /// performing any network I/O.
390        #[arg(long)]
391        dry_run: bool,
392        /// CRUX-A-03: pin to a specific branch, tag, or git SHA on the remote
393        /// (HuggingFace Hub). Defaults to "main" when omitted.
394        #[arg(long, value_name = "REV")]
395        revision: Option<String>,
396        /// CRUX-A-20: offline mode — forbid any outbound network I/O.
397        /// Equivalent to APR_OFFLINE=1 or HF_HUB_OFFLINE=1 in the environment.
398        #[arg(long)]
399        offline: bool,
400        /// (dataset mode) Glob pattern for shard selection. May be passed
401        /// multiple times; matches are unioned. fnmatch-compatible
402        /// (`*`, `?`, `[a-z]`). No-match is fail-fast.
403        #[arg(long, value_name = "GLOB")]
404        include: Vec<String>,
405        /// (dataset mode) Output directory. Default:
406        /// `~/.cache/aprender/datasets/<repo>/`.
407        #[arg(short = 'o', long)]
408        output: Option<PathBuf>,
409    },
410    /// Registry operations (CRUX-A-01): inspect alias map, etc.
411    Registry {
412        #[command(subcommand)]
413        command: crate::commands::registry::RegistryCommands,
414    },
415    /// List cached models
416    #[command(name = "list", alias = "ls")]
417    List,
418    /// Remove model from cache
419    #[command(name = "rm", alias = "remove")]
420    Rm {
421        /// Model reference to remove
422        #[arg(value_name = "MODEL")]
423        model_ref: String,
424    },
425    /// Convert/optimize model
426    Convert {
427        /// Path to .apr model file
428        #[arg(value_name = "FILE")]
429        file: PathBuf,
430        /// Quantize to format (int8, int4, fp16, q4k)
431        #[arg(long)]
432        quantize: Option<String>,
433        /// Compress output (none, zstd, zstd-max, lz4)
434        #[arg(long)]
435        compress: Option<String>,
436        /// Output file path
437        #[arg(short, long)]
438        output: PathBuf,
439        /// Force overwrite existing files
440        #[arg(short, long)]
441        force: bool,
442    },
443    /// Stamp provenance fields (license, data_source, data_license) onto an existing .apr file
444    ///
445    /// SHIP-009 full-discharge enabler — patches the three provenance fields on
446    /// a pre-built APR v2 artifact (e.g., the shipped MODEL-1 teacher whose
447    /// fields are all (missing) because it was built before GATE-APR-PROV-001..003
448    /// shipped). Tensor bytes and header flags are preserved verbatim.
449    Stamp {
450        /// Path to input .apr model file
451        #[arg(value_name = "FILE")]
452        file: PathBuf,
453        /// SPDX license identifier (e.g., Apache-2.0)
454        #[arg(long)]
455        license: Option<String>,
456        /// Training-data source (e.g., huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)
457        #[arg(long = "data-source")]
458        data_source: Option<String>,
459        /// SPDX license for data_source (e.g., Apache-2.0)
460        #[arg(long = "data-license")]
461        data_license: Option<String>,
462        /// Output file path
463        #[arg(short, long)]
464        output: PathBuf,
465        /// Force overwrite existing files
466        #[arg(short, long)]
467        force: bool,
468    },
469    /// Compile model into standalone executable (APR-SPEC §4.16)
470    Compile {
471        /// Input .apr model file
472        #[arg(value_name = "FILE", required_unless_present = "list_targets")]
473        file: Option<PathBuf>,
474        /// Output binary path (default: derived from model name)
475        #[arg(short, long)]
476        output: Option<PathBuf>,
477        /// Target triple (e.g., x86_64-unknown-linux-musl)
478        #[arg(long)]
479        target: Option<String>,
480        /// Quantize weights before embedding (int8, int4, fp16)
481        #[arg(long)]
482        quantize: Option<String>,
483        /// Release mode (optimized)
484        #[arg(long)]
485        release: bool,
486        /// Strip debug symbols
487        #[arg(long)]
488        strip: bool,
489        /// Enable LTO (Link-Time Optimization)
490        #[arg(long)]
491        lto: bool,
492        /// List available compilation targets
493        #[arg(long)]
494        list_targets: bool,
495    },
496    /// Merge multiple models
497    Merge {
498        /// Model files to merge
499        #[arg(value_name = "FILES", num_args = 2..)]
500        files: Vec<PathBuf>,
501        /// Merge strategy (average, weighted, slerp, ties, dare)
502        #[arg(long, default_value = "average")]
503        strategy: String,
504        /// Output file path (optional in --plan mode)
505        #[arg(short, long, required_unless_present = "plan")]
506        output: Option<PathBuf>,
507        /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
508        #[arg(long, value_delimiter = ',')]
509        weights: Option<Vec<f32>>,
510        /// Base model for TIES/DARE (task vectors computed as delta from base)
511        #[arg(long)]
512        base_model: Option<PathBuf>,
513        /// DARE drop probability (default: 0.9)
514        #[arg(long, default_value = "0.9")]
515        drop_rate: f32,
516        /// TIES trim density threshold (default: 0.2)
517        #[arg(long, default_value = "0.2")]
518        density: f32,
519        /// RNG seed for DARE (default: 42)
520        #[arg(long, default_value = "42")]
521        seed: u64,
522        /// Plan mode (validate inputs, show merge plan, no execution)
523        #[arg(long)]
524        plan: bool,
525    },
526    /// Quantize model weights (GH-243)
527    Quantize {
528        /// Input model file
529        #[arg(value_name = "FILE")]
530        file: PathBuf,
531        /// Quantization scheme: int8, int4, fp16, q4k
532        #[arg(long, short = 's', default_value = "int4")]
533        scheme: String,
534        /// Output file path (required unless --plan)
535        #[arg(short, long)]
536        output: Option<PathBuf>,
537        /// Output format override (apr, gguf, safetensors)
538        #[arg(long)]
539        format: Option<String>,
540        /// Batch quantization (comma-separated schemes)
541        #[arg(long)]
542        batch: Option<String>,
543        /// Plan mode (estimate only, no execution)
544        #[arg(long)]
545        plan: bool,
546        /// Force overwrite existing files
547        #[arg(short, long)]
548        force: bool,
549    },
550    /// Model optimization commands (fine-tune, prune, distill)
551    #[command(flatten)]
552    ModelOps(ModelOpsCommands),
553    /// Start the MCP (Model Context Protocol) server over stdio
554    ///
555    /// Exposes `apr` as MCP tools for Claude Code, Cursor, Cline, and other
556    /// MCP clients. Configure via `.mcp.json` with `{"command":"apr","args":["mcp"]}`.
557    Mcp {},
558    /// Interactive terminal UI
559    Tui {
560        /// Path to .apr model file
561        #[arg(value_name = "FILE")]
562        file: Option<PathBuf>,
563    },
564    /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
565    Check {
566        /// Path to model file
567        #[arg(value_name = "FILE")]
568        file: PathBuf,
569        /// Disable GPU acceleration
570        #[arg(long)]
571        no_gpu: bool,
572        /// Output as JSON
573        #[arg(long)]
574        json: bool,
575    },
576    /// GPU status and VRAM reservation management (GPU-SHARE-001)
577    #[cfg(feature = "training")]
578    Gpu {
579        /// Show reservations as JSON
580        #[arg(long)]
581        json: bool,
582    },
583    /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
584    #[cfg(feature = "code")]
585    Code {
586        /// Path to local GGUF/APR model file (prefers .apr format)
587        #[arg(long)]
588        model: Option<PathBuf>,
589
590        /// Project directory (loads APR.md/CLAUDE.md from this path)
591        #[arg(long, default_value = ".")]
592        project: PathBuf,
593
594        /// Resume previous session (optionally by ID)
595        #[arg(long)]
596        resume: Option<Option<String>>,
597
598        /// Agent manifest (advanced — overrides defaults)
599        #[arg(long)]
600        manifest: Option<PathBuf>,
601
602        /// Initial prompt (non-interactive: print response and exit)
603        #[arg(short, long)]
604        print: bool,
605
606        /// Prompt text (positional, for -p mode)
607        #[arg(trailing_var_arg = true)]
608        prompt: Vec<String>,
609
610        /// Max turns before stopping
611        #[arg(long, default_value = "50")]
612        max_turns: u32,
613
614        /// Emit a `ccpa-trace.jsonl` describing the run to this path.
615        /// Format mirrors the schema at
616        /// <https://github.com/paiml/claude-code-parity-apr/blob/main/contracts/claude-code-parity-apr-v1.yaml>
617        /// (`§ trace_schema`). Used by `ccpa measure` to score apr-code
618        /// against canonical Claude Code reference fixtures.
619        #[arg(long)]
620        emit_trace: Option<PathBuf>,
621    },
622    /// Extended analysis, profiling, QA, and visualization commands
623    #[command(flatten)]
624    Extended(ExtendedCommands),
625
626    /// Monorepo management (publish, shims, audit, archive) [dev-only]
627    #[cfg(feature = "dev")]
628    #[command(subcommand)]
629    Mono(crate::commands::mono::MonoCommands),
630}