// apr_cli/commands_enum.rs

2#[derive(Subcommand, Debug)]
3pub enum Commands {
4    /// Run model directly (auto-download, cache, execute)
5    Run {
6        /// Model source: local path, hf://org/repo, or URL
7        #[arg(value_name = "SOURCE")]
8        source: String,
9        /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10        #[arg(value_name = "PROMPT")]
11        positional_prompt: Option<String>,
12        /// Input file (audio, text, etc.)
13        #[arg(short, long)]
14        input: Option<PathBuf>,
15        /// Text prompt for generation (for LLM models)
16        #[arg(short, long)]
17        prompt: Option<String>,
18        /// Maximum tokens to generate (default: 32)
19        #[arg(short = 'n', long, default_value = "32")]
20        max_tokens: usize,
21        /// Enable streaming output
22        #[arg(long)]
23        stream: bool,
24        /// Language code (for ASR models)
25        #[arg(short, long)]
26        language: Option<String>,
27        /// Task (transcribe, translate)
28        #[arg(short, long)]
29        task: Option<String>,
30        /// Output format (text, json, srt, vtt)
31        #[arg(short = 'f', long, default_value = "text")]
32        format: String,
33        /// Disable GPU acceleration (force CPU-only inference)
34        #[arg(long, alias = "cpu", conflicts_with = "gpu")]
35        no_gpu: bool,
36        /// Force GPU acceleration
37        #[arg(long, conflicts_with = "no_gpu")]
38        gpu: bool,
39        /// Offline mode: block all network access (Sovereign AI compliance)
40        #[arg(long)]
41        offline: bool,
42        /// Benchmark mode: output performance metrics (tok/s, latency)
43        #[arg(long)]
44        benchmark: bool,
45        /// Enable inference tracing (APR-TRACE-001)
46        #[arg(long)]
47        trace: bool,
48        /// Trace specific steps only (comma-separated)
49        #[arg(long, value_delimiter = ',')]
50        trace_steps: Option<Vec<String>>,
51        /// Verbose tracing (show tensor values)
52        #[arg(long)]
53        trace_verbose: bool,
54        /// Save trace output to JSON file
55        #[arg(long, value_name = "FILE")]
56        trace_output: Option<PathBuf>,
57        /// Trace detail level (none, basic, layer, payload, chrome)
58        /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
59        /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
60        #[arg(long, value_name = "LEVEL", default_value = "basic")]
61        trace_level: String,
62        /// Shorthand for --trace --trace-level payload (tensor value inspection)
63        #[arg(long)]
64        trace_payload: bool,
65        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
66        #[arg(long)]
67        profile: bool,
68        /// Apply chat template for Instruct models (GAP-UX-001)
69        ///
70        /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
71        /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
72        #[arg(long)]
73        chat: bool,
74        /// Sampling temperature (0.0 = greedy, default: 0.0)
75        #[arg(long, default_value = "0.0")]
76        temperature: f32,
77        /// Top-k sampling (default: 1 = greedy)
78        #[arg(long, default_value = "1")]
79        top_k: usize,
80        /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
81        /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
82        #[arg(long)]
83        top_p: Option<f32>,
84        /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
85        /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
86        #[arg(long, default_value = "299792458")]
87        seed: u64,
88        /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
89        /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
90        #[arg(long, default_value = "1.0")]
91        repeat_penalty: f32,
92        /// Context window for repetition penalty (number of recent tokens to check)
93        /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
94        #[arg(long, default_value = "64")]
95        repeat_last_n: usize,
96        /// Process prompt tokens one-by-one instead of batched prefill.
97        /// Useful for debugging prefill correctness (comparing per-token attention).
98        /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
99        #[arg(long)]
100        split_prompt: bool,
101        /// Batch mode: read prompts from JSONL, output results as JSONL.
102        /// Model loads once, processes all prompts sequentially.
103        /// Each input line: {"prompt": "...", "task_id": "..."}
104        /// Chat template is applied automatically.
105        #[arg(long, value_name = "FILE")]
106        batch_jsonl: Option<PathBuf>,
107        /// Show verbose output (model loading, backend info)
108        #[arg(short, long)]
109        verbose: bool,
110        /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
111        #[arg(long, value_name = "BACKEND")]
112        backend: Option<String>,
113    },
114    /// Inference server (plan/run)
115    Serve {
116        #[command(subcommand)]
117        command: ServeCommands,
118    },
119    /// Inspect model metadata, vocab, and structure
120    Inspect {
121        /// Path to .apr model file
122        #[arg(value_name = "FILE")]
123        file: PathBuf,
124        /// Show vocabulary details
125        #[arg(long)]
126        vocab: bool,
127        /// Show filter/security details
128        #[arg(long)]
129        filters: bool,
130        /// Show weight statistics
131        #[arg(long)]
132        weights: bool,
133        /// Output as JSON
134        #[arg(long)]
135        json: bool,
136    },
137    /// Simple debugging output ("drama" mode available)
138    Debug {
139        /// Path to .apr model file
140        #[arg(value_name = "FILE")]
141        file: PathBuf,
142        /// Theatrical "drama" mode output
143        #[arg(long)]
144        drama: bool,
145        /// Show hex dump
146        #[arg(long)]
147        hex: bool,
148        /// Extract ASCII strings
149        #[arg(long)]
150        strings: bool,
151        /// Limit output lines
152        #[arg(long, default_value = "256")]
153        limit: usize,
154    },
155    /// Validate model integrity and quality
156    Validate {
157        /// Path to .apr model file
158        #[arg(value_name = "FILE")]
159        file: PathBuf,
160        /// Show 100-point quality assessment
161        #[arg(long)]
162        quality: bool,
163        /// Strict validation (fail on warnings)
164        #[arg(long)]
165        strict: bool,
166        /// Minimum score to pass (0-100)
167        #[arg(long)]
168        min_score: Option<u8>,
169    },
170    /// Validate a publish manifest (FALSIFY-PM-001..006).
171    ///
172    /// Contract: `contracts/publish-manifest-v1.yaml`
173    /// Spec:     SPEC-SHIP-TWO-001 §12.3 AC-EX-004
174    ValidateManifest {
175        /// Path to manifest YAML
176        #[arg(value_name = "MANIFEST")]
177        file: PathBuf,
178        /// Optional local .apr artifact to discharge FALSIFY-PM-002 (sha256 match)
179        #[arg(long, value_name = "APR_FILE")]
180        artifact: Option<PathBuf>,
181        /// Discharge FALSIFY-PM-003 via network: HTTP HEAD + streaming sha256.
182        /// Default is DEFERRED (offline-safe). Ignored when --offline is set.
183        /// Closes F-PUBLISH-EXTRA-001::dogfood_ex05 (no Python in ex-05).
184        #[arg(long)]
185        live: bool,
186    },
187    /// Compare two models
188    Diff {
189        /// First model file
190        #[arg(value_name = "FILE1")]
191        file1: PathBuf,
192        /// Second model file
193        #[arg(value_name = "FILE2")]
194        file2: PathBuf,
195        /// Show weight-level differences
196        #[arg(long)]
197        weights: bool,
198        /// Compare actual tensor values with statistical analysis
199        #[arg(long)]
200        values: bool,
201        /// Filter tensors by name pattern (for --values)
202        #[arg(long)]
203        filter: Option<String>,
204        /// Maximum number of tensors to compare (for --values)
205        #[arg(long, default_value = "10")]
206        limit: usize,
207        /// Account for transpose when comparing (GGUF col-major vs APR row-major)
208        #[arg(long)]
209        transpose_aware: bool,
210        /// Output as JSON
211        #[arg(long)]
212        json: bool,
213    },
214    /// List tensor names and shapes
215    Tensors {
216        /// Path to .apr model file
217        #[arg(value_name = "FILE")]
218        file: PathBuf,
219        /// Show tensor statistics (mean, std, min, max)
220        #[arg(long)]
221        stats: bool,
222        /// Filter tensors by name pattern
223        #[arg(long)]
224        filter: Option<String>,
225        /// Limit number of tensors shown (0 = unlimited)
226        #[arg(long, default_value = "0")]
227        limit: usize,
228        /// Output as JSON
229        #[arg(long)]
230        json: bool,
231    },
232    /// Layer-by-layer trace analysis
233    Trace {
234        /// Path to .apr model file
235        #[arg(value_name = "FILE")]
236        file: PathBuf,
237        /// Filter layers by name pattern
238        #[arg(long)]
239        layer: Option<String>,
240        /// Compare with reference model
241        #[arg(long)]
242        reference: Option<PathBuf>,
243        /// Output as JSON
244        #[arg(long)]
245        json: bool,
246        /// Verbose output with per-layer stats
247        #[arg(short, long)]
248        verbose: bool,
249        /// Trace payload through model
250        #[arg(long)]
251        payload: bool,
252        /// Diff mode
253        #[arg(long)]
254        diff: bool,
255        /// Interactive mode
256        #[arg(long)]
257        interactive: bool,
258    },
259    /// Check for best practices and conventions
260    Lint {
261        /// Path to .apr model file
262        #[arg(value_name = "FILE")]
263        file: PathBuf,
264    },
265    /// Explain errors, architecture, tensors, and kernel dispatch
266    Explain {
267        /// Error code, model file path, or family name (auto-detected)
268        #[arg(value_name = "CODE_OR_FILE")]
269        code_or_file: Option<String>,
270        /// Path to .apr model file (optional context for --tensor)
271        #[arg(short, long)]
272        file: Option<PathBuf>,
273        /// Explain a specific tensor
274        #[arg(long)]
275        tensor: Option<String>,
276        /// Explain kernel dispatch pipeline for architecture
277        #[arg(long)]
278        kernel: bool,
279        /// Output as JSON
280        #[arg(long)]
281        json: bool,
282        /// Show kernel contract details and proof obligations
283        #[arg(short, long)]
284        verbose: bool,
285        /// Show per-kernel proof status from contract tests
286        #[arg(long)]
287        proof_status: bool,
288    },
289    /// Manage canary tests for regression
290    Canary {
291        #[command(subcommand)]
292        command: CanaryCommands,
293    },
294    /// Export model to other formats
295    Export {
296        /// Path to .apr model file
297        #[arg(value_name = "FILE", required_unless_present = "list_formats")]
298        file: Option<PathBuf>,
299        /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
300        #[arg(long, default_value = "safetensors")]
301        format: String,
302        /// Output file/directory path
303        #[arg(short, long)]
304        output: Option<PathBuf>,
305        /// Apply quantization during export (int8, int4, fp16)
306        #[arg(long)]
307        quantize: Option<String>,
308        /// List all supported export formats
309        #[arg(long)]
310        list_formats: bool,
311        /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
312        #[arg(long)]
313        batch: Option<String>,
314        /// Output in JSON format
315        #[arg(long)]
316        json: bool,
317        /// Plan mode (validate inputs, show export plan, no execution)
318        #[arg(long)]
319        plan: bool,
320    },
321    /// Import from external formats (hf://org/repo, local files, URLs)
322    Import {
323        /// Source: hf://org/repo, local file, or URL
324        #[arg(value_name = "SOURCE")]
325        source: String,
326        /// Output .apr file path (default: derived from source name)
327        #[arg(short, long)]
328        output: Option<PathBuf>,
329        /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
330        #[arg(long, default_value = "auto")]
331        arch: String,
332        /// Quantization (int8, int4, fp16)
333        #[arg(long)]
334        quantize: Option<String>,
335        /// Strict mode: reject unverified architectures and fail on validation errors
336        #[arg(long)]
337        strict: bool,
338        /// Preserve Q4K quantization for fused kernel inference (GGUF only)
339        /// Uses realizar's Q4K converter instead of dequantizing to F32
340        #[arg(long)]
341        preserve_q4k: bool,
342        /// PMAT-232: External tokenizer.json for weights-only GGUF files.
343        /// Required if the GGUF has no embedded tokenizer vocabulary.
344        #[arg(long)]
345        tokenizer: Option<PathBuf>,
346        /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
347        /// (only SafeTensors sources allowed). Ensures single-provenance testing.
348        #[arg(long)]
349        enforce_provenance: bool,
350        /// GH-223: Allow import without config.json (default: error).
351        /// Without config.json, hyperparameters like rope_theta are inferred from
352        /// tensor shapes and may be wrong, producing garbage output.
353        #[arg(long)]
354        allow_no_config: bool,
355    },
356    /// Download and cache model from HuggingFace (Ollama-like UX)
357    Pull {
358        /// Model reference (alias, hf:// URI, or org/repo)
359        #[arg(value_name = "MODEL")]
360        model_ref: String,
361        /// Force re-download even if cached
362        #[arg(long)]
363        force: bool,
364    },
365    /// List cached models
366    #[command(name = "list", alias = "ls")]
367    List,
368    /// Remove model from cache
369    #[command(name = "rm", alias = "remove")]
370    Rm {
371        /// Model reference to remove
372        #[arg(value_name = "MODEL")]
373        model_ref: String,
374    },
375    /// Convert/optimize model
376    Convert {
377        /// Path to .apr model file
378        #[arg(value_name = "FILE")]
379        file: PathBuf,
380        /// Quantize to format (int8, int4, fp16, q4k)
381        #[arg(long)]
382        quantize: Option<String>,
383        /// Compress output (none, zstd, zstd-max, lz4)
384        #[arg(long)]
385        compress: Option<String>,
386        /// Output file path
387        #[arg(short, long)]
388        output: PathBuf,
389        /// Force overwrite existing files
390        #[arg(short, long)]
391        force: bool,
392    },
393    /// Compile model into standalone executable (APR-SPEC §4.16)
394    Compile {
395        /// Input .apr model file
396        #[arg(value_name = "FILE", required_unless_present = "list_targets")]
397        file: Option<PathBuf>,
398        /// Output binary path (default: derived from model name)
399        #[arg(short, long)]
400        output: Option<PathBuf>,
401        /// Target triple (e.g., x86_64-unknown-linux-musl)
402        #[arg(long)]
403        target: Option<String>,
404        /// Quantize weights before embedding (int8, int4, fp16)
405        #[arg(long)]
406        quantize: Option<String>,
407        /// Release mode (optimized)
408        #[arg(long)]
409        release: bool,
410        /// Strip debug symbols
411        #[arg(long)]
412        strip: bool,
413        /// Enable LTO (Link-Time Optimization)
414        #[arg(long)]
415        lto: bool,
416        /// List available compilation targets
417        #[arg(long)]
418        list_targets: bool,
419    },
420    /// Merge multiple models
421    Merge {
422        /// Model files to merge
423        #[arg(value_name = "FILES", num_args = 2..)]
424        files: Vec<PathBuf>,
425        /// Merge strategy (average, weighted, slerp, ties, dare)
426        #[arg(long, default_value = "average")]
427        strategy: String,
428        /// Output file path (optional in --plan mode)
429        #[arg(short, long, required_unless_present = "plan")]
430        output: Option<PathBuf>,
431        /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
432        #[arg(long, value_delimiter = ',')]
433        weights: Option<Vec<f32>>,
434        /// Base model for TIES/DARE (task vectors computed as delta from base)
435        #[arg(long)]
436        base_model: Option<PathBuf>,
437        /// DARE drop probability (default: 0.9)
438        #[arg(long, default_value = "0.9")]
439        drop_rate: f32,
440        /// TIES trim density threshold (default: 0.2)
441        #[arg(long, default_value = "0.2")]
442        density: f32,
443        /// RNG seed for DARE (default: 42)
444        #[arg(long, default_value = "42")]
445        seed: u64,
446        /// Plan mode (validate inputs, show merge plan, no execution)
447        #[arg(long)]
448        plan: bool,
449    },
450    /// Quantize model weights (GH-243)
451    Quantize {
452        /// Input model file
453        #[arg(value_name = "FILE")]
454        file: PathBuf,
455        /// Quantization scheme: int8, int4, fp16, q4k
456        #[arg(long, short = 's', default_value = "int4")]
457        scheme: String,
458        /// Output file path (required unless --plan)
459        #[arg(short, long)]
460        output: Option<PathBuf>,
461        /// Output format override (apr, gguf, safetensors)
462        #[arg(long)]
463        format: Option<String>,
464        /// Batch quantization (comma-separated schemes)
465        #[arg(long)]
466        batch: Option<String>,
467        /// Plan mode (estimate only, no execution)
468        #[arg(long)]
469        plan: bool,
470        /// Force overwrite existing files
471        #[arg(short, long)]
472        force: bool,
473    },
474    /// Model optimization commands (fine-tune, prune, distill)
475    #[command(flatten)]
476    ModelOps(ModelOpsCommands),
477    /// Start the MCP (Model Context Protocol) server over stdio
478    ///
479    /// Exposes `apr` as MCP tools for Claude Code, Cursor, Cline, and other
480    /// MCP clients. Configure via `.mcp.json` with `{"command":"apr","args":["mcp"]}`.
481    Mcp {},
482    /// Interactive terminal UI
483    Tui {
484        /// Path to .apr model file
485        #[arg(value_name = "FILE")]
486        file: Option<PathBuf>,
487    },
488    /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
489    Check {
490        /// Path to model file
491        #[arg(value_name = "FILE")]
492        file: PathBuf,
493        /// Disable GPU acceleration
494        #[arg(long)]
495        no_gpu: bool,
496        /// Output as JSON
497        #[arg(long)]
498        json: bool,
499    },
500    /// GPU status and VRAM reservation management (GPU-SHARE-001)
501    #[cfg(feature = "training")]
502    Gpu {
503        /// Show reservations as JSON
504        #[arg(long)]
505        json: bool,
506    },
507    /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
508    #[cfg(feature = "code")]
509    Code {
510        /// Path to local GGUF/APR model file (prefers .apr format)
511        #[arg(long)]
512        model: Option<PathBuf>,
513
514        /// Project directory (loads APR.md/CLAUDE.md from this path)
515        #[arg(long, default_value = ".")]
516        project: PathBuf,
517
518        /// Resume previous session (optionally by ID)
519        #[arg(long)]
520        resume: Option<Option<String>>,
521
522        /// Agent manifest (advanced — overrides defaults)
523        #[arg(long)]
524        manifest: Option<PathBuf>,
525
526        /// Initial prompt (non-interactive: print response and exit)
527        #[arg(short, long)]
528        print: bool,
529
530        /// Prompt text (positional, for -p mode)
531        #[arg(trailing_var_arg = true)]
532        prompt: Vec<String>,
533
534        /// Max turns before stopping
535        #[arg(long, default_value = "50")]
536        max_turns: u32,
537    },
538    /// Extended analysis, profiling, QA, and visualization commands
539    #[command(flatten)]
540    Extended(ExtendedCommands),
541
542    /// Monorepo management (publish, shims, audit, archive) [dev-only]
543    #[cfg(feature = "dev")]
544    #[command(subcommand)]
545    Mono(crate::commands::mono::MonoCommands),
546}