// apr_cli/commands_enum.rs
2#[derive(Subcommand, Debug)]
3pub enum Commands {
4 /// Run model directly (auto-download, cache, execute)
5 Run {
6 /// Model source: local path, hf://org/repo, or URL
7 #[arg(value_name = "SOURCE")]
8 source: String,
9 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10 #[arg(value_name = "PROMPT")]
11 positional_prompt: Option<String>,
12 /// Input file (audio, text, etc.)
13 #[arg(short, long)]
14 input: Option<PathBuf>,
15 /// Text prompt for generation (for LLM models)
16 #[arg(short, long)]
17 prompt: Option<String>,
18 /// Maximum tokens to generate (default: 32)
19 #[arg(short = 'n', long, default_value = "32")]
20 max_tokens: usize,
21 /// Enable streaming output
22 #[arg(long)]
23 stream: bool,
24 /// Language code (for ASR models)
25 #[arg(short, long)]
26 language: Option<String>,
27 /// Task (transcribe, translate)
28 #[arg(short, long)]
29 task: Option<String>,
30 /// Output format (text, json, srt, vtt)
31 #[arg(short = 'f', long, default_value = "text")]
32 format: String,
33 /// Disable GPU acceleration
34 #[arg(long, conflicts_with = "gpu")]
35 no_gpu: bool,
36 /// Force GPU acceleration
37 #[arg(long, conflicts_with = "no_gpu")]
38 gpu: bool,
39 /// Offline mode: block all network access (Sovereign AI compliance)
40 #[arg(long)]
41 offline: bool,
42 /// Benchmark mode: output performance metrics (tok/s, latency)
43 #[arg(long)]
44 benchmark: bool,
45 /// Enable inference tracing (APR-TRACE-001)
46 #[arg(long)]
47 trace: bool,
48 /// Trace specific steps only (comma-separated)
49 #[arg(long, value_delimiter = ',')]
50 trace_steps: Option<Vec<String>>,
51 /// Verbose tracing (show tensor values)
52 #[arg(long)]
53 trace_verbose: bool,
54 /// Save trace output to JSON file
55 #[arg(long, value_name = "FILE")]
56 trace_output: Option<PathBuf>,
57 /// Trace detail level (none, basic, layer, payload, chrome)
58 /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
59 /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
60 #[arg(long, value_name = "LEVEL", default_value = "basic")]
61 trace_level: String,
62 /// Shorthand for --trace --trace-level payload (tensor value inspection)
63 #[arg(long)]
64 trace_payload: bool,
65 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
66 #[arg(long)]
67 profile: bool,
68 /// Apply chat template for Instruct models (GAP-UX-001)
69 ///
70 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
71 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
72 #[arg(long)]
73 chat: bool,
74 /// Sampling temperature (0.0 = greedy, default: 0.0)
75 #[arg(long, default_value = "0.0")]
76 temperature: f32,
77 /// Top-k sampling (default: 1 = greedy)
78 #[arg(long, default_value = "1")]
79 top_k: usize,
80 /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
81 /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
82 #[arg(long)]
83 top_p: Option<f32>,
84 /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
85 /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
86 #[arg(long, default_value = "299792458")]
87 seed: u64,
88 /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
89 /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
90 #[arg(long, default_value = "1.0")]
91 repeat_penalty: f32,
92 /// Context window for repetition penalty (number of recent tokens to check)
93 /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
94 #[arg(long, default_value = "64")]
95 repeat_last_n: usize,
96 /// Process prompt tokens one-by-one instead of batched prefill.
97 /// Useful for debugging prefill correctness (comparing per-token attention).
98 /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
99 #[arg(long)]
100 split_prompt: bool,
101 /// Batch mode: read prompts from JSONL, output results as JSONL.
102 /// Model loads once, processes all prompts sequentially.
103 /// Each input line: {"prompt": "...", "task_id": "..."}
104 /// Chat template is applied automatically.
105 #[arg(long, value_name = "FILE")]
106 batch_jsonl: Option<PathBuf>,
107 /// Show verbose output (model loading, backend info)
108 #[arg(short, long)]
109 verbose: bool,
110 /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
111 #[arg(long, value_name = "BACKEND")]
112 backend: Option<String>,
113 },
114 /// Inference server (plan/run)
115 Serve {
116 #[command(subcommand)]
117 command: ServeCommands,
118 },
119 /// Inspect model metadata, vocab, and structure
120 Inspect {
121 /// Path to .apr model file
122 #[arg(value_name = "FILE")]
123 file: PathBuf,
124 /// Show vocabulary details
125 #[arg(long)]
126 vocab: bool,
127 /// Show filter/security details
128 #[arg(long)]
129 filters: bool,
130 /// Show weight statistics
131 #[arg(long)]
132 weights: bool,
133 /// Output as JSON
134 #[arg(long)]
135 json: bool,
136 },
137 /// Simple debugging output ("drama" mode available)
138 Debug {
139 /// Path to .apr model file
140 #[arg(value_name = "FILE")]
141 file: PathBuf,
142 /// Theatrical "drama" mode output
143 #[arg(long)]
144 drama: bool,
145 /// Show hex dump
146 #[arg(long)]
147 hex: bool,
148 /// Extract ASCII strings
149 #[arg(long)]
150 strings: bool,
151 /// Limit output lines
152 #[arg(long, default_value = "256")]
153 limit: usize,
154 },
155 /// Validate model integrity and quality
156 Validate {
157 /// Path to .apr model file
158 #[arg(value_name = "FILE")]
159 file: PathBuf,
160 /// Show 100-point quality assessment
161 #[arg(long)]
162 quality: bool,
163 /// Strict validation (fail on warnings)
164 #[arg(long)]
165 strict: bool,
166 /// Minimum score to pass (0-100)
167 #[arg(long)]
168 min_score: Option<u8>,
169 },
170 /// Compare two models
171 Diff {
172 /// First model file
173 #[arg(value_name = "FILE1")]
174 file1: PathBuf,
175 /// Second model file
176 #[arg(value_name = "FILE2")]
177 file2: PathBuf,
178 /// Show weight-level differences
179 #[arg(long)]
180 weights: bool,
181 /// Compare actual tensor values with statistical analysis
182 #[arg(long)]
183 values: bool,
184 /// Filter tensors by name pattern (for --values)
185 #[arg(long)]
186 filter: Option<String>,
187 /// Maximum number of tensors to compare (for --values)
188 #[arg(long, default_value = "10")]
189 limit: usize,
190 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
191 #[arg(long)]
192 transpose_aware: bool,
193 /// Output as JSON
194 #[arg(long)]
195 json: bool,
196 },
197 /// List tensor names and shapes
198 Tensors {
199 /// Path to .apr model file
200 #[arg(value_name = "FILE")]
201 file: PathBuf,
202 /// Show tensor statistics (mean, std, min, max)
203 #[arg(long)]
204 stats: bool,
205 /// Filter tensors by name pattern
206 #[arg(long)]
207 filter: Option<String>,
208 /// Limit number of tensors shown (0 = unlimited)
209 #[arg(long, default_value = "0")]
210 limit: usize,
211 /// Output as JSON
212 #[arg(long)]
213 json: bool,
214 },
215 /// Layer-by-layer trace analysis
216 Trace {
217 /// Path to .apr model file
218 #[arg(value_name = "FILE")]
219 file: PathBuf,
220 /// Filter layers by name pattern
221 #[arg(long)]
222 layer: Option<String>,
223 /// Compare with reference model
224 #[arg(long)]
225 reference: Option<PathBuf>,
226 /// Output as JSON
227 #[arg(long)]
228 json: bool,
229 /// Verbose output with per-layer stats
230 #[arg(short, long)]
231 verbose: bool,
232 /// Trace payload through model
233 #[arg(long)]
234 payload: bool,
235 /// Diff mode
236 #[arg(long)]
237 diff: bool,
238 /// Interactive mode
239 #[arg(long)]
240 interactive: bool,
241 },
242 /// Check for best practices and conventions
243 Lint {
244 /// Path to .apr model file
245 #[arg(value_name = "FILE")]
246 file: PathBuf,
247 },
248 /// Explain errors, architecture, tensors, and kernel dispatch
249 Explain {
250 /// Error code, model file path, or family name (auto-detected)
251 #[arg(value_name = "CODE_OR_FILE")]
252 code_or_file: Option<String>,
253 /// Path to .apr model file (optional context for --tensor)
254 #[arg(short, long)]
255 file: Option<PathBuf>,
256 /// Explain a specific tensor
257 #[arg(long)]
258 tensor: Option<String>,
259 /// Explain kernel dispatch pipeline for architecture
260 #[arg(long)]
261 kernel: bool,
262 /// Output as JSON
263 #[arg(long)]
264 json: bool,
265 /// Show kernel contract details and proof obligations
266 #[arg(short, long)]
267 verbose: bool,
268 /// Show per-kernel proof status from contract tests
269 #[arg(long)]
270 proof_status: bool,
271 },
272 /// Manage canary tests for regression
273 Canary {
274 #[command(subcommand)]
275 command: CanaryCommands,
276 },
277 /// Export model to other formats
278 Export {
279 /// Path to .apr model file
280 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
281 file: Option<PathBuf>,
282 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
283 #[arg(long, default_value = "safetensors")]
284 format: String,
285 /// Output file/directory path
286 #[arg(short, long)]
287 output: Option<PathBuf>,
288 /// Apply quantization during export (int8, int4, fp16)
289 #[arg(long)]
290 quantize: Option<String>,
291 /// List all supported export formats
292 #[arg(long)]
293 list_formats: bool,
294 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
295 #[arg(long)]
296 batch: Option<String>,
297 /// Output in JSON format
298 #[arg(long)]
299 json: bool,
300 /// Plan mode (validate inputs, show export plan, no execution)
301 #[arg(long)]
302 plan: bool,
303 },
304 /// Import from external formats (hf://org/repo, local files, URLs)
305 Import {
306 /// Source: hf://org/repo, local file, or URL
307 #[arg(value_name = "SOURCE")]
308 source: String,
309 /// Output .apr file path (default: derived from source name)
310 #[arg(short, long)]
311 output: Option<PathBuf>,
312 /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
313 #[arg(long, default_value = "auto")]
314 arch: String,
315 /// Quantization (int8, int4, fp16)
316 #[arg(long)]
317 quantize: Option<String>,
318 /// Strict mode: reject unverified architectures and fail on validation errors
319 #[arg(long)]
320 strict: bool,
321 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
322 /// Uses realizar's Q4K converter instead of dequantizing to F32
323 #[arg(long)]
324 preserve_q4k: bool,
325 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
326 /// Required if the GGUF has no embedded tokenizer vocabulary.
327 #[arg(long)]
328 tokenizer: Option<PathBuf>,
329 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
330 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
331 #[arg(long)]
332 enforce_provenance: bool,
333 /// GH-223: Allow import without config.json (default: error).
334 /// Without config.json, hyperparameters like rope_theta are inferred from
335 /// tensor shapes and may be wrong, producing garbage output.
336 #[arg(long)]
337 allow_no_config: bool,
338 },
339 /// Download and cache model from HuggingFace (Ollama-like UX)
340 Pull {
341 /// Model reference (alias, hf:// URI, or org/repo)
342 #[arg(value_name = "MODEL")]
343 model_ref: String,
344 /// Force re-download even if cached
345 #[arg(long)]
346 force: bool,
347 },
348 /// List cached models
349 #[command(name = "list", alias = "ls")]
350 List,
351 /// Remove model from cache
352 #[command(name = "rm", alias = "remove")]
353 Rm {
354 /// Model reference to remove
355 #[arg(value_name = "MODEL")]
356 model_ref: String,
357 },
358 /// Convert/optimize model
359 Convert {
360 /// Path to .apr model file
361 #[arg(value_name = "FILE")]
362 file: PathBuf,
363 /// Quantize to format (int8, int4, fp16, q4k)
364 #[arg(long)]
365 quantize: Option<String>,
366 /// Compress output (none, zstd, zstd-max, lz4)
367 #[arg(long)]
368 compress: Option<String>,
369 /// Output file path
370 #[arg(short, long)]
371 output: PathBuf,
372 /// Force overwrite existing files
373 #[arg(short, long)]
374 force: bool,
375 },
376 /// Compile model into standalone executable (APR-SPEC §4.16)
377 Compile {
378 /// Input .apr model file
379 #[arg(value_name = "FILE", required_unless_present = "list_targets")]
380 file: Option<PathBuf>,
381 /// Output binary path (default: derived from model name)
382 #[arg(short, long)]
383 output: Option<PathBuf>,
384 /// Target triple (e.g., x86_64-unknown-linux-musl)
385 #[arg(long)]
386 target: Option<String>,
387 /// Quantize weights before embedding (int8, int4, fp16)
388 #[arg(long)]
389 quantize: Option<String>,
390 /// Release mode (optimized)
391 #[arg(long)]
392 release: bool,
393 /// Strip debug symbols
394 #[arg(long)]
395 strip: bool,
396 /// Enable LTO (Link-Time Optimization)
397 #[arg(long)]
398 lto: bool,
399 /// List available compilation targets
400 #[arg(long)]
401 list_targets: bool,
402 },
403 /// Merge multiple models
404 Merge {
405 /// Model files to merge
406 #[arg(value_name = "FILES", num_args = 2..)]
407 files: Vec<PathBuf>,
408 /// Merge strategy (average, weighted, slerp, ties, dare)
409 #[arg(long, default_value = "average")]
410 strategy: String,
411 /// Output file path (optional in --plan mode)
412 #[arg(short, long, required_unless_present = "plan")]
413 output: Option<PathBuf>,
414 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
415 #[arg(long, value_delimiter = ',')]
416 weights: Option<Vec<f32>>,
417 /// Base model for TIES/DARE (task vectors computed as delta from base)
418 #[arg(long)]
419 base_model: Option<PathBuf>,
420 /// DARE drop probability (default: 0.9)
421 #[arg(long, default_value = "0.9")]
422 drop_rate: f32,
423 /// TIES trim density threshold (default: 0.2)
424 #[arg(long, default_value = "0.2")]
425 density: f32,
426 /// RNG seed for DARE (default: 42)
427 #[arg(long, default_value = "42")]
428 seed: u64,
429 /// Plan mode (validate inputs, show merge plan, no execution)
430 #[arg(long)]
431 plan: bool,
432 },
433 /// Quantize model weights (GH-243)
434 Quantize {
435 /// Input model file
436 #[arg(value_name = "FILE")]
437 file: PathBuf,
438 /// Quantization scheme: int8, int4, fp16, q4k
439 #[arg(long, short = 's', default_value = "int4")]
440 scheme: String,
441 /// Output file path (required unless --plan)
442 #[arg(short, long)]
443 output: Option<PathBuf>,
444 /// Output format override (apr, gguf, safetensors)
445 #[arg(long)]
446 format: Option<String>,
447 /// Batch quantization (comma-separated schemes)
448 #[arg(long)]
449 batch: Option<String>,
450 /// Plan mode (estimate only, no execution)
451 #[arg(long)]
452 plan: bool,
453 /// Force overwrite existing files
454 #[arg(short, long)]
455 force: bool,
456 },
457 /// Model optimization commands (fine-tune, prune, distill)
458 #[command(flatten)]
459 ModelOps(ModelOpsCommands),
460 /// Interactive terminal UI
461 Tui {
462 /// Path to .apr model file
463 #[arg(value_name = "FILE")]
464 file: Option<PathBuf>,
465 },
466 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
467 Check {
468 /// Path to model file
469 #[arg(value_name = "FILE")]
470 file: PathBuf,
471 /// Disable GPU acceleration
472 #[arg(long)]
473 no_gpu: bool,
474 /// Output as JSON
475 #[arg(long)]
476 json: bool,
477 },
478 /// GPU status and VRAM reservation management (GPU-SHARE-001)
479 #[cfg(feature = "training")]
480 Gpu {
481 /// Show reservations as JSON
482 #[arg(long)]
483 json: bool,
484 },
485 /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
486 #[cfg(feature = "code")]
487 Code {
488 /// Path to local GGUF/APR model file (prefers .apr format)
489 #[arg(long)]
490 model: Option<PathBuf>,
491
492 /// Project directory (loads APR.md/CLAUDE.md from this path)
493 #[arg(long, default_value = ".")]
494 project: PathBuf,
495
496 /// Resume previous session (optionally by ID)
497 #[arg(long)]
498 resume: Option<Option<String>>,
499
500 /// Agent manifest (advanced — overrides defaults)
501 #[arg(long)]
502 manifest: Option<PathBuf>,
503
504 /// Initial prompt (non-interactive: print response and exit)
505 #[arg(short, long)]
506 print: bool,
507
508 /// Prompt text (positional, for -p mode)
509 #[arg(trailing_var_arg = true)]
510 prompt: Vec<String>,
511
512 /// Max turns before stopping
513 #[arg(long, default_value = "50")]
514 max_turns: u32,
515 },
516 /// Extended analysis, profiling, QA, and visualization commands
517 #[command(flatten)]
518 Extended(ExtendedCommands),
519}