apr_cli/commands_enum.rs
1
// Top-level subcommand enum for the `apr` CLI. The `Subcommand` derive and the
// `#[arg]` / `#[command]` attributes look like clap's derive API.
// NOTE(review): under clap's derive API the `///` doc comments double as the
// user-facing --help text, so rewording them changes CLI output. Maintainer
// notes in this enum therefore use plain `//` comments.
2#[derive(Subcommand, Debug)]
3pub enum Commands {
4 /// Run model directly (auto-download, cache, execute)
5 Run {
6 /// Model source: local path, hf://org/repo, or URL
7 #[arg(value_name = "SOURCE")]
8 source: String,
        // A prompt can be given positionally here or via -p/--prompt below. Both are
        // optional; how the two are reconciled is not visible in this enum.
9 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10 #[arg(value_name = "PROMPT")]
11 positional_prompt: Option<String>,
12 /// Input file (audio, text, etc.)
13 #[arg(short, long)]
14 input: Option<PathBuf>,
15 /// Text prompt for generation (for LLM models)
16 #[arg(short, long)]
17 prompt: Option<String>,
18 /// Maximum tokens to generate (default: 32)
19 #[arg(short = 'n', long, default_value = "32")]
20 max_tokens: usize,
21 /// Enable streaming output
22 #[arg(long)]
23 stream: bool,
24 /// Language code (for ASR models)
25 #[arg(short, long)]
26 language: Option<String>,
27 /// Task (transcribe, translate)
28 #[arg(short, long)]
29 task: Option<String>,
        // Free-form String: the formats listed in the help text are not enforced by
        // this declaration.
30 /// Output format (text, json, srt, vtt)
31 #[arg(short = 'f', long, default_value = "text")]
32 format: String,
        // --no-gpu (also accepted as --cpu via the alias) and --gpu are declared
        // mutually exclusive on both fields.
33 /// Disable GPU acceleration (force CPU-only inference)
34 #[arg(long, alias = "cpu", conflicts_with = "gpu")]
35 no_gpu: bool,
36 /// Force GPU acceleration
37 #[arg(long, conflicts_with = "no_gpu")]
38 gpu: bool,
39 /// Offline mode: block all network access (Sovereign AI compliance)
40 #[arg(long)]
41 offline: bool,
42 /// Benchmark mode: output performance metrics (tok/s, latency)
43 #[arg(long)]
44 benchmark: bool,
45 /// Enable inference tracing (APR-TRACE-001)
46 #[arg(long)]
47 trace: bool,
        // value_delimiter = ',' splits one comma-separated argument into the Vec.
48 /// Trace specific steps only (comma-separated)
49 #[arg(long, value_delimiter = ',')]
50 trace_steps: Option<Vec<String>>,
51 /// Verbose tracing (show tensor values)
52 #[arg(long)]
53 trace_verbose: bool,
54 /// Save trace output to JSON file
55 #[arg(long, value_name = "FILE")]
56 trace_output: Option<PathBuf>,
        // Stored as a plain String defaulting to "basic"; the level names in the help
        // text are not validated by this declaration.
57 /// Trace detail level (none, basic, layer, payload, chrome)
58 /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
59 /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
60 #[arg(long, value_name = "LEVEL", default_value = "basic")]
61 trace_level: String,
62 /// Shorthand for --trace --trace-level payload (tensor value inspection)
63 #[arg(long)]
64 trace_payload: bool,
65 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
66 #[arg(long)]
67 profile: bool,
68 /// Apply chat template for Instruct models (GAP-UX-001)
69 ///
70 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
71 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
72 #[arg(long)]
73 chat: bool,
74 /// Sampling temperature (0.0 = greedy, default: 0.0)
75 #[arg(long, default_value = "0.0")]
76 temperature: f32,
77 /// Top-k sampling (default: 1 = greedy)
78 #[arg(long, default_value = "1")]
79 top_k: usize,
        // No default_value here: the field stays None unless --top-p is passed.
80 /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
81 /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
82 #[arg(long)]
83 top_p: Option<f32>,
84 /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
85 /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
86 #[arg(long, default_value = "299792458")]
87 seed: u64,
88 /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
89 /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
90 #[arg(long, default_value = "1.0")]
91 repeat_penalty: f32,
92 /// Context window for repetition penalty (number of recent tokens to check)
93 /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
94 #[arg(long, default_value = "64")]
95 repeat_last_n: usize,
96 /// Process prompt tokens one-by-one instead of batched prefill.
97 /// Useful for debugging prefill correctness (comparing per-token attention).
98 /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
99 #[arg(long)]
100 split_prompt: bool,
101 /// Batch mode: read prompts from JSONL, output results as JSONL.
102 /// Model loads once, processes all prompts sequentially.
103 /// Each input line: {"prompt": "...", "task_id": "..."}
104 /// Chat template is applied automatically.
105 #[arg(long, value_name = "FILE")]
106 batch_jsonl: Option<PathBuf>,
107 /// Show verbose output (model loading, backend info)
108 #[arg(short, long)]
109 verbose: bool,
110 /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
111 #[arg(long, value_name = "BACKEND")]
112 backend: Option<String>,
113 },
    // Delegates to the nested ServeCommands subcommand enum, which is defined
    // outside this block.
114 /// Inference server (plan/run)
115 Serve {
116 #[command(subcommand)]
117 command: ServeCommands,
118 },
119 /// Inspect model metadata, vocab, and structure
120 Inspect {
121 /// Path to .apr model file
122 #[arg(value_name = "FILE")]
123 file: PathBuf,
124 /// Show vocabulary details
125 #[arg(long)]
126 vocab: bool,
127 /// Show filter/security details
128 #[arg(long)]
129 filters: bool,
130 /// Show weight statistics
131 #[arg(long)]
132 weights: bool,
133 /// Output as JSON
134 #[arg(long)]
135 json: bool,
136 },
137 /// Simple debugging output ("drama" mode available)
138 Debug {
139 /// Path to .apr model file
140 #[arg(value_name = "FILE")]
141 file: PathBuf,
142 /// Theatrical "drama" mode output
143 #[arg(long)]
144 drama: bool,
145 /// Show hex dump
146 #[arg(long)]
147 hex: bool,
148 /// Extract ASCII strings
149 #[arg(long)]
150 strings: bool,
151 /// Limit output lines
152 #[arg(long, default_value = "256")]
153 limit: usize,
154 },
155 /// Validate model integrity and quality
156 Validate {
157 /// Path to .apr model file
158 #[arg(value_name = "FILE")]
159 file: PathBuf,
160 /// Show 100-point quality assessment
161 #[arg(long)]
162 quality: bool,
163 /// Strict validation (fail on warnings)
164 #[arg(long)]
165 strict: bool,
        // u8 admits 0-255, so the 0-100 range in the help text is not enforced by the
        // type alone.
166 /// Minimum score to pass (0-100)
167 #[arg(long)]
168 min_score: Option<u8>,
169 },
170 /// Validate a publish manifest (FALSIFY-PM-001..006).
171 ///
172 /// Contract: `contracts/publish-manifest-v1.yaml`
173 /// Spec: SPEC-SHIP-TWO-001 §12.3 AC-EX-004
174 ValidateManifest {
175 /// Path to manifest YAML
176 #[arg(value_name = "MANIFEST")]
177 file: PathBuf,
178 /// Optional local .apr artifact to discharge FALSIFY-PM-002 (sha256 match)
179 #[arg(long, value_name = "APR_FILE")]
180 artifact: Option<PathBuf>,
        // NOTE(review): the help text below refers to --offline, but this variant
        // declares no such flag; presumably it is a global option defined elsewhere —
        // confirm.
181 /// Discharge FALSIFY-PM-003 via network: HTTP HEAD + streaming sha256.
182 /// Default is DEFERRED (offline-safe). Ignored when --offline is set.
183 /// Closes F-PUBLISH-EXTRA-001::dogfood_ex05 (no Python in ex-05).
184 #[arg(long)]
185 live: bool,
186 },
187 /// Compare two models
188 Diff {
189 /// First model file
190 #[arg(value_name = "FILE1")]
191 file1: PathBuf,
192 /// Second model file
193 #[arg(value_name = "FILE2")]
194 file2: PathBuf,
195 /// Show weight-level differences
196 #[arg(long)]
197 weights: bool,
198 /// Compare actual tensor values with statistical analysis
199 #[arg(long)]
200 values: bool,
201 /// Filter tensors by name pattern (for --values)
202 #[arg(long)]
203 filter: Option<String>,
204 /// Maximum number of tensors to compare (for --values)
205 #[arg(long, default_value = "10")]
206 limit: usize,
207 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
208 #[arg(long)]
209 transpose_aware: bool,
210 /// Output as JSON
211 #[arg(long)]
212 json: bool,
213 },
214 /// List tensor names and shapes
215 Tensors {
216 /// Path to .apr model file
217 #[arg(value_name = "FILE")]
218 file: PathBuf,
219 /// Show tensor statistics (mean, std, min, max)
220 #[arg(long)]
221 stats: bool,
222 /// Filter tensors by name pattern
223 #[arg(long)]
224 filter: Option<String>,
225 /// Limit number of tensors shown (0 = unlimited)
226 #[arg(long, default_value = "0")]
227 limit: usize,
228 /// Output as JSON
229 #[arg(long)]
230 json: bool,
231 },
232 /// Layer-by-layer trace analysis
233 Trace {
234 /// Path to .apr model file
235 #[arg(value_name = "FILE")]
236 file: PathBuf,
237 /// Filter layers by name pattern
238 #[arg(long)]
239 layer: Option<String>,
240 /// Compare with reference model
241 #[arg(long)]
242 reference: Option<PathBuf>,
243 /// Output as JSON
244 #[arg(long)]
245 json: bool,
246 /// Verbose output with per-layer stats
247 #[arg(short, long)]
248 verbose: bool,
249 /// Trace payload through model
250 #[arg(long)]
251 payload: bool,
252 /// Diff mode
253 #[arg(long)]
254 diff: bool,
255 /// Interactive mode
256 #[arg(long)]
257 interactive: bool,
258 },
259 /// Check for best practices and conventions
260 Lint {
261 /// Path to .apr model file
262 #[arg(value_name = "FILE")]
263 file: PathBuf,
264 },
265 /// Explain errors, architecture, tensors, and kernel dispatch
266 Explain {
        // The positional is kept as a raw String; telling an error code from a file
        // path or family name is left to the handler, which is not visible here.
267 /// Error code, model file path, or family name (auto-detected)
268 #[arg(value_name = "CODE_OR_FILE")]
269 code_or_file: Option<String>,
270 /// Path to .apr model file (optional context for --tensor)
271 #[arg(short, long)]
272 file: Option<PathBuf>,
273 /// Explain a specific tensor
274 #[arg(long)]
275 tensor: Option<String>,
276 /// Explain kernel dispatch pipeline for architecture
277 #[arg(long)]
278 kernel: bool,
279 /// Output as JSON
280 #[arg(long)]
281 json: bool,
282 /// Show kernel contract details and proof obligations
283 #[arg(short, long)]
284 verbose: bool,
285 /// Show per-kernel proof status from contract tests
286 #[arg(long)]
287 proof_status: bool,
288 },
289 /// Manage canary tests for regression
290 Canary {
291 #[command(subcommand)]
292 command: CanaryCommands,
293 },
294 /// Export model to other formats
295 Export {
        // FILE may be omitted only when --list-formats is passed, hence the Option.
296 /// Path to .apr model file
297 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
298 file: Option<PathBuf>,
299 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
300 #[arg(long, default_value = "safetensors")]
301 format: String,
302 /// Output file/directory path
303 #[arg(short, long)]
304 output: Option<PathBuf>,
305 /// Apply quantization during export (int8, int4, fp16)
306 #[arg(long)]
307 quantize: Option<String>,
308 /// List all supported export formats
309 #[arg(long)]
310 list_formats: bool,
        // Kept as one raw String: unlike `Run::trace_steps` there is no
        // value_delimiter, so any comma splitting must happen in the handler.
311 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
312 #[arg(long)]
313 batch: Option<String>,
314 /// Output in JSON format
315 #[arg(long)]
316 json: bool,
317 /// Plan mode (validate inputs, show export plan, no execution)
318 #[arg(long)]
319 plan: bool,
320 },
321 /// Import from external formats (hf://org/repo, local files, URLs)
322 Import {
323 /// Source: hf://org/repo, local file, or URL
324 #[arg(value_name = "SOURCE")]
325 source: String,
326 /// Output .apr file path (default: derived from source name)
327 #[arg(short, long)]
328 output: Option<PathBuf>,
329 /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
330 #[arg(long, default_value = "auto")]
331 arch: String,
332 /// Quantization (int8, int4, fp16)
333 #[arg(long)]
334 quantize: Option<String>,
335 /// Strict mode: reject unverified architectures and fail on validation errors
336 #[arg(long)]
337 strict: bool,
338 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
339 /// Uses realizar's Q4K converter instead of dequantizing to F32
340 #[arg(long)]
341 preserve_q4k: bool,
342 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
343 /// Required if the GGUF has no embedded tokenizer vocabulary.
344 #[arg(long)]
345 tokenizer: Option<PathBuf>,
346 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
347 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
348 #[arg(long)]
349 enforce_provenance: bool,
350 /// GH-223: Allow import without config.json (default: error).
351 /// Without config.json, hyperparameters like rope_theta are inferred from
352 /// tensor shapes and may be wrong, producing garbage output.
353 #[arg(long)]
354 allow_no_config: bool,
355 },
356 /// Download and cache model from HuggingFace (Ollama-like UX)
357 Pull {
358 /// Model reference (alias, hf:// URI, or org/repo)
359 #[arg(value_name = "MODEL")]
360 model_ref: String,
361 /// Force re-download even if cached
362 #[arg(long)]
363 force: bool,
364 },
    // Unit variant: exposed as `list`, with `ls` accepted as an alias.
365 /// List cached models
366 #[command(name = "list", alias = "ls")]
367 List,
    // Exposed as `rm`, with `remove` accepted as an alias.
368 /// Remove model from cache
369 #[command(name = "rm", alias = "remove")]
370 Rm {
371 /// Model reference to remove
372 #[arg(value_name = "MODEL")]
373 model_ref: String,
374 },
375 /// Convert/optimize model
376 Convert {
377 /// Path to .apr model file
378 #[arg(value_name = "FILE")]
379 file: PathBuf,
380 /// Quantize to format (int8, int4, fp16, q4k)
381 #[arg(long)]
382 quantize: Option<String>,
383 /// Compress output (none, zstd, zstd-max, lz4)
384 #[arg(long)]
385 compress: Option<String>,
        // Plain PathBuf (not Option): -o/--output is mandatory for this subcommand.
386 /// Output file path
387 #[arg(short, long)]
388 output: PathBuf,
389 /// Force overwrite existing files
390 #[arg(short, long)]
391 force: bool,
392 },
393 /// Compile model into standalone executable (APR-SPEC §4.16)
394 Compile {
        // FILE may be omitted only when --list-targets is passed, hence the Option.
395 /// Input .apr model file
396 #[arg(value_name = "FILE", required_unless_present = "list_targets")]
397 file: Option<PathBuf>,
398 /// Output binary path (default: derived from model name)
399 #[arg(short, long)]
400 output: Option<PathBuf>,
401 /// Target triple (e.g., x86_64-unknown-linux-musl)
402 #[arg(long)]
403 target: Option<String>,
404 /// Quantize weights before embedding (int8, int4, fp16)
405 #[arg(long)]
406 quantize: Option<String>,
407 /// Release mode (optimized)
408 #[arg(long)]
409 release: bool,
410 /// Strip debug symbols
411 #[arg(long)]
412 strip: bool,
413 /// Enable LTO (Link-Time Optimization)
414 #[arg(long)]
415 lto: bool,
416 /// List available compilation targets
417 #[arg(long)]
418 list_targets: bool,
419 },
420 /// Merge multiple models
421 Merge {
        // num_args = 2.. means at least two input files must be supplied.
422 /// Model files to merge
423 #[arg(value_name = "FILES", num_args = 2..)]
424 files: Vec<PathBuf>,
425 /// Merge strategy (average, weighted, slerp, ties, dare)
426 #[arg(long, default_value = "average")]
427 strategy: String,
        // Required at parse time unless --plan is passed.
428 /// Output file path (optional in --plan mode)
429 #[arg(short, long, required_unless_present = "plan")]
430 output: Option<PathBuf>,
        // value_delimiter = ',' splits the argument and parses each piece as f32.
431 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
432 #[arg(long, value_delimiter = ',')]
433 weights: Option<Vec<f32>>,
434 /// Base model for TIES/DARE (task vectors computed as delta from base)
435 #[arg(long)]
436 base_model: Option<PathBuf>,
437 /// DARE drop probability (default: 0.9)
438 #[arg(long, default_value = "0.9")]
439 drop_rate: f32,
440 /// TIES trim density threshold (default: 0.2)
441 #[arg(long, default_value = "0.2")]
442 density: f32,
443 /// RNG seed for DARE (default: 42)
444 #[arg(long, default_value = "42")]
445 seed: u64,
446 /// Plan mode (validate inputs, show merge plan, no execution)
447 #[arg(long)]
448 plan: bool,
449 },
450 /// Quantize model weights (GH-243)
451 Quantize {
452 /// Input model file
453 #[arg(value_name = "FILE")]
454 file: PathBuf,
455 /// Quantization scheme: int8, int4, fp16, q4k
456 #[arg(long, short = 's', default_value = "int4")]
457 scheme: String,
        // NOTE(review): the help text says "required unless --plan", but unlike
        // `Merge::output` there is no `required_unless_present = "plan"` here, so the
        // parser accepts a missing --output; any enforcement must live in the
        // handler — confirm.
458 /// Output file path (required unless --plan)
459 #[arg(short, long)]
460 output: Option<PathBuf>,
461 /// Output format override (apr, gguf, safetensors)
462 #[arg(long)]
463 format: Option<String>,
464 /// Batch quantization (comma-separated schemes)
465 #[arg(long)]
466 batch: Option<String>,
467 /// Plan mode (estimate only, no execution)
468 #[arg(long)]
469 plan: bool,
470 /// Force overwrite existing files
471 #[arg(short, long)]
472 force: bool,
473 },
    // `flatten` on a tuple variant: presumably the subcommands of ModelOpsCommands
    // (defined elsewhere) appear directly at this level rather than under a nested
    // name — confirm against clap's derive rules.
474 /// Model optimization commands (fine-tune, prune, distill)
475 #[command(flatten)]
476 ModelOps(ModelOpsCommands),
    // Struct-style variant with no fields: the subcommand takes no arguments.
477 /// Start the MCP (Model Context Protocol) server over stdio
478 ///
479 /// Exposes `apr` as MCP tools for Claude Code, Cursor, Cline, and other
480 /// MCP clients. Configure via `.mcp.json` with `{"command":"apr","args":["mcp"]}`.
481 Mcp {},
482 /// Interactive terminal UI
483 Tui {
484 /// Path to .apr model file
485 #[arg(value_name = "FILE")]
486 file: Option<PathBuf>,
487 },
488 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
489 Check {
490 /// Path to model file
491 #[arg(value_name = "FILE")]
492 file: PathBuf,
493 /// Disable GPU acceleration
494 #[arg(long)]
495 no_gpu: bool,
496 /// Output as JSON
497 #[arg(long)]
498 json: bool,
499 },
    // Only compiled when the "training" cargo feature is enabled.
500 /// GPU status and VRAM reservation management (GPU-SHARE-001)
501 #[cfg(feature = "training")]
502 Gpu {
503 /// Show reservations as JSON
504 #[arg(long)]
505 json: bool,
506 },
    // Only compiled when the "code" cargo feature is enabled.
507 /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
508 #[cfg(feature = "code")]
509 Code {
510 /// Path to local GGUF/APR model file (prefers .apr format)
511 #[arg(long)]
512 model: Option<PathBuf>,
513
514 /// Project directory (loads APR.md/CLAUDE.md from this path)
515 #[arg(long, default_value = ".")]
516 project: PathBuf,
517
        // Option<Option<String>>: presumably distinguishes flag absent, --resume with
        // no value, and --resume <ID> — TODO confirm against clap's derive rules.
518 /// Resume previous session (optionally by ID)
519 #[arg(long)]
520 resume: Option<Option<String>>,
521
522 /// Agent manifest (advanced — overrides defaults)
523 #[arg(long)]
524 manifest: Option<PathBuf>,
525
        // NOTE(review): the help text reads "Initial prompt", but this is a boolean
        // -p/--print switch; the prompt text itself is collected by `prompt` below.
526 /// Initial prompt (non-interactive: print response and exit)
527 #[arg(short, long)]
528 print: bool,
529
        // No short/long: positional words are gathered into the Vec. Joining them into
        // one prompt string, if that is intended, happens outside this enum.
530 /// Prompt text (positional, for -p mode)
531 #[arg(trailing_var_arg = true)]
532 prompt: Vec<String>,
533
534 /// Max turns before stopping
535 #[arg(long, default_value = "50")]
536 max_turns: u32,
537 },
    // Flattened in the same way as ModelOps above; ExtendedCommands is defined
    // outside this block.
538 /// Extended analysis, profiling, QA, and visualization commands
539 #[command(flatten)]
540 Extended(ExtendedCommands),
541
    // Only compiled when the "dev" cargo feature is enabled. Unlike ModelOps and
    // Extended this is marked `subcommand`, not `flatten`, so MonoCommands is nested
    // under its own subcommand name.
542 /// Monorepo management (publish, shims, audit, archive) [dev-only]
543 #[cfg(feature = "dev")]
544 #[command(subcommand)]
545 Mono(crate::commands::mono::MonoCommands),
546}