// apr_cli/commands_enum.rs
2#[derive(Subcommand, Debug)]
3pub enum Commands {
4 /// Run model directly (auto-download, cache, execute)
5 Run {
6 /// Model source: local path, hf://org/repo, or URL
7 #[arg(value_name = "SOURCE")]
8 source: String,
9 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10 #[arg(value_name = "PROMPT")]
11 positional_prompt: Option<String>,
12 /// Input file (audio, text, etc.)
13 #[arg(short, long)]
14 input: Option<PathBuf>,
15 /// Text prompt for generation (for LLM models)
16 #[arg(short, long)]
17 prompt: Option<String>,
18 /// Maximum tokens to generate (default: 32)
19 #[arg(short = 'n', long, default_value = "32")]
20 max_tokens: usize,
21 /// Enable streaming output
22 #[arg(long)]
23 stream: bool,
24 /// Language code (for ASR models)
25 #[arg(short, long)]
26 language: Option<String>,
27 /// Task (transcribe, translate)
28 #[arg(short, long)]
29 task: Option<String>,
30 /// Output format (text, json, srt, vtt)
31 #[arg(short = 'f', long, default_value = "text")]
32 format: String,
33 /// Disable GPU acceleration
34 #[arg(long, conflicts_with = "gpu")]
35 no_gpu: bool,
36 /// Force GPU acceleration
37 #[arg(long, conflicts_with = "no_gpu")]
38 gpu: bool,
39 /// Offline mode: block all network access (Sovereign AI compliance)
40 #[arg(long)]
41 offline: bool,
42 /// Benchmark mode: output performance metrics (tok/s, latency)
43 #[arg(long)]
44 benchmark: bool,
45 /// Enable inference tracing (APR-TRACE-001)
46 #[arg(long)]
47 trace: bool,
48 /// Trace specific steps only (comma-separated)
49 #[arg(long, value_delimiter = ',')]
50 trace_steps: Option<Vec<String>>,
51 /// Verbose tracing (show tensor values)
52 #[arg(long)]
53 trace_verbose: bool,
54 /// Save trace output to JSON file
55 #[arg(long, value_name = "FILE")]
56 trace_output: Option<PathBuf>,
57 /// Trace detail level (none, basic, layer, payload, chrome)
58 /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
59 /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
60 #[arg(long, value_name = "LEVEL", default_value = "basic")]
61 trace_level: String,
62 /// Shorthand for --trace --trace-level payload (tensor value inspection)
63 #[arg(long)]
64 trace_payload: bool,
65 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
66 #[arg(long)]
67 profile: bool,
68 /// Apply chat template for Instruct models (GAP-UX-001)
69 ///
70 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
71 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
72 #[arg(long)]
73 chat: bool,
74 /// Sampling temperature (0.0 = greedy, default: 0.0)
75 #[arg(long, default_value = "0.0")]
76 temperature: f32,
77 /// Top-k sampling (default: 1 = greedy)
78 #[arg(long, default_value = "1")]
79 top_k: usize,
80 /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
81 /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
82 #[arg(long)]
83 top_p: Option<f32>,
84 /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
85 /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
86 #[arg(long, default_value = "299792458")]
87 seed: u64,
88 /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
89 /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
90 #[arg(long, default_value = "1.0")]
91 repeat_penalty: f32,
92 /// Context window for repetition penalty (number of recent tokens to check)
93 /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
94 #[arg(long, default_value = "64")]
95 repeat_last_n: usize,
96 /// Process prompt tokens one-by-one instead of batched prefill.
97 /// Useful for debugging prefill correctness (comparing per-token attention).
98 /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
99 #[arg(long)]
100 split_prompt: bool,
101 /// Batch mode: read prompts from JSONL, output results as JSONL.
102 /// Model loads once, processes all prompts sequentially.
103 /// Each input line: {"prompt": "...", "task_id": "..."}
104 /// Chat template is applied automatically.
105 #[arg(long, value_name = "FILE")]
106 batch_jsonl: Option<PathBuf>,
107 /// Show verbose output (model loading, backend info)
108 #[arg(short, long)]
109 verbose: bool,
110 /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
111 #[arg(long, value_name = "BACKEND")]
112 backend: Option<String>,
113 },
114 /// Inference server (plan/run)
115 Serve {
116 #[command(subcommand)]
117 command: ServeCommands,
118 },
119 /// Inspect model metadata, vocab, and structure
120 Inspect {
121 /// Path to .apr model file
122 #[arg(value_name = "FILE")]
123 file: PathBuf,
124 /// Show vocabulary details
125 #[arg(long)]
126 vocab: bool,
127 /// Show filter/security details
128 #[arg(long)]
129 filters: bool,
130 /// Show weight statistics
131 #[arg(long)]
132 weights: bool,
133 /// Output as JSON
134 #[arg(long)]
135 json: bool,
136 },
137 /// Simple debugging output ("drama" mode available)
138 Debug {
139 /// Path to .apr model file
140 #[arg(value_name = "FILE")]
141 file: PathBuf,
142 /// Theatrical "drama" mode output
143 #[arg(long)]
144 drama: bool,
145 /// Show hex dump
146 #[arg(long)]
147 hex: bool,
148 /// Extract ASCII strings
149 #[arg(long)]
150 strings: bool,
151 /// Limit output lines
152 #[arg(long, default_value = "256")]
153 limit: usize,
154 },
155 /// Validate model integrity and quality
156 Validate {
157 /// Path to .apr model file
158 #[arg(value_name = "FILE")]
159 file: PathBuf,
160 /// Show 100-point quality assessment
161 #[arg(long)]
162 quality: bool,
163 /// Strict validation (fail on warnings)
164 #[arg(long)]
165 strict: bool,
166 /// Minimum score to pass (0-100)
167 #[arg(long)]
168 min_score: Option<u8>,
169 },
170 /// Compare two models
171 Diff {
172 /// First model file
173 #[arg(value_name = "FILE1")]
174 file1: PathBuf,
175 /// Second model file
176 #[arg(value_name = "FILE2")]
177 file2: PathBuf,
178 /// Show weight-level differences
179 #[arg(long)]
180 weights: bool,
181 /// Compare actual tensor values with statistical analysis
182 #[arg(long)]
183 values: bool,
184 /// Filter tensors by name pattern (for --values)
185 #[arg(long)]
186 filter: Option<String>,
187 /// Maximum number of tensors to compare (for --values)
188 #[arg(long, default_value = "10")]
189 limit: usize,
190 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
191 #[arg(long)]
192 transpose_aware: bool,
193 /// Output as JSON
194 #[arg(long)]
195 json: bool,
196 },
197 /// List tensor names and shapes
198 Tensors {
199 /// Path to .apr model file
200 #[arg(value_name = "FILE")]
201 file: PathBuf,
202 /// Show tensor statistics (mean, std, min, max)
203 #[arg(long)]
204 stats: bool,
205 /// Filter tensors by name pattern
206 #[arg(long)]
207 filter: Option<String>,
208 /// Limit number of tensors shown (0 = unlimited)
209 #[arg(long, default_value = "0")]
210 limit: usize,
211 /// Output as JSON
212 #[arg(long)]
213 json: bool,
214 },
215 /// Layer-by-layer trace analysis
216 Trace {
217 /// Path to .apr model file
218 #[arg(value_name = "FILE")]
219 file: PathBuf,
220 /// Filter layers by name pattern
221 #[arg(long)]
222 layer: Option<String>,
223 /// Compare with reference model
224 #[arg(long)]
225 reference: Option<PathBuf>,
226 /// Output as JSON
227 #[arg(long)]
228 json: bool,
229 /// Verbose output with per-layer stats
230 #[arg(short, long)]
231 verbose: bool,
232 /// Trace payload through model
233 #[arg(long)]
234 payload: bool,
235 /// Diff mode
236 #[arg(long)]
237 diff: bool,
238 /// Interactive mode
239 #[arg(long)]
240 interactive: bool,
241 },
242 /// Check for best practices and conventions
243 Lint {
244 /// Path to .apr model file
245 #[arg(value_name = "FILE")]
246 file: PathBuf,
247 },
248 /// Explain errors, architecture, tensors, and kernel dispatch
249 Explain {
250 /// Error code, model file path, or family name (auto-detected)
251 #[arg(value_name = "CODE_OR_FILE")]
252 code_or_file: Option<String>,
253 /// Path to .apr model file (optional context for --tensor)
254 #[arg(short, long)]
255 file: Option<PathBuf>,
256 /// Explain a specific tensor
257 #[arg(long)]
258 tensor: Option<String>,
259 /// Explain kernel dispatch pipeline for architecture
260 #[arg(long)]
261 kernel: bool,
262 /// Output as JSON
263 #[arg(long)]
264 json: bool,
265 /// Show kernel contract details and proof obligations
266 #[arg(short, long)]
267 verbose: bool,
268 /// Show per-kernel proof status from contract tests
269 #[arg(long)]
270 proof_status: bool,
271 },
272 /// Manage canary tests for regression
273 Canary {
274 #[command(subcommand)]
275 command: CanaryCommands,
276 },
277 /// Export model to other formats
278 Export {
279 /// Path to .apr model file
280 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
281 file: Option<PathBuf>,
282 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
283 #[arg(long, default_value = "safetensors")]
284 format: String,
285 /// Output file/directory path
286 #[arg(short, long)]
287 output: Option<PathBuf>,
288 /// Apply quantization during export (int8, int4, fp16)
289 #[arg(long)]
290 quantize: Option<String>,
291 /// List all supported export formats
292 #[arg(long)]
293 list_formats: bool,
294 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
295 #[arg(long)]
296 batch: Option<String>,
297 /// Output in JSON format
298 #[arg(long)]
299 json: bool,
300 /// Plan mode (validate inputs, show export plan, no execution)
301 #[arg(long)]
302 plan: bool,
303 },
304 /// Import from external formats (hf://org/repo, local files, URLs)
305 Import {
306 /// Source: hf://org/repo, local file, or URL
307 #[arg(value_name = "SOURCE")]
308 source: String,
309 /// Output .apr file path (default: derived from source name)
310 #[arg(short, long)]
311 output: Option<PathBuf>,
312 /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
313 #[arg(long, default_value = "auto")]
314 arch: String,
315 /// Quantization (int8, int4, fp16)
316 #[arg(long)]
317 quantize: Option<String>,
318 /// Strict mode: reject unverified architectures and fail on validation errors
319 #[arg(long)]
320 strict: bool,
321 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
322 /// Uses realizar's Q4K converter instead of dequantizing to F32
323 #[arg(long)]
324 preserve_q4k: bool,
325 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
326 /// Required if the GGUF has no embedded tokenizer vocabulary.
327 #[arg(long)]
328 tokenizer: Option<PathBuf>,
329 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
330 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
331 #[arg(long)]
332 enforce_provenance: bool,
333 /// GH-223: Allow import without config.json (default: error).
334 /// Without config.json, hyperparameters like rope_theta are inferred from
335 /// tensor shapes and may be wrong, producing garbage output.
336 #[arg(long)]
337 allow_no_config: bool,
338 },
339 /// Download and cache model from HuggingFace (Ollama-like UX)
340 Pull {
341 /// Model reference (alias, hf:// URI, or org/repo)
342 #[arg(value_name = "MODEL")]
343 model_ref: String,
344 /// Force re-download even if cached
345 #[arg(long)]
346 force: bool,
347 },
348 /// List cached models
349 #[command(name = "list", alias = "ls")]
350 List,
351 /// Remove model from cache
352 #[command(name = "rm", alias = "remove")]
353 Rm {
354 /// Model reference to remove
355 #[arg(value_name = "MODEL")]
356 model_ref: String,
357 },
358 /// Convert/optimize model
359 Convert {
360 /// Path to .apr model file
361 #[arg(value_name = "FILE")]
362 file: PathBuf,
363 /// Quantize to format (int8, int4, fp16, q4k)
364 #[arg(long)]
365 quantize: Option<String>,
366 /// Compress output (none, zstd, zstd-max, lz4)
367 #[arg(long)]
368 compress: Option<String>,
369 /// Output file path
370 #[arg(short, long)]
371 output: PathBuf,
372 /// Force overwrite existing files
373 #[arg(short, long)]
374 force: bool,
375 },
376 /// Compile model into standalone executable (APR-SPEC §4.16)
377 Compile {
378 /// Input .apr model file
379 #[arg(value_name = "FILE", required_unless_present = "list_targets")]
380 file: Option<PathBuf>,
381 /// Output binary path (default: derived from model name)
382 #[arg(short, long)]
383 output: Option<PathBuf>,
384 /// Target triple (e.g., x86_64-unknown-linux-musl)
385 #[arg(long)]
386 target: Option<String>,
387 /// Quantize weights before embedding (int8, int4, fp16)
388 #[arg(long)]
389 quantize: Option<String>,
390 /// Release mode (optimized)
391 #[arg(long)]
392 release: bool,
393 /// Strip debug symbols
394 #[arg(long)]
395 strip: bool,
396 /// Enable LTO (Link-Time Optimization)
397 #[arg(long)]
398 lto: bool,
399 /// List available compilation targets
400 #[arg(long)]
401 list_targets: bool,
402 },
403 /// Merge multiple models
404 Merge {
405 /// Model files to merge
406 #[arg(value_name = "FILES", num_args = 2..)]
407 files: Vec<PathBuf>,
408 /// Merge strategy (average, weighted, slerp, ties, dare)
409 #[arg(long, default_value = "average")]
410 strategy: String,
411 /// Output file path (optional in --plan mode)
412 #[arg(short, long, required_unless_present = "plan")]
413 output: Option<PathBuf>,
414 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
415 #[arg(long, value_delimiter = ',')]
416 weights: Option<Vec<f32>>,
417 /// Base model for TIES/DARE (task vectors computed as delta from base)
418 #[arg(long)]
419 base_model: Option<PathBuf>,
420 /// DARE drop probability (default: 0.9)
421 #[arg(long, default_value = "0.9")]
422 drop_rate: f32,
423 /// TIES trim density threshold (default: 0.2)
424 #[arg(long, default_value = "0.2")]
425 density: f32,
426 /// RNG seed for DARE (default: 42)
427 #[arg(long, default_value = "42")]
428 seed: u64,
429 /// Plan mode (validate inputs, show merge plan, no execution)
430 #[arg(long)]
431 plan: bool,
432 },
433 /// Quantize model weights (GH-243)
434 Quantize {
435 /// Input model file
436 #[arg(value_name = "FILE")]
437 file: PathBuf,
438 /// Quantization scheme: int8, int4, fp16, q4k
439 #[arg(long, short = 's', default_value = "int4")]
440 scheme: String,
441 /// Output file path (required unless --plan)
442 #[arg(short, long)]
443 output: Option<PathBuf>,
444 /// Output format override (apr, gguf, safetensors)
445 #[arg(long)]
446 format: Option<String>,
447 /// Batch quantization (comma-separated schemes)
448 #[arg(long)]
449 batch: Option<String>,
450 /// Plan mode (estimate only, no execution)
451 #[arg(long)]
452 plan: bool,
453 /// Force overwrite existing files
454 #[arg(short, long)]
455 force: bool,
456 },
457 /// Model optimization commands (fine-tune, prune, distill)
458 #[command(flatten)]
459 ModelOps(ModelOpsCommands),
460 /// Interactive terminal UI
461 Tui {
462 /// Path to .apr model file
463 #[arg(value_name = "FILE")]
464 file: Option<PathBuf>,
465 },
466 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
467 Check {
468 /// Path to model file
469 #[arg(value_name = "FILE")]
470 file: PathBuf,
471 /// Disable GPU acceleration
472 #[arg(long)]
473 no_gpu: bool,
474 /// Output as JSON
475 #[arg(long)]
476 json: bool,
477 },
478 /// GPU status and VRAM reservation management (GPU-SHARE-001)
479 #[cfg(feature = "training")]
480 Gpu {
481 /// Show reservations as JSON
482 #[arg(long)]
483 json: bool,
484 },
485 /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
486 #[cfg(feature = "code")]
487 Code {
488 /// Path to local GGUF/APR model file (prefers .apr format)
489 #[arg(long)]
490 model: Option<PathBuf>,
491
492 /// Project directory (loads APR.md/CLAUDE.md from this path)
493 #[arg(long, default_value = ".")]
494 project: PathBuf,
495
496 /// Resume previous session (optionally by ID)
497 #[arg(long)]
498 resume: Option<Option<String>>,
499
500 /// Agent manifest (advanced — overrides defaults)
501 #[arg(long)]
502 manifest: Option<PathBuf>,
503
504 /// Initial prompt (non-interactive: print response and exit)
505 #[arg(short, long)]
506 print: bool,
507
508 /// Prompt text (positional, for -p mode)
509 #[arg(trailing_var_arg = true)]
510 prompt: Vec<String>,
511
512 /// Max turns before stopping
513 #[arg(long, default_value = "50")]
514 max_turns: u32,
515 },
516 /// Extended analysis, profiling, QA, and visualization commands
517 #[command(flatten)]
518 Extended(ExtendedCommands),
519}