// apr_cli/commands_enum.rs

2#[derive(Subcommand, Debug)]
3pub enum Commands {
4 /// Run model directly (auto-download, cache, execute)
5 Run {
6 /// Model source: local path, hf://org/repo, or URL
7 #[arg(value_name = "SOURCE")]
8 source: String,
9 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10 #[arg(value_name = "PROMPT")]
11 positional_prompt: Option<String>,
12 /// Input file (audio, text, etc.)
13 #[arg(short, long)]
14 input: Option<PathBuf>,
15 /// Text prompt for generation (for LLM models)
16 #[arg(short, long)]
17 prompt: Option<String>,
18 /// Maximum tokens to generate (default: 32)
19 #[arg(short = 'n', long, default_value = "32")]
20 max_tokens: usize,
21 /// Enable streaming output
22 #[arg(long)]
23 stream: bool,
24 /// Language code (for ASR models)
25 #[arg(short, long)]
26 language: Option<String>,
27 /// Task (transcribe, translate)
28 #[arg(short, long)]
29 task: Option<String>,
30 /// Output format (text, json, srt, vtt)
31 #[arg(short = 'f', long, default_value = "text")]
32 format: String,
33 /// Disable GPU acceleration
34 #[arg(long, conflicts_with = "gpu")]
35 no_gpu: bool,
36 /// Force GPU acceleration
37 #[arg(long, conflicts_with = "no_gpu")]
38 gpu: bool,
39 /// Offline mode: block all network access (Sovereign AI compliance)
40 #[arg(long)]
41 offline: bool,
42 /// Benchmark mode: output performance metrics (tok/s, latency)
43 #[arg(long)]
44 benchmark: bool,
45 /// Enable inference tracing (APR-TRACE-001)
46 #[arg(long)]
47 trace: bool,
48 /// Trace specific steps only (comma-separated)
49 #[arg(long, value_delimiter = ',')]
50 trace_steps: Option<Vec<String>>,
51 /// Verbose tracing (show tensor values)
52 #[arg(long)]
53 trace_verbose: bool,
54 /// Save trace output to JSON file
55 #[arg(long, value_name = "FILE")]
56 trace_output: Option<PathBuf>,
57 /// Trace detail level (none, basic, layer, payload)
58 #[arg(long, value_name = "LEVEL", default_value = "basic")]
59 trace_level: String,
60 /// Shorthand for --trace --trace-level payload (tensor value inspection)
61 #[arg(long)]
62 trace_payload: bool,
63 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
64 #[arg(long)]
65 profile: bool,
66 /// Apply chat template for Instruct models (GAP-UX-001)
67 ///
68 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
69 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
70 #[arg(long)]
71 chat: bool,
72 /// Show verbose output (model loading, backend info)
73 #[arg(short, long)]
74 verbose: bool,
75 },
76 /// Inference server (plan/run)
77 Serve {
78 #[command(subcommand)]
79 command: ServeCommands,
80 },
81 /// Inspect model metadata, vocab, and structure
82 Inspect {
83 /// Path to .apr model file
84 #[arg(value_name = "FILE")]
85 file: PathBuf,
86 /// Show vocabulary details
87 #[arg(long)]
88 vocab: bool,
89 /// Show filter/security details
90 #[arg(long)]
91 filters: bool,
92 /// Show weight statistics
93 #[arg(long)]
94 weights: bool,
95 /// Output as JSON
96 #[arg(long)]
97 json: bool,
98 },
99 /// Simple debugging output ("drama" mode available)
100 Debug {
101 /// Path to .apr model file
102 #[arg(value_name = "FILE")]
103 file: PathBuf,
104 /// Theatrical "drama" mode output
105 #[arg(long)]
106 drama: bool,
107 /// Show hex dump
108 #[arg(long)]
109 hex: bool,
110 /// Extract ASCII strings
111 #[arg(long)]
112 strings: bool,
113 /// Limit output lines
114 #[arg(long, default_value = "256")]
115 limit: usize,
116 },
117 /// Validate model integrity and quality
118 Validate {
119 /// Path to .apr model file
120 #[arg(value_name = "FILE")]
121 file: PathBuf,
122 /// Show 100-point quality assessment
123 #[arg(long)]
124 quality: bool,
125 /// Strict validation (fail on warnings)
126 #[arg(long)]
127 strict: bool,
128 /// Minimum score to pass (0-100)
129 #[arg(long)]
130 min_score: Option<u8>,
131 },
132 /// Compare two models
133 Diff {
134 /// First model file
135 #[arg(value_name = "FILE1")]
136 file1: PathBuf,
137 /// Second model file
138 #[arg(value_name = "FILE2")]
139 file2: PathBuf,
140 /// Show weight-level differences
141 #[arg(long)]
142 weights: bool,
143 /// Compare actual tensor values with statistical analysis
144 #[arg(long)]
145 values: bool,
146 /// Filter tensors by name pattern (for --values)
147 #[arg(long)]
148 filter: Option<String>,
149 /// Maximum number of tensors to compare (for --values)
150 #[arg(long, default_value = "10")]
151 limit: usize,
152 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
153 #[arg(long)]
154 transpose_aware: bool,
155 /// Output as JSON
156 #[arg(long)]
157 json: bool,
158 },
159 /// List tensor names and shapes
160 Tensors {
161 /// Path to .apr model file
162 #[arg(value_name = "FILE")]
163 file: PathBuf,
164 /// Show tensor statistics (mean, std, min, max)
165 #[arg(long)]
166 stats: bool,
167 /// Filter tensors by name pattern
168 #[arg(long)]
169 filter: Option<String>,
170 /// Limit number of tensors shown (0 = unlimited)
171 #[arg(long, default_value = "0")]
172 limit: usize,
173 /// Output as JSON
174 #[arg(long)]
175 json: bool,
176 },
177 /// Layer-by-layer trace analysis
178 Trace {
179 /// Path to .apr model file
180 #[arg(value_name = "FILE")]
181 file: PathBuf,
182 /// Filter layers by name pattern
183 #[arg(long)]
184 layer: Option<String>,
185 /// Compare with reference model
186 #[arg(long)]
187 reference: Option<PathBuf>,
188 /// Output as JSON
189 #[arg(long)]
190 json: bool,
191 /// Verbose output with per-layer stats
192 #[arg(short, long)]
193 verbose: bool,
194 /// Trace payload through model
195 #[arg(long)]
196 payload: bool,
197 /// Diff mode
198 #[arg(long)]
199 diff: bool,
200 /// Interactive mode
201 #[arg(long)]
202 interactive: bool,
203 },
204 /// Check for best practices and conventions
205 Lint {
206 /// Path to .apr model file
207 #[arg(value_name = "FILE")]
208 file: PathBuf,
209 },
210 /// Explain errors, architecture, tensors, and kernel dispatch
211 Explain {
212 /// Error code, model file path, or family name (auto-detected)
213 #[arg(value_name = "CODE_OR_FILE")]
214 code_or_file: Option<String>,
215 /// Path to .apr model file (optional context for --tensor)
216 #[arg(short, long)]
217 file: Option<PathBuf>,
218 /// Explain a specific tensor
219 #[arg(long)]
220 tensor: Option<String>,
221 /// Explain kernel dispatch pipeline for architecture
222 #[arg(long)]
223 kernel: bool,
224 /// Output as JSON
225 #[arg(long)]
226 json: bool,
227 /// Show kernel contract details and proof obligations
228 #[arg(short, long)]
229 verbose: bool,
230 /// Show per-kernel proof status from contract tests
231 #[arg(long)]
232 proof_status: bool,
233 },
234 /// Manage canary tests for regression
235 Canary {
236 #[command(subcommand)]
237 command: CanaryCommands,
238 },
239 /// Export model to other formats
240 Export {
241 /// Path to .apr model file
242 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
243 file: Option<PathBuf>,
244 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
245 #[arg(long, default_value = "safetensors")]
246 format: String,
247 /// Output file/directory path
248 #[arg(short, long)]
249 output: Option<PathBuf>,
250 /// Apply quantization during export (int8, int4, fp16)
251 #[arg(long)]
252 quantize: Option<String>,
253 /// List all supported export formats
254 #[arg(long)]
255 list_formats: bool,
256 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
257 #[arg(long)]
258 batch: Option<String>,
259 /// Output in JSON format
260 #[arg(long)]
261 json: bool,
262 /// Plan mode (validate inputs, show export plan, no execution)
263 #[arg(long)]
264 plan: bool,
265 },
266 /// Import from external formats (hf://org/repo, local files, URLs)
267 Import {
268 /// Source: hf://org/repo, local file, or URL
269 #[arg(value_name = "SOURCE")]
270 source: String,
271 /// Output .apr file path (default: derived from source name)
272 #[arg(short, long)]
273 output: Option<PathBuf>,
274 /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
275 #[arg(long, default_value = "auto")]
276 arch: String,
277 /// Quantization (int8, int4, fp16)
278 #[arg(long)]
279 quantize: Option<String>,
280 /// Strict mode: reject unverified architectures and fail on validation errors
281 #[arg(long)]
282 strict: bool,
283 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
284 /// Uses realizar's Q4K converter instead of dequantizing to F32
285 #[arg(long)]
286 preserve_q4k: bool,
287 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
288 /// Required if the GGUF has no embedded tokenizer vocabulary.
289 #[arg(long)]
290 tokenizer: Option<PathBuf>,
291 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
292 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
293 #[arg(long)]
294 enforce_provenance: bool,
295 /// GH-223: Allow import without config.json (default: error).
296 /// Without config.json, hyperparameters like rope_theta are inferred from
297 /// tensor shapes and may be wrong, producing garbage output.
298 #[arg(long)]
299 allow_no_config: bool,
300 },
301 /// Download and cache model from HuggingFace (Ollama-like UX)
302 Pull {
303 /// Model reference (alias, hf:// URI, or org/repo)
304 #[arg(value_name = "MODEL")]
305 model_ref: String,
306 /// Force re-download even if cached
307 #[arg(long)]
308 force: bool,
309 },
310 /// List cached models
311 #[command(name = "list", alias = "ls")]
312 List,
313 /// Remove model from cache
314 #[command(name = "rm", alias = "remove")]
315 Rm {
316 /// Model reference to remove
317 #[arg(value_name = "MODEL")]
318 model_ref: String,
319 },
320 /// Convert/optimize model
321 Convert {
322 /// Path to .apr model file
323 #[arg(value_name = "FILE")]
324 file: PathBuf,
325 /// Quantize to format (int8, int4, fp16, q4k)
326 #[arg(long)]
327 quantize: Option<String>,
328 /// Compress output (none, zstd, zstd-max, lz4)
329 #[arg(long)]
330 compress: Option<String>,
331 /// Output file path
332 #[arg(short, long)]
333 output: PathBuf,
334 /// Force overwrite existing files
335 #[arg(short, long)]
336 force: bool,
337 },
338 /// Compile model into standalone executable (APR-SPEC ยง4.16)
339 Compile {
340 /// Input .apr model file
341 #[arg(value_name = "FILE", required_unless_present = "list_targets")]
342 file: Option<PathBuf>,
343 /// Output binary path (default: derived from model name)
344 #[arg(short, long)]
345 output: Option<PathBuf>,
346 /// Target triple (e.g., x86_64-unknown-linux-musl)
347 #[arg(long)]
348 target: Option<String>,
349 /// Quantize weights before embedding (int8, int4, fp16)
350 #[arg(long)]
351 quantize: Option<String>,
352 /// Release mode (optimized)
353 #[arg(long)]
354 release: bool,
355 /// Strip debug symbols
356 #[arg(long)]
357 strip: bool,
358 /// Enable LTO (Link-Time Optimization)
359 #[arg(long)]
360 lto: bool,
361 /// List available compilation targets
362 #[arg(long)]
363 list_targets: bool,
364 },
365 /// Merge multiple models
366 Merge {
367 /// Model files to merge
368 #[arg(value_name = "FILES", num_args = 2..)]
369 files: Vec<PathBuf>,
370 /// Merge strategy (average, weighted, slerp, ties, dare)
371 #[arg(long, default_value = "average")]
372 strategy: String,
373 /// Output file path
374 #[arg(short, long)]
375 output: PathBuf,
376 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
377 #[arg(long, value_delimiter = ',')]
378 weights: Option<Vec<f32>>,
379 /// Base model for TIES/DARE (task vectors computed as delta from base)
380 #[arg(long)]
381 base_model: Option<PathBuf>,
382 /// DARE drop probability (default: 0.9)
383 #[arg(long, default_value = "0.9")]
384 drop_rate: f32,
385 /// TIES trim density threshold (default: 0.2)
386 #[arg(long, default_value = "0.2")]
387 density: f32,
388 /// RNG seed for DARE (default: 42)
389 #[arg(long, default_value = "42")]
390 seed: u64,
391 /// Plan mode (validate inputs, show merge plan, no execution)
392 #[arg(long)]
393 plan: bool,
394 },
395 /// Quantize model weights (GH-243)
396 Quantize {
397 /// Input model file
398 #[arg(value_name = "FILE")]
399 file: PathBuf,
400 /// Quantization scheme: int8, int4, fp16, q4k
401 #[arg(long, short = 's', default_value = "int4")]
402 scheme: String,
403 /// Output file path (required unless --plan)
404 #[arg(short, long)]
405 output: Option<PathBuf>,
406 /// Output format override (apr, gguf, safetensors)
407 #[arg(long)]
408 format: Option<String>,
409 /// Batch quantization (comma-separated schemes)
410 #[arg(long)]
411 batch: Option<String>,
412 /// Plan mode (estimate only, no execution)
413 #[arg(long)]
414 plan: bool,
415 /// Force overwrite existing files
416 #[arg(short, long)]
417 force: bool,
418 },
419 /// Model optimization commands (fine-tune, prune, distill)
420 #[command(flatten)]
421 ModelOps(ModelOpsCommands),
422 /// Interactive terminal UI
423 Tui {
424 /// Path to .apr model file
425 #[arg(value_name = "FILE")]
426 file: Option<PathBuf>,
427 },
428 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
429 Check {
430 /// Path to model file
431 #[arg(value_name = "FILE")]
432 file: PathBuf,
433 /// Disable GPU acceleration
434 #[arg(long)]
435 no_gpu: bool,
436 /// Output as JSON
437 #[arg(long)]
438 json: bool,
439 },
440 /// GPU status and VRAM reservation management (GPU-SHARE-001)
441 #[cfg(feature = "training")]
442 Gpu {
443 /// Show reservations as JSON
444 #[arg(long)]
445 json: bool,
446 },
447 /// Extended analysis, profiling, QA, and visualization commands
448 #[command(flatten)]
449 Extended(ExtendedCommands),
450}