// apr_cli/commands_enum.rs
2#[derive(Subcommand, Debug)]
3pub enum Commands {
4 /// Run model directly (auto-download, cache, execute)
5 Run {
6 /// Model source: local path, hf://org/repo, or URL
7 #[arg(value_name = "SOURCE")]
8 source: String,
9 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10 #[arg(value_name = "PROMPT")]
11 positional_prompt: Option<String>,
12 /// Input file (audio, text, etc.)
13 #[arg(short, long)]
14 input: Option<PathBuf>,
15 /// Text prompt for generation (for LLM models)
16 #[arg(short, long)]
17 prompt: Option<String>,
18 /// Maximum tokens to generate (default: 32)
19 #[arg(short = 'n', long, default_value = "32")]
20 max_tokens: usize,
21 /// Enable streaming output
22 #[arg(long)]
23 stream: bool,
24 /// Language code (for ASR models)
25 #[arg(short, long)]
26 language: Option<String>,
27 /// Task (transcribe, translate)
28 #[arg(short, long)]
29 task: Option<String>,
30 /// Output format (text, json, srt, vtt)
31 #[arg(short = 'f', long, default_value = "text")]
32 format: String,
33 /// Disable GPU acceleration
34 #[arg(long, conflicts_with = "gpu")]
35 no_gpu: bool,
36 /// Force GPU acceleration
37 #[arg(long, conflicts_with = "no_gpu")]
38 gpu: bool,
39 /// Offline mode: block all network access (Sovereign AI compliance)
40 #[arg(long)]
41 offline: bool,
42 /// Benchmark mode: output performance metrics (tok/s, latency)
43 #[arg(long)]
44 benchmark: bool,
45 /// Enable inference tracing (APR-TRACE-001)
46 #[arg(long)]
47 trace: bool,
48 /// Trace specific steps only (comma-separated)
49 #[arg(long, value_delimiter = ',')]
50 trace_steps: Option<Vec<String>>,
51 /// Verbose tracing (show tensor values)
52 #[arg(long)]
53 trace_verbose: bool,
54 /// Save trace output to JSON file
55 #[arg(long, value_name = "FILE")]
56 trace_output: Option<PathBuf>,
57 /// Trace detail level (none, basic, layer, payload)
58 #[arg(long, value_name = "LEVEL", default_value = "basic")]
59 trace_level: String,
60 /// Shorthand for --trace --trace-level payload (tensor value inspection)
61 #[arg(long)]
62 trace_payload: bool,
63 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
64 #[arg(long)]
65 profile: bool,
66 /// Apply chat template for Instruct models (GAP-UX-001)
67 ///
68 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
69 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
70 #[arg(long)]
71 chat: bool,
72 /// Show verbose output (model loading, backend info)
73 #[arg(short, long)]
74 verbose: bool,
75 },
76 /// Start inference server (REST API, streaming, metrics)
77 Serve {
78 /// Path to model file
79 #[arg(value_name = "FILE")]
80 file: PathBuf,
81 /// Port to listen on
82 #[arg(short, long, default_value = "8080")]
83 port: u16,
84 /// Host to bind to
85 #[arg(long, default_value = "127.0.0.1")]
86 host: String,
87 /// Disable CORS
88 #[arg(long)]
89 no_cors: bool,
90 /// Disable Prometheus metrics endpoint
91 #[arg(long)]
92 no_metrics: bool,
93 /// Disable GPU acceleration
94 #[arg(long)]
95 no_gpu: bool,
96 /// Force GPU acceleration (requires CUDA)
97 #[arg(long)]
98 gpu: bool,
99 /// Enable batched GPU inference for 2X+ throughput
100 #[arg(long)]
101 batch: bool,
102 /// Enable inference tracing (PMAT-SHOWCASE-METHODOLOGY-001)
103 #[arg(long)]
104 trace: bool,
105 /// Trace detail level (none, basic, layer)
106 #[arg(long, value_name = "LEVEL", default_value = "basic")]
107 trace_level: String,
108 /// Enable inline Roofline profiling (adds X-Profile headers)
109 #[arg(long)]
110 profile: bool,
111 },
112 /// Inspect model metadata, vocab, and structure
113 Inspect {
114 /// Path to .apr model file
115 #[arg(value_name = "FILE")]
116 file: PathBuf,
117 /// Show vocabulary details
118 #[arg(long)]
119 vocab: bool,
120 /// Show filter/security details
121 #[arg(long)]
122 filters: bool,
123 /// Show weight statistics
124 #[arg(long)]
125 weights: bool,
126 /// Output as JSON
127 #[arg(long)]
128 json: bool,
129 },
130 /// Simple debugging output ("drama" mode available)
131 Debug {
132 /// Path to .apr model file
133 #[arg(value_name = "FILE")]
134 file: PathBuf,
135 /// Theatrical "drama" mode output
136 #[arg(long)]
137 drama: bool,
138 /// Show hex dump
139 #[arg(long)]
140 hex: bool,
141 /// Extract ASCII strings
142 #[arg(long)]
143 strings: bool,
144 /// Limit output lines
145 #[arg(long, default_value = "256")]
146 limit: usize,
147 },
148 /// Validate model integrity and quality
149 Validate {
150 /// Path to .apr model file
151 #[arg(value_name = "FILE")]
152 file: PathBuf,
153 /// Show 100-point quality assessment
154 #[arg(long)]
155 quality: bool,
156 /// Strict validation (fail on warnings)
157 #[arg(long)]
158 strict: bool,
159 /// Minimum score to pass (0-100)
160 #[arg(long)]
161 min_score: Option<u8>,
162 },
163 /// Compare two models
164 Diff {
165 /// First model file
166 #[arg(value_name = "FILE1")]
167 file1: PathBuf,
168 /// Second model file
169 #[arg(value_name = "FILE2")]
170 file2: PathBuf,
171 /// Show weight-level differences
172 #[arg(long)]
173 weights: bool,
174 /// Compare actual tensor values with statistical analysis
175 #[arg(long)]
176 values: bool,
177 /// Filter tensors by name pattern (for --values)
178 #[arg(long)]
179 filter: Option<String>,
180 /// Maximum number of tensors to compare (for --values)
181 #[arg(long, default_value = "10")]
182 limit: usize,
183 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
184 #[arg(long)]
185 transpose_aware: bool,
186 /// Output as JSON
187 #[arg(long)]
188 json: bool,
189 },
190 /// List tensor names and shapes
191 Tensors {
192 /// Path to .apr model file
193 #[arg(value_name = "FILE")]
194 file: PathBuf,
195 /// Show tensor statistics (mean, std, min, max)
196 #[arg(long)]
197 stats: bool,
198 /// Filter tensors by name pattern
199 #[arg(long)]
200 filter: Option<String>,
201 /// Limit number of tensors shown (0 = unlimited)
202 #[arg(long, default_value = "0")]
203 limit: usize,
204 /// Output as JSON
205 #[arg(long)]
206 json: bool,
207 },
208 /// Layer-by-layer trace analysis
209 Trace {
210 /// Path to .apr model file
211 #[arg(value_name = "FILE")]
212 file: PathBuf,
213 /// Filter layers by name pattern
214 #[arg(long)]
215 layer: Option<String>,
216 /// Compare with reference model
217 #[arg(long)]
218 reference: Option<PathBuf>,
219 /// Output as JSON
220 #[arg(long)]
221 json: bool,
222 /// Verbose output with per-layer stats
223 #[arg(short, long)]
224 verbose: bool,
225 /// Trace payload through model
226 #[arg(long)]
227 payload: bool,
228 /// Diff mode
229 #[arg(long)]
230 diff: bool,
231 /// Interactive mode
232 #[arg(long)]
233 interactive: bool,
234 },
235 /// Check for best practices and conventions
236 Lint {
237 /// Path to .apr model file
238 #[arg(value_name = "FILE")]
239 file: PathBuf,
240 },
241 /// Explain errors, architecture, and tensors
242 Explain {
243 /// Explain a specific error code
244 #[arg(value_name = "CODE")]
245 code: Option<String>,
246 /// Path to .apr model file (optional context)
247 #[arg(short, long)]
248 file: Option<PathBuf>,
249 /// Explain a specific tensor
250 #[arg(long)]
251 tensor: Option<String>,
252 },
253 /// Manage canary tests for regression
254 Canary {
255 #[command(subcommand)]
256 command: CanaryCommands,
257 },
258 /// Export model to other formats
259 Export {
260 /// Path to .apr model file
261 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
262 file: Option<PathBuf>,
263 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
264 #[arg(long, default_value = "safetensors")]
265 format: String,
266 /// Output file/directory path
267 #[arg(short, long)]
268 output: Option<PathBuf>,
269 /// Apply quantization during export (int8, int4, fp16)
270 #[arg(long)]
271 quantize: Option<String>,
272 /// List all supported export formats
273 #[arg(long)]
274 list_formats: bool,
275 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
276 #[arg(long)]
277 batch: Option<String>,
278 /// Output in JSON format
279 #[arg(long)]
280 json: bool,
281 },
282 /// Import from external formats (hf://org/repo, local files, URLs)
283 Import {
284 /// Source: hf://org/repo, local file, or URL
285 #[arg(value_name = "SOURCE")]
286 source: String,
287 /// Output .apr file path (default: derived from source name)
288 #[arg(short, long)]
289 output: Option<PathBuf>,
290 /// Model architecture (whisper, llama, bert, auto)
291 #[arg(long, default_value = "auto")]
292 arch: String,
293 /// Quantization (int8, int4, fp16)
294 #[arg(long)]
295 quantize: Option<String>,
296 /// Strict mode: reject unverified architectures and fail on validation errors
297 #[arg(long)]
298 strict: bool,
299 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
300 /// Uses realizar's Q4K converter instead of dequantizing to F32
301 #[arg(long)]
302 preserve_q4k: bool,
303 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
304 /// Required if the GGUF has no embedded tokenizer vocabulary.
305 #[arg(long)]
306 tokenizer: Option<PathBuf>,
307 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
308 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
309 #[arg(long)]
310 enforce_provenance: bool,
311 /// GH-223: Allow import without config.json (default: error).
312 /// Without config.json, hyperparameters like rope_theta are inferred from
313 /// tensor shapes and may be wrong, producing garbage output.
314 #[arg(long)]
315 allow_no_config: bool,
316 },
317 /// Download and cache model from HuggingFace (Ollama-like UX)
318 Pull {
319 /// Model reference (alias, hf:// URI, or org/repo)
320 #[arg(value_name = "MODEL")]
321 model_ref: String,
322 /// Force re-download even if cached
323 #[arg(long)]
324 force: bool,
325 },
326 /// List cached models
327 #[command(name = "list", alias = "ls")]
328 List,
329 /// Remove model from cache
330 #[command(name = "rm", alias = "remove")]
331 Rm {
332 /// Model reference to remove
333 #[arg(value_name = "MODEL")]
334 model_ref: String,
335 },
336 /// Convert/optimize model
337 Convert {
338 /// Path to .apr model file
339 #[arg(value_name = "FILE")]
340 file: PathBuf,
341 /// Quantize to format (int8, int4, fp16, q4k)
342 #[arg(long)]
343 quantize: Option<String>,
344 /// Compress output (none, zstd, zstd-max, lz4)
345 #[arg(long)]
346 compress: Option<String>,
347 /// Output file path
348 #[arg(short, long)]
349 output: PathBuf,
350 /// Force overwrite existing files
351 #[arg(short, long)]
352 force: bool,
353 },
354 /// Merge multiple models
355 Merge {
356 /// Model files to merge
357 #[arg(value_name = "FILES", num_args = 2..)]
358 files: Vec<PathBuf>,
359 /// Merge strategy (average, weighted, slerp, ties, dare)
360 #[arg(long, default_value = "average")]
361 strategy: String,
362 /// Output file path
363 #[arg(short, long)]
364 output: PathBuf,
365 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
366 #[arg(long, value_delimiter = ',')]
367 weights: Option<Vec<f32>>,
368 /// Base model for TIES/DARE (task vectors computed as delta from base)
369 #[arg(long)]
370 base_model: Option<PathBuf>,
371 /// DARE drop probability (default: 0.9)
372 #[arg(long, default_value = "0.9")]
373 drop_rate: f32,
374 /// TIES trim density threshold (default: 0.2)
375 #[arg(long, default_value = "0.2")]
376 density: f32,
377 /// RNG seed for DARE (default: 42)
378 #[arg(long, default_value = "42")]
379 seed: u64,
380 },
381 /// Quantize model weights (GH-243)
382 Quantize {
383 /// Input model file
384 #[arg(value_name = "FILE")]
385 file: PathBuf,
386 /// Quantization scheme: int8, int4, fp16, q4k
387 #[arg(long, short = 's', default_value = "int4")]
388 scheme: String,
389 /// Output file path (required unless --plan)
390 #[arg(short, long)]
391 output: Option<PathBuf>,
392 /// Output format override (apr, gguf, safetensors)
393 #[arg(long)]
394 format: Option<String>,
395 /// Batch quantization (comma-separated schemes)
396 #[arg(long)]
397 batch: Option<String>,
398 /// Plan mode (estimate only, no execution)
399 #[arg(long)]
400 plan: bool,
401 /// Force overwrite existing files
402 #[arg(short, long)]
403 force: bool,
404 },
405 /// Model optimization commands (fine-tune, prune, distill)
406 #[command(flatten)]
407 ModelOps(ModelOpsCommands),
408 /// Interactive terminal UI
409 Tui {
410 /// Path to .apr model file
411 #[arg(value_name = "FILE")]
412 file: Option<PathBuf>,
413 },
414 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
415 Check {
416 /// Path to model file
417 #[arg(value_name = "FILE")]
418 file: PathBuf,
419 /// Disable GPU acceleration
420 #[arg(long)]
421 no_gpu: bool,
422 /// Output as JSON
423 #[arg(long)]
424 json: bool,
425 },
426 /// Extended analysis, profiling, QA, and visualization commands
427 #[command(flatten)]
428 Extended(ExtendedCommands),
429}