Skip to main content

apr_cli/
commands_enum.rs

1
2#[derive(Subcommand, Debug)]
3pub enum Commands {
4    /// Run model directly (auto-download, cache, execute)
5    Run {
6        /// Model source: local path, hf://org/repo, or URL
7        #[arg(value_name = "SOURCE")]
8        source: String,
9        /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10        #[arg(value_name = "PROMPT")]
11        positional_prompt: Option<String>,
12        /// Input file (audio, text, etc.)
13        #[arg(short, long)]
14        input: Option<PathBuf>,
15        /// Text prompt for generation (for LLM models)
16        #[arg(short, long)]
17        prompt: Option<String>,
18        /// Maximum tokens to generate (default: 32)
19        #[arg(short = 'n', long, default_value = "32")]
20        max_tokens: usize,
21        /// Enable streaming output
22        #[arg(long)]
23        stream: bool,
24        /// Language code (for ASR models)
25        #[arg(short, long)]
26        language: Option<String>,
27        /// Task (transcribe, translate)
28        #[arg(short, long)]
29        task: Option<String>,
30        /// Output format (text, json, srt, vtt)
31        #[arg(short = 'f', long, default_value = "text")]
32        format: String,
33        /// Disable GPU acceleration
34        #[arg(long, conflicts_with = "gpu")]
35        no_gpu: bool,
36        /// Force GPU acceleration
37        #[arg(long, conflicts_with = "no_gpu")]
38        gpu: bool,
39        /// Offline mode: block all network access (Sovereign AI compliance)
40        #[arg(long)]
41        offline: bool,
42        /// Benchmark mode: output performance metrics (tok/s, latency)
43        #[arg(long)]
44        benchmark: bool,
45        /// Enable inference tracing (APR-TRACE-001)
46        #[arg(long)]
47        trace: bool,
48        /// Trace specific steps only (comma-separated)
49        #[arg(long, value_delimiter = ',')]
50        trace_steps: Option<Vec<String>>,
51        /// Verbose tracing (show tensor values)
52        #[arg(long)]
53        trace_verbose: bool,
54        /// Save trace output to JSON file
55        #[arg(long, value_name = "FILE")]
56        trace_output: Option<PathBuf>,
57        /// Trace detail level (none, basic, layer, payload)
58        #[arg(long, value_name = "LEVEL", default_value = "basic")]
59        trace_level: String,
60        /// Shorthand for --trace --trace-level payload (tensor value inspection)
61        #[arg(long)]
62        trace_payload: bool,
63        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
64        #[arg(long)]
65        profile: bool,
66        /// Apply chat template for Instruct models (GAP-UX-001)
67        ///
68        /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
69        /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
70        #[arg(long)]
71        chat: bool,
72        /// Show verbose output (model loading, backend info)
73        #[arg(short, long)]
74        verbose: bool,
75    },
76    /// Inference server (plan/run)
77    Serve {
78        #[command(subcommand)]
79        command: ServeCommands,
80    },
81    /// Inspect model metadata, vocab, and structure
82    Inspect {
83        /// Path to .apr model file
84        #[arg(value_name = "FILE")]
85        file: PathBuf,
86        /// Show vocabulary details
87        #[arg(long)]
88        vocab: bool,
89        /// Show filter/security details
90        #[arg(long)]
91        filters: bool,
92        /// Show weight statistics
93        #[arg(long)]
94        weights: bool,
95        /// Output as JSON
96        #[arg(long)]
97        json: bool,
98    },
99    /// Simple debugging output ("drama" mode available)
100    Debug {
101        /// Path to .apr model file
102        #[arg(value_name = "FILE")]
103        file: PathBuf,
104        /// Theatrical "drama" mode output
105        #[arg(long)]
106        drama: bool,
107        /// Show hex dump
108        #[arg(long)]
109        hex: bool,
110        /// Extract ASCII strings
111        #[arg(long)]
112        strings: bool,
113        /// Limit output lines
114        #[arg(long, default_value = "256")]
115        limit: usize,
116    },
117    /// Validate model integrity and quality
118    Validate {
119        /// Path to .apr model file
120        #[arg(value_name = "FILE")]
121        file: PathBuf,
122        /// Show 100-point quality assessment
123        #[arg(long)]
124        quality: bool,
125        /// Strict validation (fail on warnings)
126        #[arg(long)]
127        strict: bool,
128        /// Minimum score to pass (0-100)
129        #[arg(long)]
130        min_score: Option<u8>,
131    },
132    /// Compare two models
133    Diff {
134        /// First model file
135        #[arg(value_name = "FILE1")]
136        file1: PathBuf,
137        /// Second model file
138        #[arg(value_name = "FILE2")]
139        file2: PathBuf,
140        /// Show weight-level differences
141        #[arg(long)]
142        weights: bool,
143        /// Compare actual tensor values with statistical analysis
144        #[arg(long)]
145        values: bool,
146        /// Filter tensors by name pattern (for --values)
147        #[arg(long)]
148        filter: Option<String>,
149        /// Maximum number of tensors to compare (for --values)
150        #[arg(long, default_value = "10")]
151        limit: usize,
152        /// Account for transpose when comparing (GGUF col-major vs APR row-major)
153        #[arg(long)]
154        transpose_aware: bool,
155        /// Output as JSON
156        #[arg(long)]
157        json: bool,
158    },
159    /// List tensor names and shapes
160    Tensors {
161        /// Path to .apr model file
162        #[arg(value_name = "FILE")]
163        file: PathBuf,
164        /// Show tensor statistics (mean, std, min, max)
165        #[arg(long)]
166        stats: bool,
167        /// Filter tensors by name pattern
168        #[arg(long)]
169        filter: Option<String>,
170        /// Limit number of tensors shown (0 = unlimited)
171        #[arg(long, default_value = "0")]
172        limit: usize,
173        /// Output as JSON
174        #[arg(long)]
175        json: bool,
176    },
177    /// Layer-by-layer trace analysis
178    Trace {
179        /// Path to .apr model file
180        #[arg(value_name = "FILE")]
181        file: PathBuf,
182        /// Filter layers by name pattern
183        #[arg(long)]
184        layer: Option<String>,
185        /// Compare with reference model
186        #[arg(long)]
187        reference: Option<PathBuf>,
188        /// Output as JSON
189        #[arg(long)]
190        json: bool,
191        /// Verbose output with per-layer stats
192        #[arg(short, long)]
193        verbose: bool,
194        /// Trace payload through model
195        #[arg(long)]
196        payload: bool,
197        /// Diff mode
198        #[arg(long)]
199        diff: bool,
200        /// Interactive mode
201        #[arg(long)]
202        interactive: bool,
203    },
204    /// Check for best practices and conventions
205    Lint {
206        /// Path to .apr model file
207        #[arg(value_name = "FILE")]
208        file: PathBuf,
209    },
210    /// Explain errors, architecture, tensors, and kernel dispatch
211    Explain {
212        /// Error code, model file path, or family name (auto-detected)
213        #[arg(value_name = "CODE_OR_FILE")]
214        code_or_file: Option<String>,
215        /// Path to .apr model file (optional context for --tensor)
216        #[arg(short, long)]
217        file: Option<PathBuf>,
218        /// Explain a specific tensor
219        #[arg(long)]
220        tensor: Option<String>,
221        /// Explain kernel dispatch pipeline for architecture
222        #[arg(long)]
223        kernel: bool,
224        /// Output as JSON
225        #[arg(long)]
226        json: bool,
227        /// Show kernel contract details and proof obligations
228        #[arg(short, long)]
229        verbose: bool,
230        /// Show per-kernel proof status from contract tests
231        #[arg(long)]
232        proof_status: bool,
233    },
234    /// Manage canary tests for regression
235    Canary {
236        #[command(subcommand)]
237        command: CanaryCommands,
238    },
239    /// Export model to other formats
240    Export {
241        /// Path to .apr model file
242        #[arg(value_name = "FILE", required_unless_present = "list_formats")]
243        file: Option<PathBuf>,
244        /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
245        #[arg(long, default_value = "safetensors")]
246        format: String,
247        /// Output file/directory path
248        #[arg(short, long)]
249        output: Option<PathBuf>,
250        /// Apply quantization during export (int8, int4, fp16)
251        #[arg(long)]
252        quantize: Option<String>,
253        /// List all supported export formats
254        #[arg(long)]
255        list_formats: bool,
256        /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
257        #[arg(long)]
258        batch: Option<String>,
259        /// Output in JSON format
260        #[arg(long)]
261        json: bool,
262        /// Plan mode (validate inputs, show export plan, no execution)
263        #[arg(long)]
264        plan: bool,
265    },
266    /// Import from external formats (hf://org/repo, local files, URLs)
267    Import {
268        /// Source: hf://org/repo, local file, or URL
269        #[arg(value_name = "SOURCE")]
270        source: String,
271        /// Output .apr file path (default: derived from source name)
272        #[arg(short, long)]
273        output: Option<PathBuf>,
274        /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
275        #[arg(long, default_value = "auto")]
276        arch: String,
277        /// Quantization (int8, int4, fp16)
278        #[arg(long)]
279        quantize: Option<String>,
280        /// Strict mode: reject unverified architectures and fail on validation errors
281        #[arg(long)]
282        strict: bool,
283        /// Preserve Q4K quantization for fused kernel inference (GGUF only)
284        /// Uses realizar's Q4K converter instead of dequantizing to F32
285        #[arg(long)]
286        preserve_q4k: bool,
287        /// PMAT-232: External tokenizer.json for weights-only GGUF files.
288        /// Required if the GGUF has no embedded tokenizer vocabulary.
289        #[arg(long)]
290        tokenizer: Option<PathBuf>,
291        /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
292        /// (only SafeTensors sources allowed). Ensures single-provenance testing.
293        #[arg(long)]
294        enforce_provenance: bool,
295        /// GH-223: Allow import without config.json (default: error).
296        /// Without config.json, hyperparameters like rope_theta are inferred from
297        /// tensor shapes and may be wrong, producing garbage output.
298        #[arg(long)]
299        allow_no_config: bool,
300    },
301    /// Download and cache model from HuggingFace (Ollama-like UX)
302    Pull {
303        /// Model reference (alias, hf:// URI, or org/repo)
304        #[arg(value_name = "MODEL")]
305        model_ref: String,
306        /// Force re-download even if cached
307        #[arg(long)]
308        force: bool,
309    },
310    /// List cached models
311    #[command(name = "list", alias = "ls")]
312    List,
313    /// Remove model from cache
314    #[command(name = "rm", alias = "remove")]
315    Rm {
316        /// Model reference to remove
317        #[arg(value_name = "MODEL")]
318        model_ref: String,
319    },
320    /// Convert/optimize model
321    Convert {
322        /// Path to .apr model file
323        #[arg(value_name = "FILE")]
324        file: PathBuf,
325        /// Quantize to format (int8, int4, fp16, q4k)
326        #[arg(long)]
327        quantize: Option<String>,
328        /// Compress output (none, zstd, zstd-max, lz4)
329        #[arg(long)]
330        compress: Option<String>,
331        /// Output file path
332        #[arg(short, long)]
333        output: PathBuf,
334        /// Force overwrite existing files
335        #[arg(short, long)]
336        force: bool,
337    },
338    /// Compile model into standalone executable (APR-SPEC ยง4.16)
339    Compile {
340        /// Input .apr model file
341        #[arg(value_name = "FILE", required_unless_present = "list_targets")]
342        file: Option<PathBuf>,
343        /// Output binary path (default: derived from model name)
344        #[arg(short, long)]
345        output: Option<PathBuf>,
346        /// Target triple (e.g., x86_64-unknown-linux-musl)
347        #[arg(long)]
348        target: Option<String>,
349        /// Quantize weights before embedding (int8, int4, fp16)
350        #[arg(long)]
351        quantize: Option<String>,
352        /// Release mode (optimized)
353        #[arg(long)]
354        release: bool,
355        /// Strip debug symbols
356        #[arg(long)]
357        strip: bool,
358        /// Enable LTO (Link-Time Optimization)
359        #[arg(long)]
360        lto: bool,
361        /// List available compilation targets
362        #[arg(long)]
363        list_targets: bool,
364    },
365    /// Merge multiple models
366    Merge {
367        /// Model files to merge
368        #[arg(value_name = "FILES", num_args = 2..)]
369        files: Vec<PathBuf>,
370        /// Merge strategy (average, weighted, slerp, ties, dare)
371        #[arg(long, default_value = "average")]
372        strategy: String,
373        /// Output file path
374        #[arg(short, long)]
375        output: PathBuf,
376        /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
377        #[arg(long, value_delimiter = ',')]
378        weights: Option<Vec<f32>>,
379        /// Base model for TIES/DARE (task vectors computed as delta from base)
380        #[arg(long)]
381        base_model: Option<PathBuf>,
382        /// DARE drop probability (default: 0.9)
383        #[arg(long, default_value = "0.9")]
384        drop_rate: f32,
385        /// TIES trim density threshold (default: 0.2)
386        #[arg(long, default_value = "0.2")]
387        density: f32,
388        /// RNG seed for DARE (default: 42)
389        #[arg(long, default_value = "42")]
390        seed: u64,
391        /// Plan mode (validate inputs, show merge plan, no execution)
392        #[arg(long)]
393        plan: bool,
394    },
395    /// Quantize model weights (GH-243)
396    Quantize {
397        /// Input model file
398        #[arg(value_name = "FILE")]
399        file: PathBuf,
400        /// Quantization scheme: int8, int4, fp16, q4k
401        #[arg(long, short = 's', default_value = "int4")]
402        scheme: String,
403        /// Output file path (required unless --plan)
404        #[arg(short, long)]
405        output: Option<PathBuf>,
406        /// Output format override (apr, gguf, safetensors)
407        #[arg(long)]
408        format: Option<String>,
409        /// Batch quantization (comma-separated schemes)
410        #[arg(long)]
411        batch: Option<String>,
412        /// Plan mode (estimate only, no execution)
413        #[arg(long)]
414        plan: bool,
415        /// Force overwrite existing files
416        #[arg(short, long)]
417        force: bool,
418    },
419    /// Model optimization commands (fine-tune, prune, distill)
420    #[command(flatten)]
421    ModelOps(ModelOpsCommands),
422    /// Interactive terminal UI
423    Tui {
424        /// Path to .apr model file
425        #[arg(value_name = "FILE")]
426        file: Option<PathBuf>,
427    },
428    /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
429    Check {
430        /// Path to model file
431        #[arg(value_name = "FILE")]
432        file: PathBuf,
433        /// Disable GPU acceleration
434        #[arg(long)]
435        no_gpu: bool,
436        /// Output as JSON
437        #[arg(long)]
438        json: bool,
439    },
440    /// GPU status and VRAM reservation management (GPU-SHARE-001)
441    #[cfg(feature = "training")]
442    Gpu {
443        /// Show reservations as JSON
444        #[arg(long)]
445        json: bool,
446    },
447    /// Extended analysis, profiling, QA, and visualization commands
448    #[command(flatten)]
449    Extended(ExtendedCommands),
450}