// apr_cli/commands_enum.rs
2#[derive(Subcommand, Debug)]
3pub enum Commands {
4 /// Run model directly (auto-download, cache, execute)
5 Run {
6 /// Model source: local path, hf://org/repo, or URL
7 #[arg(value_name = "SOURCE")]
8 source: String,
9 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
10 #[arg(value_name = "PROMPT")]
11 positional_prompt: Option<String>,
12 /// Input file (audio, text, etc.)
13 #[arg(short, long)]
14 input: Option<PathBuf>,
15 /// Text prompt for generation (for LLM models)
16 #[arg(short, long)]
17 prompt: Option<String>,
18 /// Maximum tokens to generate (default: 32)
19 #[arg(short = 'n', long, default_value = "32")]
20 max_tokens: usize,
21 /// Enable streaming output
22 #[arg(long)]
23 stream: bool,
24 /// Language code (for ASR models)
25 #[arg(short, long)]
26 language: Option<String>,
27 /// Task (transcribe, translate)
28 #[arg(short, long)]
29 task: Option<String>,
30 /// Output format (text, json, srt, vtt)
31 #[arg(short = 'f', long, default_value = "text")]
32 format: String,
33 /// Disable GPU acceleration
34 #[arg(long, conflicts_with = "gpu")]
35 no_gpu: bool,
36 /// Force GPU acceleration
37 #[arg(long, conflicts_with = "no_gpu")]
38 gpu: bool,
39 /// Offline mode: block all network access (Sovereign AI compliance)
40 #[arg(long)]
41 offline: bool,
42 /// Benchmark mode: output performance metrics (tok/s, latency)
43 #[arg(long)]
44 benchmark: bool,
45 /// Enable inference tracing (APR-TRACE-001)
46 #[arg(long)]
47 trace: bool,
48 /// Trace specific steps only (comma-separated)
49 #[arg(long, value_delimiter = ',')]
50 trace_steps: Option<Vec<String>>,
51 /// Verbose tracing (show tensor values)
52 #[arg(long)]
53 trace_verbose: bool,
54 /// Save trace output to JSON file
55 #[arg(long, value_name = "FILE")]
56 trace_output: Option<PathBuf>,
57 /// Trace detail level (none, basic, layer, payload)
58 #[arg(long, value_name = "LEVEL", default_value = "basic")]
59 trace_level: String,
60 /// Shorthand for --trace --trace-level payload (tensor value inspection)
61 #[arg(long)]
62 trace_payload: bool,
63 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
64 #[arg(long)]
65 profile: bool,
66 /// Apply chat template for Instruct models (GAP-UX-001)
67 ///
68 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
69 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
70 #[arg(long)]
71 chat: bool,
72 /// Show verbose output (model loading, backend info)
73 #[arg(short, long)]
74 verbose: bool,
75 },
76 /// Start inference server (REST API, streaming, metrics)
77 Serve {
78 /// Path to model file
79 #[arg(value_name = "FILE")]
80 file: PathBuf,
81 /// Port to listen on
82 #[arg(short, long, default_value = "8080")]
83 port: u16,
84 /// Host to bind to
85 #[arg(long, default_value = "127.0.0.1")]
86 host: String,
87 /// Disable CORS
88 #[arg(long)]
89 no_cors: bool,
90 /// Disable Prometheus metrics endpoint
91 #[arg(long)]
92 no_metrics: bool,
93 /// Disable GPU acceleration
94 #[arg(long)]
95 no_gpu: bool,
96 /// Force GPU acceleration (requires CUDA)
97 #[arg(long)]
98 gpu: bool,
99 /// Enable batched GPU inference for 2X+ throughput
100 #[arg(long)]
101 batch: bool,
102 /// Enable inference tracing (PMAT-SHOWCASE-METHODOLOGY-001)
103 #[arg(long)]
104 trace: bool,
105 /// Trace detail level (none, basic, layer)
106 #[arg(long, value_name = "LEVEL", default_value = "basic")]
107 trace_level: String,
108 /// Enable inline Roofline profiling (adds X-Profile headers)
109 #[arg(long)]
110 profile: bool,
111 },
112 /// Inspect model metadata, vocab, and structure
113 Inspect {
114 /// Path to .apr model file
115 #[arg(value_name = "FILE")]
116 file: PathBuf,
117 /// Show vocabulary details
118 #[arg(long)]
119 vocab: bool,
120 /// Show filter/security details
121 #[arg(long)]
122 filters: bool,
123 /// Show weight statistics
124 #[arg(long)]
125 weights: bool,
126 /// Output as JSON
127 #[arg(long)]
128 json: bool,
129 },
130 /// Simple debugging output ("drama" mode available)
131 Debug {
132 /// Path to .apr model file
133 #[arg(value_name = "FILE")]
134 file: PathBuf,
135 /// Theatrical "drama" mode output
136 #[arg(long)]
137 drama: bool,
138 /// Show hex dump
139 #[arg(long)]
140 hex: bool,
141 /// Extract ASCII strings
142 #[arg(long)]
143 strings: bool,
144 /// Limit output lines
145 #[arg(long, default_value = "256")]
146 limit: usize,
147 },
148 /// Validate model integrity and quality
149 Validate {
150 /// Path to .apr model file
151 #[arg(value_name = "FILE")]
152 file: PathBuf,
153 /// Show 100-point quality assessment
154 #[arg(long)]
155 quality: bool,
156 /// Strict validation (fail on warnings)
157 #[arg(long)]
158 strict: bool,
159 /// Minimum score to pass (0-100)
160 #[arg(long)]
161 min_score: Option<u8>,
162 },
163 /// Compare two models
164 Diff {
165 /// First model file
166 #[arg(value_name = "FILE1")]
167 file1: PathBuf,
168 /// Second model file
169 #[arg(value_name = "FILE2")]
170 file2: PathBuf,
171 /// Show weight-level differences
172 #[arg(long)]
173 weights: bool,
174 /// Compare actual tensor values with statistical analysis
175 #[arg(long)]
176 values: bool,
177 /// Filter tensors by name pattern (for --values)
178 #[arg(long)]
179 filter: Option<String>,
180 /// Maximum number of tensors to compare (for --values)
181 #[arg(long, default_value = "10")]
182 limit: usize,
183 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
184 #[arg(long)]
185 transpose_aware: bool,
186 /// Output as JSON
187 #[arg(long)]
188 json: bool,
189 },
190 /// List tensor names and shapes
191 Tensors {
192 /// Path to .apr model file
193 #[arg(value_name = "FILE")]
194 file: PathBuf,
195 /// Show tensor statistics (mean, std, min, max)
196 #[arg(long)]
197 stats: bool,
198 /// Filter tensors by name pattern
199 #[arg(long)]
200 filter: Option<String>,
201 /// Limit number of tensors shown (0 = unlimited)
202 #[arg(long, default_value = "0")]
203 limit: usize,
204 /// Output as JSON
205 #[arg(long)]
206 json: bool,
207 },
208 /// Layer-by-layer trace analysis
209 Trace {
210 /// Path to .apr model file
211 #[arg(value_name = "FILE")]
212 file: PathBuf,
213 /// Filter layers by name pattern
214 #[arg(long)]
215 layer: Option<String>,
216 /// Compare with reference model
217 #[arg(long)]
218 reference: Option<PathBuf>,
219 /// Output as JSON
220 #[arg(long)]
221 json: bool,
222 /// Verbose output with per-layer stats
223 #[arg(short, long)]
224 verbose: bool,
225 /// Trace payload through model
226 #[arg(long)]
227 payload: bool,
228 /// Diff mode
229 #[arg(long)]
230 diff: bool,
231 /// Interactive mode
232 #[arg(long)]
233 interactive: bool,
234 },
235 /// Check for best practices and conventions
236 Lint {
237 /// Path to .apr model file
238 #[arg(value_name = "FILE")]
239 file: PathBuf,
240 },
241 /// Explain errors, architecture, and tensors
242 Explain {
243 /// Explain a specific error code
244 #[arg(value_name = "CODE")]
245 code: Option<String>,
246 /// Path to .apr model file (optional context)
247 #[arg(short, long)]
248 file: Option<PathBuf>,
249 /// Explain a specific tensor
250 #[arg(long)]
251 tensor: Option<String>,
252 },
253 /// Manage canary tests for regression
254 Canary {
255 #[command(subcommand)]
256 command: CanaryCommands,
257 },
258 /// Export model to other formats
259 Export {
260 /// Path to .apr model file
261 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
262 file: Option<PathBuf>,
263 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
264 #[arg(long, default_value = "safetensors")]
265 format: String,
266 /// Output file/directory path
267 #[arg(short, long)]
268 output: Option<PathBuf>,
269 /// Apply quantization during export (int8, int4, fp16)
270 #[arg(long)]
271 quantize: Option<String>,
272 /// List all supported export formats
273 #[arg(long)]
274 list_formats: bool,
275 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
276 #[arg(long)]
277 batch: Option<String>,
278 /// Output in JSON format
279 #[arg(long)]
280 json: bool,
281 },
282 /// Import from external formats (hf://org/repo, local files, URLs)
283 Import {
284 /// Source: hf://org/repo, local file, or URL
285 #[arg(value_name = "SOURCE")]
286 source: String,
287 /// Output .apr file path (default: derived from source name)
288 #[arg(short, long)]
289 output: Option<PathBuf>,
290 /// Model architecture (whisper, llama, bert, auto)
291 #[arg(long, default_value = "auto")]
292 arch: String,
293 /// Quantization (int8, int4, fp16)
294 #[arg(long)]
295 quantize: Option<String>,
296 /// Strict mode: reject unverified architectures and fail on validation errors
297 #[arg(long)]
298 strict: bool,
299 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
300 /// Uses realizar's Q4K converter instead of dequantizing to F32
301 #[arg(long)]
302 preserve_q4k: bool,
303 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
304 /// Required if the GGUF has no embedded tokenizer vocabulary.
305 #[arg(long)]
306 tokenizer: Option<PathBuf>,
307 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
308 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
309 #[arg(long)]
310 enforce_provenance: bool,
311 /// GH-223: Allow import without config.json (default: error).
312 /// Without config.json, hyperparameters like rope_theta are inferred from
313 /// tensor shapes and may be wrong, producing garbage output.
314 #[arg(long)]
315 allow_no_config: bool,
316 },
317 /// Download and cache model from HuggingFace (Ollama-like UX)
318 Pull {
319 /// Model reference (alias, hf:// URI, or org/repo)
320 #[arg(value_name = "MODEL")]
321 model_ref: String,
322 /// Force re-download even if cached
323 #[arg(long)]
324 force: bool,
325 },
326 /// List cached models
327 #[command(name = "list", alias = "ls")]
328 List,
329 /// Remove model from cache
330 #[command(name = "rm", alias = "remove")]
331 Rm {
332 /// Model reference to remove
333 #[arg(value_name = "MODEL")]
334 model_ref: String,
335 },
336 /// Convert/optimize model
337 Convert {
338 /// Path to .apr model file
339 #[arg(value_name = "FILE")]
340 file: PathBuf,
341 /// Quantize to format (int8, int4, fp16, q4k)
342 #[arg(long)]
343 quantize: Option<String>,
344 /// Compress output (none, zstd, zstd-max, lz4)
345 #[arg(long)]
346 compress: Option<String>,
347 /// Output file path
348 #[arg(short, long)]
349 output: PathBuf,
350 /// Force overwrite existing files
351 #[arg(short, long)]
352 force: bool,
353 },
354 /// Merge multiple models
355 Merge {
356 /// Model files to merge
357 #[arg(value_name = "FILES", num_args = 2..)]
358 files: Vec<PathBuf>,
359 /// Merge strategy (average, weighted, slerp, ties, dare)
360 #[arg(long, default_value = "average")]
361 strategy: String,
362 /// Output file path
363 #[arg(short, long)]
364 output: PathBuf,
365 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
366 #[arg(long, value_delimiter = ',')]
367 weights: Option<Vec<f32>>,
368 /// Base model for TIES/DARE (task vectors computed as delta from base)
369 #[arg(long)]
370 base_model: Option<PathBuf>,
371 /// DARE drop probability (default: 0.9)
372 #[arg(long, default_value = "0.9")]
373 drop_rate: f32,
374 /// TIES trim density threshold (default: 0.2)
375 #[arg(long, default_value = "0.2")]
376 density: f32,
377 /// RNG seed for DARE (default: 42)
378 #[arg(long, default_value = "42")]
379 seed: u64,
380 },
381 /// Quantize model weights (GH-243)
382 Quantize {
383 /// Input model file
384 #[arg(value_name = "FILE")]
385 file: PathBuf,
386 /// Quantization scheme: int8, int4, fp16, q4k
387 #[arg(long, short = 's', default_value = "int4")]
388 scheme: String,
389 /// Output file path (required unless --plan)
390 #[arg(short, long)]
391 output: Option<PathBuf>,
392 /// Output format override (apr, gguf, safetensors)
393 #[arg(long)]
394 format: Option<String>,
395 /// Batch quantization (comma-separated schemes)
396 #[arg(long)]
397 batch: Option<String>,
398 /// Plan mode (estimate only, no execution)
399 #[arg(long)]
400 plan: bool,
401 /// Force overwrite existing files
402 #[arg(short, long)]
403 force: bool,
404 },
405 /// Model optimization commands (fine-tune, prune, distill)
406 #[command(flatten)]
407 ModelOps(ModelOpsCommands),
408 /// Interactive terminal UI
409 Tui {
410 /// Path to .apr model file
411 #[arg(value_name = "FILE")]
412 file: Option<PathBuf>,
413 },
414 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
415 Check {
416 /// Path to model file
417 #[arg(value_name = "FILE")]
418 file: PathBuf,
419 /// Disable GPU acceleration
420 #[arg(long)]
421 no_gpu: bool,
422 /// Output as JSON
423 #[arg(long)]
424 json: bool,
425 },
426 /// Extended analysis, profiling, QA, and visualization commands
427 #[command(flatten)]
428 Extended(ExtendedCommands),
429}