apr_cli/
commands_enum.rs

1
// Selector for how `apr code` renders its result; consumed by
// `Commands::Code::output_format`, which parses it with `value_enum` and
// defaults to `CodeOutputFormat::Text`.
// NOTE(review): `ValueEnum` is presumably clap's derive (imports are not
// visible here). If so, the variant `///` comments below are also shown as
// CLI help for each possible value — treat them as user-facing strings.
/// Output format for `apr code` non-interactive mode (PMAT-CODE-OUTPUT-FORMAT-001).
/// Mirrors Claude Code's `claude -p --output-format <fmt>` parity row.
#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Default)]
pub enum CodeOutputFormat {
    /// Plain assistant text on stdout (default; existing behavior).
    // `#[default]` makes this the variant produced by `Default::default()`.
    #[default]
    Text,
    /// Structured JSON envelope: `{type:"result", subtype:"success", result, session_id, duration_ms}`.
    Json,
}
12
// Selector for how `apr code` interprets stdin; consumed by
// `Commands::Code::input_format`, which parses it with `value_enum` and
// defaults to `CodeInputFormat::Text`.
// NOTE(review): `ValueEnum` is presumably clap's derive (imports are not
// visible here). If so, the variant `///` comments below are also shown as
// CLI help for each possible value — treat them as user-facing strings.
/// Input format for `apr code` non-interactive mode (PMAT-CODE-INPUT-FORMAT-001).
/// `--input-format json` reads `{"role":"user","content":"..."}` from stdin instead
/// of treating stdin as raw prompt text.
#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Default)]
pub enum CodeInputFormat {
    /// Raw prompt text from positional args or stdin (default; existing behavior).
    // `#[default]` makes this the variant produced by `Default::default()`.
    #[default]
    Text,
    /// JSON message envelope on stdin: `{"role":"user","content":"..."}`.
    Json,
}
24
25#[derive(Subcommand, Debug)]
26pub enum Commands {
27    /// Run model directly (auto-download, cache, execute)
28    Run {
29        /// Model source: local path, hf://org/repo, or URL
30        #[arg(value_name = "SOURCE")]
31        source: String,
32        /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
33        #[arg(value_name = "PROMPT")]
34        positional_prompt: Option<String>,
35        /// Input file (audio, text, etc.)
36        #[arg(short, long)]
37        input: Option<PathBuf>,
38        /// Text prompt for generation (for LLM models)
39        #[arg(short, long)]
40        prompt: Option<String>,
41        /// Maximum tokens to generate (default: 32)
42        #[arg(short = 'n', long, default_value = "32")]
43        max_tokens: usize,
44        /// Enable streaming output
45        #[arg(long)]
46        stream: bool,
47        /// Language code (for ASR models)
48        #[arg(short, long)]
49        language: Option<String>,
50        /// Task (transcribe, translate)
51        #[arg(short, long)]
52        task: Option<String>,
53        /// Output format (text, json, srt, vtt)
54        #[arg(short = 'f', long, default_value = "text")]
55        format: String,
56        /// Disable GPU acceleration (force CPU-only inference)
57        #[arg(long, alias = "cpu", conflicts_with = "gpu")]
58        no_gpu: bool,
59        /// Force GPU acceleration
60        #[arg(long, conflicts_with = "no_gpu")]
61        gpu: bool,
62        /// Offline mode: block all network access (Sovereign AI compliance)
63        #[arg(long)]
64        offline: bool,
65        /// Benchmark mode: output performance metrics (tok/s, latency)
66        #[arg(long)]
67        benchmark: bool,
68        /// Enable inference tracing (APR-TRACE-001)
69        #[arg(long)]
70        trace: bool,
71        /// Trace specific steps only (comma-separated)
72        #[arg(long, value_delimiter = ',')]
73        trace_steps: Option<Vec<String>>,
74        /// Verbose tracing (show tensor values)
75        #[arg(long)]
76        trace_verbose: bool,
77        /// Save trace output to JSON file
78        #[arg(long, value_name = "FILE")]
79        trace_output: Option<PathBuf>,
80        /// Trace detail level (none, basic, layer, payload, chrome)
81        /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
82        /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
83        #[arg(long, value_name = "LEVEL", default_value = "basic")]
84        trace_level: String,
85        /// Shorthand for --trace --trace-level payload (tensor value inspection)
86        #[arg(long)]
87        trace_payload: bool,
88        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
89        #[arg(long)]
90        profile: bool,
91        /// Apply chat template for Instruct models (GAP-UX-001)
92        ///
93        /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
94        /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
95        #[arg(long)]
96        chat: bool,
97        /// Sampling temperature (0.0 = greedy, default: 0.0)
98        #[arg(long, default_value = "0.0")]
99        temperature: f32,
100        /// Top-k sampling (default: 1 = greedy)
101        #[arg(long, default_value = "1")]
102        top_k: usize,
103        /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
104        /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
105        #[arg(long)]
106        top_p: Option<f32>,
107        /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
108        /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
109        #[arg(long, default_value = "299792458")]
110        seed: u64,
111        /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
112        /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
113        #[arg(long, default_value = "1.0")]
114        repeat_penalty: f32,
115        /// Context window for repetition penalty (number of recent tokens to check)
116        /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
117        #[arg(long, default_value = "64")]
118        repeat_last_n: usize,
119        /// Process prompt tokens one-by-one instead of batched prefill.
120        /// Useful for debugging prefill correctness (comparing per-token attention).
121        /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
122        #[arg(long)]
123        split_prompt: bool,
124        /// Batch mode: read prompts from JSONL, output results as JSONL.
125        /// Model loads once, processes all prompts sequentially.
126        /// Each input line: {"prompt": "...", "task_id": "..."}
127        /// Chat template is applied automatically.
128        #[arg(long, value_name = "FILE")]
129        batch_jsonl: Option<PathBuf>,
130        /// Show verbose output (model loading, backend info)
131        #[arg(short, long)]
132        verbose: bool,
133        /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
134        #[arg(long, value_name = "BACKEND")]
135        backend: Option<String>,
136    },
137    /// Inference server (plan/run)
138    Serve {
139        #[command(subcommand)]
140        command: ServeCommands,
141    },
142    /// Inspect model metadata, vocab, and structure
143    Inspect {
144        /// Path to .apr model file
145        #[arg(value_name = "FILE")]
146        file: PathBuf,
147        /// Show vocabulary details
148        #[arg(long)]
149        vocab: bool,
150        /// Show filter/security details
151        #[arg(long)]
152        filters: bool,
153        /// Show weight statistics
154        #[arg(long)]
155        weights: bool,
156        /// Output as JSON
157        #[arg(long)]
158        json: bool,
159        /// Emit a 0-100 model quality score block.
160        ///
161        /// Per SPEC-SHIP-TWO-001 §84 P3-A (AC-SHIP2-007 quality
162        /// threshold ≥ 90). The score aggregates: physics checks
163        /// (no NaN/Inf, no all-zero tensors), structural
164        /// completeness (architecture / hidden_size / num_layers
165        /// metadata present), provenance (license + data_source +
166        /// data_license non-empty), HF identity (hf_architecture
167        /// stamped per PMAT-690 P0-K), and tokenizer presence
168        /// (has_vocab + embedded merges). A ship-blocking model
169        /// MUST score ≥ 90 by this rubric.
170        #[arg(long)]
171        quality: bool,
172    },
173    /// Simple debugging output ("drama" mode available)
174    Debug {
175        /// Path to .apr model file
176        #[arg(value_name = "FILE")]
177        file: PathBuf,
178        /// Theatrical "drama" mode output
179        #[arg(long)]
180        drama: bool,
181        /// Show hex dump
182        #[arg(long)]
183        hex: bool,
184        /// Extract ASCII strings
185        #[arg(long)]
186        strings: bool,
187        /// Limit output lines
188        #[arg(long, default_value = "256")]
189        limit: usize,
190    },
191    /// Validate model integrity and quality
192    Validate {
193        /// Path to .apr model file
194        #[arg(value_name = "FILE")]
195        file: PathBuf,
196        /// Show 100-point quality assessment
197        #[arg(long)]
198        quality: bool,
199        /// Strict validation (fail on warnings)
200        #[arg(long)]
201        strict: bool,
202        /// Minimum score to pass (0-100)
203        #[arg(long)]
204        min_score: Option<u8>,
205    },
206    /// Validate a publish manifest (FALSIFY-PM-001..006).
207    ///
208    /// Contract: `contracts/publish-manifest-v1.yaml`
209    /// Spec:     SPEC-SHIP-TWO-001 §12.3 AC-EX-004
210    ValidateManifest {
211        /// Path to manifest YAML
212        #[arg(value_name = "MANIFEST")]
213        file: PathBuf,
214        /// Optional local .apr artifact to discharge FALSIFY-PM-002 (sha256 match)
215        #[arg(long, value_name = "APR_FILE")]
216        artifact: Option<PathBuf>,
217        /// Discharge FALSIFY-PM-003 via network: HTTP HEAD + streaming sha256.
218        /// Default is DEFERRED (offline-safe). Ignored when --offline is set.
219        /// Closes F-PUBLISH-EXTRA-001::dogfood_ex05 (no Python in ex-05).
220        #[arg(long)]
221        live: bool,
222    },
223    /// Compare two models
224    Diff {
225        /// First model file
226        #[arg(value_name = "FILE1")]
227        file1: PathBuf,
228        /// Second model file
229        #[arg(value_name = "FILE2")]
230        file2: PathBuf,
231        /// Show weight-level differences
232        #[arg(long)]
233        weights: bool,
234        /// Compare actual tensor values with statistical analysis
235        #[arg(long)]
236        values: bool,
237        /// Filter tensors by name pattern (for --values)
238        #[arg(long)]
239        filter: Option<String>,
240        /// Maximum number of tensors to compare (for --values)
241        #[arg(long, default_value = "10")]
242        limit: usize,
243        /// Account for transpose when comparing (GGUF col-major vs APR row-major)
244        #[arg(long)]
245        transpose_aware: bool,
246        /// Output as JSON
247        #[arg(long)]
248        json: bool,
249        /// CRUX-B-20: per-tensor quant roundtrip error report (RMSE / cosine / max_abs).
250        /// FILE1 is the reference (fp16/fp32/bf16); FILE2 is the quantized variant.
251        #[arg(long)]
252        quant_roundtrip: bool,
253        /// CRUX-B-20: cosine threshold for the quant-roundtrip exit-code gate.
254        /// Any tensor with cosine < threshold makes the command exit non-zero.
255        #[arg(long, default_value = "0.95")]
256        threshold: f32,
257        /// CRUX-B-20: suppress the threshold exit-code gate (still emits the report).
258        #[arg(long)]
259        no_threshold: bool,
260    },
261    /// List tensor names and shapes
262    Tensors {
263        /// Path to .apr model file
264        #[arg(value_name = "FILE")]
265        file: PathBuf,
266        /// Show tensor statistics (mean, std, min, max)
267        #[arg(long)]
268        stats: bool,
269        /// Filter tensors by name pattern
270        #[arg(long)]
271        filter: Option<String>,
272        /// Limit number of tensors shown (0 = unlimited)
273        #[arg(long, default_value = "0")]
274        limit: usize,
275        /// Output as JSON
276        #[arg(long)]
277        json: bool,
278    },
279    /// Layer-by-layer trace analysis
280    Trace {
281        /// Path to .apr model file
282        #[arg(value_name = "FILE")]
283        file: PathBuf,
284        /// Filter layers by name pattern
285        #[arg(long)]
286        layer: Option<String>,
287        /// Compare with reference model
288        #[arg(long)]
289        reference: Option<PathBuf>,
290        /// Output as JSON
291        #[arg(long)]
292        json: bool,
293        /// Verbose output with per-layer stats
294        #[arg(short, long)]
295        verbose: bool,
296        /// Trace payload through model
297        #[arg(long)]
298        payload: bool,
299        /// Diff mode
300        #[arg(long)]
301        diff: bool,
302        /// Interactive mode
303        #[arg(long)]
304        interactive: bool,
305        /// Save per-stage F32 tensors during trace for SHIP-007 layer-0
306        /// element-wise diff. Comma-separated stage names from
307        /// `apr-cli-trace-save-tensor-v1.yaml` (e.g.
308        /// `embedding,qkv_matmul,attention`). Pass `all` to save every
309        /// stage. Output goes to `--save-tensor-dir` if provided,
310        /// else `<file_dir>/trace-tensors/<run_id>/`.
311        #[arg(long, value_name = "STAGES")]
312        save_tensor: Option<String>,
313        /// Output directory for `--save-tensor` (default: sibling
314        /// `trace-tensors/<run_id>/`).
315        #[arg(long, value_name = "DIR")]
316        save_tensor_dir: Option<PathBuf>,
317        /// Layer-id range for `--save-tensor` (default: 0..1, i.e.
318        /// layer 0 only). Format: `START..END` (Rust range syntax,
319        /// END exclusive).
320        #[arg(long, value_name = "RANGE", default_value = "0..1")]
321        save_tensor_layers: String,
322    },
323    /// Check for best practices and conventions
324    Lint {
325        /// Path to .apr model file
326        #[arg(value_name = "FILE")]
327        file: PathBuf,
328    },
329    /// Evaluate a BeatBenchmark contract against a measured value (PMAT-741)
330    #[command(name = "beat-run")]
331    BeatRun {
332        /// Path to a beat-benchmark contract YAML (e.g. contracts/beat-sklearn-iris-v1.yaml)
333        #[arg(value_name = "CONTRACT")]
334        contract: PathBuf,
335        /// Measured metric value; when given, emit a WON/REGRESSED verdict and
336        /// exit non-zero on regression. Omit to just report the pinned baseline.
337        #[arg(long, value_name = "VALUE")]
338        measured: Option<f64>,
339    },
340    /// Emit a SHA-256 manifest of input files (CRUX-G-05)
341    Manifest {
342        /// Files to include in the manifest (one entry per file)
343        #[arg(value_name = "FILES", num_args = 1..)]
344        files: Vec<PathBuf>,
345        /// Output JSON manifest path
346        #[arg(short, long, value_name = "MAN_JSON")]
347        output: PathBuf,
348    },
349    /// Explain errors, architecture, tensors, and kernel dispatch
350    Explain {
351        /// Error code, model file path, or family name (auto-detected)
352        #[arg(value_name = "CODE_OR_FILE")]
353        code_or_file: Option<String>,
354        /// Path to .apr model file (optional context for --tensor)
355        #[arg(short, long)]
356        file: Option<PathBuf>,
357        /// Explain a specific tensor
358        #[arg(long)]
359        tensor: Option<String>,
360        /// Explain kernel dispatch pipeline for architecture
361        #[arg(long)]
362        kernel: bool,
363        /// Output as JSON
364        #[arg(long)]
365        json: bool,
366        /// Show kernel contract details and proof obligations
367        #[arg(short, long)]
368        verbose: bool,
369        /// Show per-kernel proof status from contract tests
370        #[arg(long)]
371        proof_status: bool,
372    },
373    /// Manage canary tests for regression
374    Canary {
375        #[command(subcommand)]
376        command: CanaryCommands,
377    },
378    /// Export model to other formats
379    Export {
380        /// Path to .apr model file
381        #[arg(value_name = "FILE", required_unless_present = "list_formats")]
382        file: Option<PathBuf>,
383        /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
384        #[arg(long, default_value = "safetensors")]
385        format: String,
386        /// Output file/directory path
387        #[arg(short, long)]
388        output: Option<PathBuf>,
389        /// Apply quantization during export (int8, int4, fp16)
390        #[arg(long)]
391        quantize: Option<String>,
392        /// List all supported export formats
393        #[arg(long)]
394        list_formats: bool,
395        /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
396        #[arg(long)]
397        batch: Option<String>,
398        /// Output in JSON format
399        #[arg(long)]
400        json: bool,
401        /// Plan mode (validate inputs, show export plan, no execution)
402        #[arg(long)]
403        plan: bool,
404    },
405    /// Import from external formats (hf://org/repo, local files, URLs)
406    Import {
407        /// Source: hf://org/repo, local file, or URL
408        #[arg(value_name = "SOURCE")]
409        source: String,
410        /// Output .apr file path (default: derived from source name)
411        #[arg(short, long)]
412        output: Option<PathBuf>,
413        /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
414        #[arg(long, default_value = "auto")]
415        arch: String,
416        /// Quantization (int8, int4, fp16)
417        #[arg(long)]
418        quantize: Option<String>,
419        /// Strict mode: reject unverified architectures and fail on validation errors
420        #[arg(long)]
421        strict: bool,
422        /// Preserve Q4K quantization for fused kernel inference (GGUF only)
423        /// Uses realizar's Q4K converter instead of dequantizing to F32
424        #[arg(long)]
425        preserve_q4k: bool,
426        /// PMAT-232: External tokenizer.json for weights-only GGUF files.
427        /// Required if the GGUF has no embedded tokenizer vocabulary.
428        #[arg(long)]
429        tokenizer: Option<PathBuf>,
430        /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
431        /// (only SafeTensors sources allowed). Ensures single-provenance testing.
432        #[arg(long)]
433        enforce_provenance: bool,
434        /// GH-223: Allow import without config.json (default: error).
435        /// Without config.json, hyperparameters like rope_theta are inferred from
436        /// tensor shapes and may be wrong, producing garbage output.
437        #[arg(long)]
438        allow_no_config: bool,
439    },
440    /// Download and cache model OR HuggingFace dataset (Ollama-like UX)
441    Pull {
442        /// Model reference (alias, hf:// URI, or org/repo) OR "dataset"
443        /// asset-type discriminator. When this value is the literal
444        /// string "dataset", the next positional `repo` is the
445        /// HuggingFace dataset repo and dataset-pull semantics apply.
446        #[arg(value_name = "MODEL_OR_ASSET_TYPE")]
447        model_ref: String,
448        /// Dataset repository (used only when model_ref == "dataset").
449        /// Per `apr-cli-pull-dataset-v1.yaml`.
450        #[arg(value_name = "REPO")]
451        repo: Option<String>,
452        /// Force re-download even if cached
453        #[arg(long)]
454        force: bool,
455        /// CRUX-A-01: resolve short name to canonical URL and exit without
456        /// performing any network I/O.
457        #[arg(long)]
458        dry_run: bool,
459        /// CRUX-A-03: pin to a specific branch, tag, or git SHA on the remote
460        /// (HuggingFace Hub). Defaults to "main" when omitted.
461        #[arg(long, value_name = "REV")]
462        revision: Option<String>,
463        /// CRUX-A-20: offline mode — forbid any outbound network I/O.
464        /// Equivalent to APR_OFFLINE=1 or HF_HUB_OFFLINE=1 in the environment.
465        #[arg(long)]
466        offline: bool,
467        /// (dataset mode) Glob pattern for shard selection. May be passed
468        /// multiple times; matches are unioned. fnmatch-compatible
469        /// (`*`, `?`, `[a-z]`). No-match is fail-fast.
470        #[arg(long, value_name = "GLOB")]
471        include: Vec<String>,
472        /// (dataset mode) Output directory. Default:
473        /// `~/.cache/aprender/datasets/<repo>/`.
474        #[arg(short = 'o', long)]
475        output: Option<PathBuf>,
476    },
477    /// Registry operations (CRUX-A-01): inspect alias map, etc.
478    Registry {
479        #[command(subcommand)]
480        command: crate::commands::registry::RegistryCommands,
481    },
482    /// List cached models
483    #[command(name = "list", alias = "ls")]
484    List,
485    /// Remove model from cache
486    #[command(name = "rm", alias = "remove")]
487    Rm {
488        /// Model reference to remove
489        #[arg(value_name = "MODEL")]
490        model_ref: String,
491    },
492    /// Convert/optimize model
493    Convert {
494        /// Path to .apr model file
495        #[arg(value_name = "FILE")]
496        file: PathBuf,
497        /// Quantize to format (int8, int4, fp16, q4k)
498        #[arg(long)]
499        quantize: Option<String>,
500        /// Compress output (none, zstd, zstd-max, lz4)
501        #[arg(long)]
502        compress: Option<String>,
503        /// Output file path
504        #[arg(short, long)]
505        output: PathBuf,
506        /// Force overwrite existing files
507        #[arg(short, long)]
508        force: bool,
509    },
510    /// Stamp provenance fields (license, data_source, data_license) onto an existing .apr file
511    ///
512    /// SHIP-009 full-discharge enabler — patches the three provenance fields on
513    /// a pre-built APR v2 artifact (e.g., the shipped MODEL-1 teacher whose
514    /// fields are all (missing) because it was built before GATE-APR-PROV-001..003
515    /// shipped). Tensor bytes and header flags are preserved verbatim.
516    Stamp {
517        /// Path to input .apr model file
518        #[arg(value_name = "FILE")]
519        file: PathBuf,
520        /// SPDX license identifier (e.g., Apache-2.0)
521        #[arg(long)]
522        license: Option<String>,
523        /// Training-data source (e.g., huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)
524        #[arg(long = "data-source")]
525        data_source: Option<String>,
526        /// SPDX license for data_source (e.g., Apache-2.0)
527        #[arg(long = "data-license")]
528        data_license: Option<String>,
529        /// HuggingFace class name (e.g., Qwen2ForCausalLM, LlamaForCausalLM).
530        ///
531        /// PMAT-690 P0-K extension (SPEC §86): patch the upstream
532        /// `architectures[0]` stamp on a pre-P0-K APR so downstream
533        /// consumers (apr inspect --quality, apr pretrain --init,
534        /// apr export → llama-cli) see the correct HF identity.
535        #[arg(long = "hf-architecture")]
536        hf_architecture: Option<String>,
537        /// HuggingFace model_type slug (e.g., qwen2, llama).
538        ///
539        /// PMAT-690 P0-K extension (SPEC §86).
540        #[arg(long = "hf-model-type")]
541        hf_model_type: Option<String>,
542        /// Lowercase architecture family slug (e.g., qwen2, llama).
543        ///
544        /// PMAT-690 P0-K extension (SPEC §86). This is the field
545        /// `apr pretrain --init` reads for arch dispatch — without
546        /// patching it, pre-P0-K checkpoints with the P0-H "LlamaForCausalLM"
547        /// fallback in this field cannot be loaded as Qwen2 inits.
548        #[arg(long)]
549        architecture: Option<String>,
550        /// Directory containing tokenizer files (vocab.json + merges.txt
551        /// OR tokenizer.json). When provided, embeds the vocabulary +
552        /// BPE merges into the APR's `custom.tokenizer.vocabulary` /
553        /// `custom.tokenizer.merges` JSON metadata AND sets the
554        /// HAS_VOCAB header flag.
555        ///
556        /// PMAT-690 P3-C-prep defect 1 fix (2026-05-17): pre-P0-K APRs
557        /// trained from inits without embedded tokenizers fail `apr run`
558        /// with PMAT-172. This flag lets the §86 salvage recipe embed
559        /// the tokenizer post-hoc so the artifact is self-contained
560        /// for inference (the apr binary's headline use case).
561        #[arg(long = "tokenizer", value_name = "DIR")]
562        tokenizer_dir: Option<PathBuf>,
563        /// Output file path
564        #[arg(short, long)]
565        output: PathBuf,
566        /// Force overwrite existing files
567        #[arg(short, long)]
568        force: bool,
569    },
570    /// Compile model into standalone executable (APR-SPEC §4.16)
571    Compile {
572        /// Input .apr model file
573        #[arg(value_name = "FILE", required_unless_present = "list_targets")]
574        file: Option<PathBuf>,
575        /// Output binary path (default: derived from model name)
576        #[arg(short, long)]
577        output: Option<PathBuf>,
578        /// Target triple (e.g., x86_64-unknown-linux-musl)
579        #[arg(long)]
580        target: Option<String>,
581        /// Quantize weights before embedding (int8, int4, fp16)
582        #[arg(long)]
583        quantize: Option<String>,
584        /// Release mode (optimized)
585        #[arg(long)]
586        release: bool,
587        /// Strip debug symbols
588        #[arg(long)]
589        strip: bool,
590        /// Enable LTO (Link-Time Optimization)
591        #[arg(long)]
592        lto: bool,
593        /// List available compilation targets
594        #[arg(long)]
595        list_targets: bool,
596    },
597    /// Merge multiple models
598    Merge {
599        /// Model files to merge
600        #[arg(value_name = "FILES", num_args = 2..)]
601        files: Vec<PathBuf>,
602        /// Merge strategy (average, weighted, slerp, ties, dare)
603        #[arg(long, default_value = "average")]
604        strategy: String,
605        /// Output file path (optional in --plan mode)
606        #[arg(short, long, required_unless_present = "plan")]
607        output: Option<PathBuf>,
608        /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
609        #[arg(long, value_delimiter = ',')]
610        weights: Option<Vec<f32>>,
611        /// Base model for TIES/DARE (task vectors computed as delta from base)
612        #[arg(long)]
613        base_model: Option<PathBuf>,
614        /// DARE drop probability (default: 0.9)
615        #[arg(long, default_value = "0.9")]
616        drop_rate: f32,
617        /// TIES trim density threshold (default: 0.2)
618        #[arg(long, default_value = "0.2")]
619        density: f32,
620        /// RNG seed for DARE (default: 42)
621        #[arg(long, default_value = "42")]
622        seed: u64,
623        /// Plan mode (validate inputs, show merge plan, no execution)
624        #[arg(long)]
625        plan: bool,
626    },
627    /// Quantize model weights (GH-243)
628    Quantize {
629        /// Input model file
630        #[arg(value_name = "FILE")]
631        file: PathBuf,
632        /// Quantization scheme: int8, int4, fp16, q4k
633        #[arg(long, short = 's', default_value = "int4")]
634        scheme: String,
635        /// Output file path (required unless --plan)
636        #[arg(short, long)]
637        output: Option<PathBuf>,
638        /// Output format override (apr, gguf, safetensors)
639        #[arg(long)]
640        format: Option<String>,
641        /// Batch quantization (comma-separated schemes)
642        #[arg(long)]
643        batch: Option<String>,
644        /// Plan mode (estimate only, no execution)
645        #[arg(long)]
646        plan: bool,
647        /// Force overwrite existing files
648        #[arg(short, long)]
649        force: bool,
650    },
651    /// Model optimization commands (fine-tune, prune, distill)
652    #[command(flatten)]
653    ModelOps(ModelOpsCommands),
654    /// Start the MCP (Model Context Protocol) server over stdio
655    ///
656    /// Exposes `apr` as MCP tools for Claude Code, Cursor, Cline, and other
657    /// MCP clients. Configure via `.mcp.json` with `{"command":"apr","args":["mcp"]}`.
658    Mcp {},
659    /// Interactive terminal UI
660    Tui {
661        /// Path to .apr model file
662        #[arg(value_name = "FILE")]
663        file: Option<PathBuf>,
664    },
665    /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
666    Check {
667        /// Path to model file
668        #[arg(value_name = "FILE")]
669        file: PathBuf,
670        /// Disable GPU acceleration
671        #[arg(long)]
672        no_gpu: bool,
673        /// Output as JSON
674        #[arg(long)]
675        json: bool,
676    },
677    /// GPU status and VRAM reservation management (GPU-SHARE-001)
678    #[cfg(feature = "training")]
679    Gpu {
680        /// Show reservations as JSON
681        #[arg(long)]
682        json: bool,
683    },
684    /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
685    
686    Code {
687        /// Path to local GGUF/APR model file (prefers .apr format)
688        #[arg(long)]
689        model: Option<PathBuf>,
690
691        /// Project directory (loads APR.md/CLAUDE.md from this path)
692        #[arg(long, default_value = ".")]
693        project: PathBuf,
694
695        /// Resume previous session (optionally by ID)
696        #[arg(long)]
697        resume: Option<Option<String>>,
698
699        /// Agent manifest (advanced — overrides defaults)
700        #[arg(long)]
701        manifest: Option<PathBuf>,
702
703        /// Initial prompt (non-interactive: print response and exit)
704        #[arg(short, long)]
705        print: bool,
706
707        /// Prompt text (positional, for -p mode)
708        #[arg(trailing_var_arg = true)]
709        prompt: Vec<String>,
710
711        /// Max turns before stopping
712        #[arg(long, default_value = "50")]
713        max_turns: u32,
714
715        /// Emit a `ccpa-trace.jsonl` describing the run to this path.
716        /// Format mirrors the schema at
717        /// <https://github.com/paiml/claude-code-parity-apr/blob/main/contracts/claude-code-parity-apr-v1.yaml>
718        /// (`§ trace_schema`). Used by `ccpa measure` to score apr-code
719        /// against canonical Claude Code reference fixtures.
720        #[arg(long)]
721        emit_trace: Option<PathBuf>,
722
723        /// Output format for non-interactive (`-p`) mode (PMAT-CODE-OUTPUT-FORMAT-001).
724        /// `text` (default): plain assistant text.
725        /// `json`: structured `{type:"result", subtype:"success", result, session_id, duration_ms}`
726        /// envelope matching Claude Code's `claude -p --output-format json` shape.
727        #[arg(long, value_enum, default_value_t = CodeOutputFormat::Text)]
728        output_format: CodeOutputFormat,
729
730        /// Input format for non-interactive stdin (PMAT-CODE-INPUT-FORMAT-001).
731        /// `text` (default): treat stdin as raw prompt text.
732        /// `json`: parse `{"role":"user","content":"..."}` from stdin and use `content`
733        /// as the prompt. Matches Claude Code's `claude -p --input-format json` shape.
734        #[arg(long, value_enum, default_value_t = CodeInputFormat::Text)]
735        input_format: CodeInputFormat,
736    },
737    /// Extended analysis, profiling, QA, and visualization commands
738    #[command(flatten)]
739    Extended(ExtendedCommands),
740
741    /// Monorepo management (publish, shims, audit, archive) [dev-only]
742    #[cfg(feature = "dev")]
743    #[command(subcommand)]
744    Mono(crate::commands::mono::MonoCommands),
745}
apr_cli/commands_enum.rs

apr_cli/
commands_enum.rs