// apr_cli/commands_enum.rs

2/// Output format for `apr code` non-interactive mode (PMAT-CODE-OUTPUT-FORMAT-001).
3/// Mirrors Claude Code's `claude -p --output-format <fmt>` parity row.
4#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Default)]
5pub enum CodeOutputFormat {
6    /// Plain assistant text on stdout (default; existing behavior).
7    #[default]
8    Text,
9    /// Structured JSON envelope: `{type:"result", subtype:"success", result, session_id, duration_ms}`.
10    Json,
11}
12
13/// Input format for `apr code` non-interactive mode (PMAT-CODE-INPUT-FORMAT-001).
14/// `--input-format json` reads `{"role":"user","content":"..."}` from stdin instead
15/// of treating stdin as raw prompt text.
16#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Default)]
17pub enum CodeInputFormat {
18    /// Raw prompt text from positional args or stdin (default; existing behavior).
19    #[default]
20    Text,
21    /// JSON message envelope on stdin: `{"role":"user","content":"..."}`.
22    Json,
23}
24
25#[derive(Subcommand, Debug)]
26pub enum Commands {
27    /// Run model directly (auto-download, cache, execute)
28    Run {
29        /// Model source: local path, hf://org/repo, or URL
30        #[arg(value_name = "SOURCE")]
31        source: String,
32        /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
33        #[arg(value_name = "PROMPT")]
34        positional_prompt: Option<String>,
35        /// Input file (audio, text, etc.)
36        #[arg(short, long)]
37        input: Option<PathBuf>,
38        /// Text prompt for generation (for LLM models)
39        #[arg(short, long)]
40        prompt: Option<String>,
41        /// Maximum tokens to generate (default: 32)
42        #[arg(short = 'n', long, default_value = "32")]
43        max_tokens: usize,
44        /// Enable streaming output
45        #[arg(long)]
46        stream: bool,
47        /// Language code (for ASR models)
48        #[arg(short, long)]
49        language: Option<String>,
50        /// Task (transcribe, translate)
51        #[arg(short, long)]
52        task: Option<String>,
53        /// Output format (text, json, srt, vtt)
54        #[arg(short = 'f', long, default_value = "text")]
55        format: String,
56        /// Disable GPU acceleration (force CPU-only inference)
57        #[arg(long, alias = "cpu", conflicts_with = "gpu")]
58        no_gpu: bool,
59        /// Force GPU acceleration
60        #[arg(long, conflicts_with = "no_gpu")]
61        gpu: bool,
62        /// Offline mode: block all network access (Sovereign AI compliance)
63        #[arg(long)]
64        offline: bool,
65        /// Benchmark mode: output performance metrics (tok/s, latency)
66        #[arg(long)]
67        benchmark: bool,
68        /// Enable inference tracing (APR-TRACE-001)
69        #[arg(long)]
70        trace: bool,
71        /// Trace specific steps only (comma-separated)
72        #[arg(long, value_delimiter = ',')]
73        trace_steps: Option<Vec<String>>,
74        /// Verbose tracing (show tensor values)
75        #[arg(long)]
76        trace_verbose: bool,
77        /// Save trace output to JSON file
78        #[arg(long, value_name = "FILE")]
79        trace_output: Option<PathBuf>,
80        /// Trace detail level (none, basic, layer, payload, chrome)
81        /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
82        /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
83        #[arg(long, value_name = "LEVEL", default_value = "basic")]
84        trace_level: String,
85        /// Shorthand for --trace --trace-level payload (tensor value inspection)
86        #[arg(long)]
87        trace_payload: bool,
88        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
89        #[arg(long)]
90        profile: bool,
91        /// Apply chat template for Instruct models (GAP-UX-001)
92        ///
93        /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
94        /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
95        #[arg(long)]
96        chat: bool,
97        /// Sampling temperature (0.0 = greedy, default: 0.0)
98        #[arg(long, default_value = "0.0")]
99        temperature: f32,
100        /// Top-k sampling (default: 1 = greedy)
101        #[arg(long, default_value = "1")]
102        top_k: usize,
103        /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
104        /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
105        #[arg(long)]
106        top_p: Option<f32>,
107        /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
108        /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
109        #[arg(long, default_value = "299792458")]
110        seed: u64,
111        /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
112        /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
113        #[arg(long, default_value = "1.0")]
114        repeat_penalty: f32,
115        /// Context window for repetition penalty (number of recent tokens to check)
116        /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
117        #[arg(long, default_value = "64")]
118        repeat_last_n: usize,
119        /// Process prompt tokens one-by-one instead of batched prefill.
120        /// Useful for debugging prefill correctness (comparing per-token attention).
121        /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
122        #[arg(long)]
123        split_prompt: bool,
124        /// Batch mode: read prompts from JSONL, output results as JSONL.
125        /// Model loads once, processes all prompts sequentially.
126        /// Each input line: {"prompt": "...", "task_id": "..."}
127        /// Chat template is applied automatically.
128        #[arg(long, value_name = "FILE")]
129        batch_jsonl: Option<PathBuf>,
130        /// Show verbose output (model loading, backend info)
131        #[arg(short, long)]
132        verbose: bool,
133        /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
134        #[arg(long, value_name = "BACKEND")]
135        backend: Option<String>,
136    },
137    /// Inference server (plan/run)
138    Serve {
139        #[command(subcommand)]
140        command: ServeCommands,
141    },
142    /// Inspect model metadata, vocab, and structure
143    Inspect {
144        /// Path to .apr model file
145        #[arg(value_name = "FILE")]
146        file: PathBuf,
147        /// Show vocabulary details
148        #[arg(long)]
149        vocab: bool,
150        /// Show filter/security details
151        #[arg(long)]
152        filters: bool,
153        /// Show weight statistics
154        #[arg(long)]
155        weights: bool,
156        /// Output as JSON
157        #[arg(long)]
158        json: bool,
159        /// Emit a 0-100 model quality score block.
160        ///
161        /// Per SPEC-SHIP-TWO-001 §84 P3-A (AC-SHIP2-007 quality
162        /// threshold ≥ 90). The score aggregates: physics checks
163        /// (no NaN/Inf, no all-zero tensors), structural
164        /// completeness (architecture / hidden_size / num_layers
165        /// metadata present), provenance (license + data_source +
166        /// data_license non-empty), HF identity (hf_architecture
167        /// stamped per PMAT-690 P0-K), and tokenizer presence
168        /// (has_vocab + embedded merges). A ship-blocking model
169        /// MUST score ≥ 90 by this rubric.
170        #[arg(long)]
171        quality: bool,
172    },
173    /// Simple debugging output ("drama" mode available)
174    Debug {
175        /// Path to .apr model file
176        #[arg(value_name = "FILE")]
177        file: PathBuf,
178        /// Theatrical "drama" mode output
179        #[arg(long)]
180        drama: bool,
181        /// Show hex dump
182        #[arg(long)]
183        hex: bool,
184        /// Extract ASCII strings
185        #[arg(long)]
186        strings: bool,
187        /// Limit output lines
188        #[arg(long, default_value = "256")]
189        limit: usize,
190    },
191    /// Validate model integrity and quality
192    Validate {
193        /// Path to .apr model file
194        #[arg(value_name = "FILE")]
195        file: PathBuf,
196        /// Show 100-point quality assessment
197        #[arg(long)]
198        quality: bool,
199        /// Strict validation (fail on warnings)
200        #[arg(long)]
201        strict: bool,
202        /// Minimum score to pass (0-100)
203        #[arg(long)]
204        min_score: Option<u8>,
205    },
206    /// Validate a publish manifest (FALSIFY-PM-001..006).
207    ///
208    /// Contract: `contracts/publish-manifest-v1.yaml`
209    /// Spec:     SPEC-SHIP-TWO-001 §12.3 AC-EX-004
210    ValidateManifest {
211        /// Path to manifest YAML
212        #[arg(value_name = "MANIFEST")]
213        file: PathBuf,
214        /// Optional local .apr artifact to discharge FALSIFY-PM-002 (sha256 match)
215        #[arg(long, value_name = "APR_FILE")]
216        artifact: Option<PathBuf>,
217        /// Discharge FALSIFY-PM-003 via network: HTTP HEAD + streaming sha256.
218        /// Default is DEFERRED (offline-safe). Ignored when --offline is set.
219        /// Closes F-PUBLISH-EXTRA-001::dogfood_ex05 (no Python in ex-05).
220        #[arg(long)]
221        live: bool,
222    },
223    /// Compare two models
224    Diff {
225        /// First model file
226        #[arg(value_name = "FILE1")]
227        file1: PathBuf,
228        /// Second model file
229        #[arg(value_name = "FILE2")]
230        file2: PathBuf,
231        /// Show weight-level differences
232        #[arg(long)]
233        weights: bool,
234        /// Compare actual tensor values with statistical analysis
235        #[arg(long)]
236        values: bool,
237        /// Filter tensors by name pattern (for --values)
238        #[arg(long)]
239        filter: Option<String>,
240        /// Maximum number of tensors to compare (for --values)
241        #[arg(long, default_value = "10")]
242        limit: usize,
243        /// Account for transpose when comparing (GGUF col-major vs APR row-major)
244        #[arg(long)]
245        transpose_aware: bool,
246        /// Output as JSON
247        #[arg(long)]
248        json: bool,
249        /// CRUX-B-20: per-tensor quant roundtrip error report (RMSE / cosine / max_abs).
250        /// FILE1 is the reference (fp16/fp32/bf16); FILE2 is the quantized variant.
251        #[arg(long)]
252        quant_roundtrip: bool,
253        /// CRUX-B-20: cosine threshold for the quant-roundtrip exit-code gate.
254        /// Any tensor with cosine < threshold makes the command exit non-zero.
255        #[arg(long, default_value = "0.95")]
256        threshold: f32,
257        /// CRUX-B-20: suppress the threshold exit-code gate (still emits the report).
258        #[arg(long)]
259        no_threshold: bool,
260    },
261    /// List tensor names and shapes
262    Tensors {
263        /// Path to .apr model file
264        #[arg(value_name = "FILE")]
265        file: PathBuf,
266        /// Show tensor statistics (mean, std, min, max)
267        #[arg(long)]
268        stats: bool,
269        /// Filter tensors by name pattern
270        #[arg(long)]
271        filter: Option<String>,
272        /// Limit number of tensors shown (0 = unlimited)
273        #[arg(long, default_value = "0")]
274        limit: usize,
275        /// Output as JSON
276        #[arg(long)]
277        json: bool,
278    },
279    /// Layer-by-layer trace analysis
280    Trace {
281        /// Path to .apr model file
282        #[arg(value_name = "FILE")]
283        file: PathBuf,
284        /// Filter layers by name pattern
285        #[arg(long)]
286        layer: Option<String>,
287        /// Compare with reference model
288        #[arg(long)]
289        reference: Option<PathBuf>,
290        /// Output as JSON
291        #[arg(long)]
292        json: bool,
293        /// Verbose output with per-layer stats
294        #[arg(short, long)]
295        verbose: bool,
296        /// Trace payload through model
297        #[arg(long)]
298        payload: bool,
299        /// Diff mode
300        #[arg(long)]
301        diff: bool,
302        /// Interactive mode
303        #[arg(long)]
304        interactive: bool,
305        /// Save per-stage F32 tensors during trace for SHIP-007 layer-0
306        /// element-wise diff. Comma-separated stage names from
307        /// `apr-cli-trace-save-tensor-v1.yaml` (e.g.
308        /// `embedding,qkv_matmul,attention`). Pass `all` to save every
309        /// stage. Output goes to `--save-tensor-dir` if provided,
310        /// else `<file_dir>/trace-tensors/<run_id>/`.
311        #[arg(long, value_name = "STAGES")]
312        save_tensor: Option<String>,
313        /// Output directory for `--save-tensor` (default: sibling
314        /// `trace-tensors/<run_id>/`).
315        #[arg(long, value_name = "DIR")]
316        save_tensor_dir: Option<PathBuf>,
317        /// Layer-id range for `--save-tensor` (default: 0..1, i.e.
318        /// layer 0 only). Format: `START..END` (Rust range syntax,
319        /// END exclusive).
320        #[arg(long, value_name = "RANGE", default_value = "0..1")]
321        save_tensor_layers: String,
322    },
323    /// Check for best practices and conventions
324    Lint {
325        /// Path to .apr model file
326        #[arg(value_name = "FILE")]
327        file: PathBuf,
328    },
329    /// Emit a SHA-256 manifest of input files (CRUX-G-05)
330    Manifest {
331        /// Files to include in the manifest (one entry per file)
332        #[arg(value_name = "FILES", num_args = 1..)]
333        files: Vec<PathBuf>,
334        /// Output JSON manifest path
335        #[arg(short, long, value_name = "MAN_JSON")]
336        output: PathBuf,
337    },
338    /// Explain errors, architecture, tensors, and kernel dispatch
339    Explain {
340        /// Error code, model file path, or family name (auto-detected)
341        #[arg(value_name = "CODE_OR_FILE")]
342        code_or_file: Option<String>,
343        /// Path to .apr model file (optional context for --tensor)
344        #[arg(short, long)]
345        file: Option<PathBuf>,
346        /// Explain a specific tensor
347        #[arg(long)]
348        tensor: Option<String>,
349        /// Explain kernel dispatch pipeline for architecture
350        #[arg(long)]
351        kernel: bool,
352        /// Output as JSON
353        #[arg(long)]
354        json: bool,
355        /// Show kernel contract details and proof obligations
356        #[arg(short, long)]
357        verbose: bool,
358        /// Show per-kernel proof status from contract tests
359        #[arg(long)]
360        proof_status: bool,
361    },
362    /// Manage canary tests for regression
363    Canary {
364        #[command(subcommand)]
365        command: CanaryCommands,
366    },
367    /// Export model to other formats
368    Export {
369        /// Path to .apr model file
370        #[arg(value_name = "FILE", required_unless_present = "list_formats")]
371        file: Option<PathBuf>,
372        /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
373        #[arg(long, default_value = "safetensors")]
374        format: String,
375        /// Output file/directory path
376        #[arg(short, long)]
377        output: Option<PathBuf>,
378        /// Apply quantization during export (int8, int4, fp16)
379        #[arg(long)]
380        quantize: Option<String>,
381        /// List all supported export formats
382        #[arg(long)]
383        list_formats: bool,
384        /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
385        #[arg(long)]
386        batch: Option<String>,
387        /// Output in JSON format
388        #[arg(long)]
389        json: bool,
390        /// Plan mode (validate inputs, show export plan, no execution)
391        #[arg(long)]
392        plan: bool,
393    },
394    /// Import from external formats (hf://org/repo, local files, URLs)
395    Import {
396        /// Source: hf://org/repo, local file, or URL
397        #[arg(value_name = "SOURCE")]
398        source: String,
399        /// Output .apr file path (default: derived from source name)
400        #[arg(short, long)]
401        output: Option<PathBuf>,
402        /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
403        #[arg(long, default_value = "auto")]
404        arch: String,
405        /// Quantization (int8, int4, fp16)
406        #[arg(long)]
407        quantize: Option<String>,
408        /// Strict mode: reject unverified architectures and fail on validation errors
409        #[arg(long)]
410        strict: bool,
411        /// Preserve Q4K quantization for fused kernel inference (GGUF only)
412        /// Uses realizar's Q4K converter instead of dequantizing to F32
413        #[arg(long)]
414        preserve_q4k: bool,
415        /// PMAT-232: External tokenizer.json for weights-only GGUF files.
416        /// Required if the GGUF has no embedded tokenizer vocabulary.
417        #[arg(long)]
418        tokenizer: Option<PathBuf>,
419        /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
420        /// (only SafeTensors sources allowed). Ensures single-provenance testing.
421        #[arg(long)]
422        enforce_provenance: bool,
423        /// GH-223: Allow import without config.json (default: error).
424        /// Without config.json, hyperparameters like rope_theta are inferred from
425        /// tensor shapes and may be wrong, producing garbage output.
426        #[arg(long)]
427        allow_no_config: bool,
428    },
429    /// Download and cache model OR HuggingFace dataset (Ollama-like UX)
430    Pull {
431        /// Model reference (alias, hf:// URI, or org/repo) OR "dataset"
432        /// asset-type discriminator. When this value is the literal
433        /// string "dataset", the next positional `repo` is the
434        /// HuggingFace dataset repo and dataset-pull semantics apply.
435        #[arg(value_name = "MODEL_OR_ASSET_TYPE")]
436        model_ref: String,
437        /// Dataset repository (used only when model_ref == "dataset").
438        /// Per `apr-cli-pull-dataset-v1.yaml`.
439        #[arg(value_name = "REPO")]
440        repo: Option<String>,
441        /// Force re-download even if cached
442        #[arg(long)]
443        force: bool,
444        /// CRUX-A-01: resolve short name to canonical URL and exit without
445        /// performing any network I/O.
446        #[arg(long)]
447        dry_run: bool,
448        /// CRUX-A-03: pin to a specific branch, tag, or git SHA on the remote
449        /// (HuggingFace Hub). Defaults to "main" when omitted.
450        #[arg(long, value_name = "REV")]
451        revision: Option<String>,
452        /// CRUX-A-20: offline mode — forbid any outbound network I/O.
453        /// Equivalent to APR_OFFLINE=1 or HF_HUB_OFFLINE=1 in the environment.
454        #[arg(long)]
455        offline: bool,
456        /// (dataset mode) Glob pattern for shard selection. May be passed
457        /// multiple times; matches are unioned. fnmatch-compatible
458        /// (`*`, `?`, `[a-z]`). No-match is fail-fast.
459        #[arg(long, value_name = "GLOB")]
460        include: Vec<String>,
461        /// (dataset mode) Output directory. Default:
462        /// `~/.cache/aprender/datasets/<repo>/`.
463        #[arg(short = 'o', long)]
464        output: Option<PathBuf>,
465    },
466    /// Registry operations (CRUX-A-01): inspect alias map, etc.
467    Registry {
468        #[command(subcommand)]
469        command: crate::commands::registry::RegistryCommands,
470    },
471    /// List cached models
472    #[command(name = "list", alias = "ls")]
473    List,
474    /// Remove model from cache
475    #[command(name = "rm", alias = "remove")]
476    Rm {
477        /// Model reference to remove
478        #[arg(value_name = "MODEL")]
479        model_ref: String,
480    },
481    /// Convert/optimize model
482    Convert {
483        /// Path to .apr model file
484        #[arg(value_name = "FILE")]
485        file: PathBuf,
486        /// Quantize to format (int8, int4, fp16, q4k)
487        #[arg(long)]
488        quantize: Option<String>,
489        /// Compress output (none, zstd, zstd-max, lz4)
490        #[arg(long)]
491        compress: Option<String>,
492        /// Output file path
493        #[arg(short, long)]
494        output: PathBuf,
495        /// Force overwrite existing files
496        #[arg(short, long)]
497        force: bool,
498    },
499    /// Stamp provenance fields (license, data_source, data_license) onto an existing .apr file
500    ///
501    /// SHIP-009 full-discharge enabler — patches the three provenance fields on
502    /// a pre-built APR v2 artifact (e.g., the shipped MODEL-1 teacher whose
503    /// fields are all (missing) because it was built before GATE-APR-PROV-001..003
504    /// shipped). Tensor bytes and header flags are preserved verbatim.
505    Stamp {
506        /// Path to input .apr model file
507        #[arg(value_name = "FILE")]
508        file: PathBuf,
509        /// SPDX license identifier (e.g., Apache-2.0)
510        #[arg(long)]
511        license: Option<String>,
512        /// Training-data source (e.g., huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)
513        #[arg(long = "data-source")]
514        data_source: Option<String>,
515        /// SPDX license for data_source (e.g., Apache-2.0)
516        #[arg(long = "data-license")]
517        data_license: Option<String>,
518        /// HuggingFace class name (e.g., Qwen2ForCausalLM, LlamaForCausalLM).
519        ///
520        /// PMAT-690 P0-K extension (SPEC §86): patch the upstream
521        /// `architectures[0]` stamp on a pre-P0-K APR so downstream
522        /// consumers (apr inspect --quality, apr pretrain --init,
523        /// apr export → llama-cli) see the correct HF identity.
524        #[arg(long = "hf-architecture")]
525        hf_architecture: Option<String>,
526        /// HuggingFace model_type slug (e.g., qwen2, llama).
527        ///
528        /// PMAT-690 P0-K extension (SPEC §86).
529        #[arg(long = "hf-model-type")]
530        hf_model_type: Option<String>,
531        /// Lowercase architecture family slug (e.g., qwen2, llama).
532        ///
533        /// PMAT-690 P0-K extension (SPEC §86). This is the field
534        /// `apr pretrain --init` reads for arch dispatch — without
535        /// patching it, pre-P0-K checkpoints with the P0-H "LlamaForCausalLM"
536        /// fallback in this field cannot be loaded as Qwen2 inits.
537        #[arg(long)]
538        architecture: Option<String>,
539        /// Directory containing tokenizer files (vocab.json + merges.txt
540        /// OR tokenizer.json). When provided, embeds the vocabulary +
541        /// BPE merges into the APR's `custom.tokenizer.vocabulary` /
542        /// `custom.tokenizer.merges` JSON metadata AND sets the
543        /// HAS_VOCAB header flag.
544        ///
545        /// PMAT-690 P3-C-prep defect 1 fix (2026-05-17): pre-P0-K APRs
546        /// trained from inits without embedded tokenizers fail `apr run`
547        /// with PMAT-172. This flag lets the §86 salvage recipe embed
548        /// the tokenizer post-hoc so the artifact is self-contained
549        /// for inference (the apr binary's headline use case).
550        #[arg(long = "tokenizer", value_name = "DIR")]
551        tokenizer_dir: Option<PathBuf>,
552        /// Output file path
553        #[arg(short, long)]
554        output: PathBuf,
555        /// Force overwrite existing files
556        #[arg(short, long)]
557        force: bool,
558    },
559    /// Compile model into standalone executable (APR-SPEC §4.16)
560    Compile {
561        /// Input .apr model file
562        #[arg(value_name = "FILE", required_unless_present = "list_targets")]
563        file: Option<PathBuf>,
564        /// Output binary path (default: derived from model name)
565        #[arg(short, long)]
566        output: Option<PathBuf>,
567        /// Target triple (e.g., x86_64-unknown-linux-musl)
568        #[arg(long)]
569        target: Option<String>,
570        /// Quantize weights before embedding (int8, int4, fp16)
571        #[arg(long)]
572        quantize: Option<String>,
573        /// Release mode (optimized)
574        #[arg(long)]
575        release: bool,
576        /// Strip debug symbols
577        #[arg(long)]
578        strip: bool,
579        /// Enable LTO (Link-Time Optimization)
580        #[arg(long)]
581        lto: bool,
582        /// List available compilation targets
583        #[arg(long)]
584        list_targets: bool,
585    },
586    /// Merge multiple models
587    Merge {
588        /// Model files to merge
589        #[arg(value_name = "FILES", num_args = 2..)]
590        files: Vec<PathBuf>,
591        /// Merge strategy (average, weighted, slerp, ties, dare)
592        #[arg(long, default_value = "average")]
593        strategy: String,
594        /// Output file path (optional in --plan mode)
595        #[arg(short, long, required_unless_present = "plan")]
596        output: Option<PathBuf>,
597        /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
598        #[arg(long, value_delimiter = ',')]
599        weights: Option<Vec<f32>>,
600        /// Base model for TIES/DARE (task vectors computed as delta from base)
601        #[arg(long)]
602        base_model: Option<PathBuf>,
603        /// DARE drop probability (default: 0.9)
604        #[arg(long, default_value = "0.9")]
605        drop_rate: f32,
606        /// TIES trim density threshold (default: 0.2)
607        #[arg(long, default_value = "0.2")]
608        density: f32,
609        /// RNG seed for DARE (default: 42)
610        #[arg(long, default_value = "42")]
611        seed: u64,
612        /// Plan mode (validate inputs, show merge plan, no execution)
613        #[arg(long)]
614        plan: bool,
615    },
616    /// Quantize model weights (GH-243)
617    Quantize {
618        /// Input model file
619        #[arg(value_name = "FILE")]
620        file: PathBuf,
621        /// Quantization scheme: int8, int4, fp16, q4k
622        #[arg(long, short = 's', default_value = "int4")]
623        scheme: String,
624        /// Output file path (required unless --plan)
625        #[arg(short, long)]
626        output: Option<PathBuf>,
627        /// Output format override (apr, gguf, safetensors)
628        #[arg(long)]
629        format: Option<String>,
630        /// Batch quantization (comma-separated schemes)
631        #[arg(long)]
632        batch: Option<String>,
633        /// Plan mode (estimate only, no execution)
634        #[arg(long)]
635        plan: bool,
636        /// Force overwrite existing files
637        #[arg(short, long)]
638        force: bool,
639    },
640    /// Model optimization commands (fine-tune, prune, distill)
641    #[command(flatten)]
642    ModelOps(ModelOpsCommands),
643    /// Start the MCP (Model Context Protocol) server over stdio
644    ///
645    /// Exposes `apr` as MCP tools for Claude Code, Cursor, Cline, and other
646    /// MCP clients. Configure via `.mcp.json` with `{"command":"apr","args":["mcp"]}`.
647    Mcp {},
648    /// Interactive terminal UI
649    Tui {
650        /// Path to .apr model file
651        #[arg(value_name = "FILE")]
652        file: Option<PathBuf>,
653    },
654    /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
655    Check {
656        /// Path to model file
657        #[arg(value_name = "FILE")]
658        file: PathBuf,
659        /// Disable GPU acceleration
660        #[arg(long)]
661        no_gpu: bool,
662        /// Output as JSON
663        #[arg(long)]
664        json: bool,
665    },
666    /// GPU status and VRAM reservation management (GPU-SHARE-001)
667    #[cfg(feature = "training")]
668    Gpu {
669        /// Show reservations as JSON
670        #[arg(long)]
671        json: bool,
672    },
673    /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
674    
675    Code {
676        /// Path to local GGUF/APR model file (prefers .apr format)
677        #[arg(long)]
678        model: Option<PathBuf>,
679
680        /// Project directory (loads APR.md/CLAUDE.md from this path)
681        #[arg(long, default_value = ".")]
682        project: PathBuf,
683
684        /// Resume previous session (optionally by ID)
685        #[arg(long)]
686        resume: Option<Option<String>>,
687
688        /// Agent manifest (advanced — overrides defaults)
689        #[arg(long)]
690        manifest: Option<PathBuf>,
691
692        /// Initial prompt (non-interactive: print response and exit)
693        #[arg(short, long)]
694        print: bool,
695
696        /// Prompt text (positional, for -p mode)
697        #[arg(trailing_var_arg = true)]
698        prompt: Vec<String>,
699
700        /// Max turns before stopping
701        #[arg(long, default_value = "50")]
702        max_turns: u32,
703
704        /// Emit a `ccpa-trace.jsonl` describing the run to this path.
705        /// Format mirrors the schema at
706        /// <https://github.com/paiml/claude-code-parity-apr/blob/main/contracts/claude-code-parity-apr-v1.yaml>
707        /// (`§ trace_schema`). Used by `ccpa measure` to score apr-code
708        /// against canonical Claude Code reference fixtures.
709        #[arg(long)]
710        emit_trace: Option<PathBuf>,
711
712        /// Output format for non-interactive (`-p`) mode (PMAT-CODE-OUTPUT-FORMAT-001).
713        /// `text` (default): plain assistant text.
714        /// `json`: structured `{type:"result", subtype:"success", result, session_id, duration_ms}`
715        /// envelope matching Claude Code's `claude -p --output-format json` shape.
716        #[arg(long, value_enum, default_value_t = CodeOutputFormat::Text)]
717        output_format: CodeOutputFormat,
718
719        /// Input format for non-interactive stdin (PMAT-CODE-INPUT-FORMAT-001).
720        /// `text` (default): treat stdin as raw prompt text.
721        /// `json`: parse `{"role":"user","content":"..."}` from stdin and use `content`
722        /// as the prompt. Matches Claude Code's `claude -p --input-format json` shape.
723        #[arg(long, value_enum, default_value_t = CodeInputFormat::Text)]
724        input_format: CodeInputFormat,
725    },
726    /// Extended analysis, profiling, QA, and visualization commands
727    #[command(flatten)]
728    Extended(ExtendedCommands),
729
730    /// Monorepo management (publish, shims, audit, archive) [dev-only]
731    #[cfg(feature = "dev")]
732    #[command(subcommand)]
733    Mono(crate::commands::mono::MonoCommands),
734}