// apr_cli/commands_enum.rs

2/// Output format for `apr code` non-interactive mode (PMAT-CODE-OUTPUT-FORMAT-001).
3/// Mirrors Claude Code's `claude -p --output-format <fmt>` parity row.
4#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Default)]
5pub enum CodeOutputFormat {
6 /// Plain assistant text on stdout (default; existing behavior).
7 #[default]
8 Text,
9 /// Structured JSON envelope: `{type:"result", subtype:"success", result, session_id, duration_ms}`.
10 Json,
11}
12
13/// Input format for `apr code` non-interactive mode (PMAT-CODE-INPUT-FORMAT-001).
14/// `--input-format json` reads `{"role":"user","content":"..."}` from stdin instead
15/// of treating stdin as raw prompt text.
16#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Default)]
17pub enum CodeInputFormat {
18 /// Raw prompt text from positional args or stdin (default; existing behavior).
19 #[default]
20 Text,
21 /// JSON message envelope on stdin: `{"role":"user","content":"..."}`.
22 Json,
23}
24
25#[derive(Subcommand, Debug)]
26pub enum Commands {
27 /// Run model directly (auto-download, cache, execute)
28 Run {
29 /// Model source: local path, hf://org/repo, or URL
30 #[arg(value_name = "SOURCE")]
31 source: String,
32 /// Text prompt (positional): `apr run model.gguf "What is 2+2?"`
33 #[arg(value_name = "PROMPT")]
34 positional_prompt: Option<String>,
35 /// Input file (audio, text, etc.)
36 #[arg(short, long)]
37 input: Option<PathBuf>,
38 /// Text prompt for generation (for LLM models)
39 #[arg(short, long)]
40 prompt: Option<String>,
41 /// Maximum tokens to generate (default: 32)
42 #[arg(short = 'n', long, default_value = "32")]
43 max_tokens: usize,
44 /// Enable streaming output
45 #[arg(long)]
46 stream: bool,
47 /// Language code (for ASR models)
48 #[arg(short, long)]
49 language: Option<String>,
50 /// Task (transcribe, translate)
51 #[arg(short, long)]
52 task: Option<String>,
53 /// Output format (text, json, srt, vtt)
54 #[arg(short = 'f', long, default_value = "text")]
55 format: String,
56 /// Disable GPU acceleration (force CPU-only inference)
57 #[arg(long, alias = "cpu", conflicts_with = "gpu")]
58 no_gpu: bool,
59 /// Force GPU acceleration
60 #[arg(long, conflicts_with = "no_gpu")]
61 gpu: bool,
62 /// Offline mode: block all network access (Sovereign AI compliance)
63 #[arg(long)]
64 offline: bool,
65 /// Benchmark mode: output performance metrics (tok/s, latency)
66 #[arg(long)]
67 benchmark: bool,
68 /// Enable inference tracing (APR-TRACE-001)
69 #[arg(long)]
70 trace: bool,
71 /// Trace specific steps only (comma-separated)
72 #[arg(long, value_delimiter = ',')]
73 trace_steps: Option<Vec<String>>,
74 /// Verbose tracing (show tensor values)
75 #[arg(long)]
76 trace_verbose: bool,
77 /// Save trace output to JSON file
78 #[arg(long, value_name = "FILE")]
79 trace_output: Option<PathBuf>,
80 /// Trace detail level (none, basic, layer, payload, chrome)
81 /// "chrome" outputs chrome://tracing JSON integrating layer trace + brick profile.
82 /// F-CLIPARITY-01 / PMAT-386 / paiml/aprender#574
83 #[arg(long, value_name = "LEVEL", default_value = "basic")]
84 trace_level: String,
85 /// Shorthand for --trace --trace-level payload (tensor value inspection)
86 #[arg(long)]
87 trace_payload: bool,
88 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
89 #[arg(long)]
90 profile: bool,
91 /// Apply chat template for Instruct models (GAP-UX-001)
92 ///
93 /// Wraps prompt in ChatML format for Qwen2, LLaMA, Mistral Instruct models.
94 /// Format: <|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n
95 #[arg(long)]
96 chat: bool,
97 /// Sampling temperature (0.0 = greedy, default: 0.0)
98 #[arg(long, default_value = "0.0")]
99 temperature: f32,
100 /// Top-k sampling (default: 1 = greedy)
101 #[arg(long, default_value = "1")]
102 top_k: usize,
103 /// Top-p nucleus sampling (0.0 = disabled). When set with --top-k, applies top-k first then top-p.
104 /// F-CLIPARITY-01 / PMAT-381 / paiml/aprender#569
105 #[arg(long)]
106 top_p: Option<f32>,
107 /// RNG seed for deterministic sampling (default: 299792458, matching Candle)
108 /// F-CLIPARITY-01 / PMAT-382 / paiml/aprender#570
109 #[arg(long, default_value = "299792458")]
110 seed: u64,
111 /// Repetition penalty (1.0 = no penalty, >1.0 penalizes repeats)
112 /// F-CLIPARITY-01 / PMAT-383 / paiml/aprender#571
113 #[arg(long, default_value = "1.0")]
114 repeat_penalty: f32,
115 /// Context window for repetition penalty (number of recent tokens to check)
116 /// F-CLIPARITY-01 / PMAT-384 / paiml/aprender#571
117 #[arg(long, default_value = "64")]
118 repeat_last_n: usize,
119 /// Process prompt tokens one-by-one instead of batched prefill.
120 /// Useful for debugging prefill correctness (comparing per-token attention).
121 /// F-CLIPARITY-01 / PMAT-385 / paiml/aprender#572
122 #[arg(long)]
123 split_prompt: bool,
124 /// Batch mode: read prompts from JSONL, output results as JSONL.
125 /// Model loads once, processes all prompts sequentially.
126 /// Each input line: {"prompt": "...", "task_id": "..."}
127 /// Chat template is applied automatically.
128 #[arg(long, value_name = "FILE")]
129 batch_jsonl: Option<PathBuf>,
130 /// Show verbose output (model loading, backend info)
131 #[arg(short, long)]
132 verbose: bool,
133 /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
134 #[arg(long, value_name = "BACKEND")]
135 backend: Option<String>,
136 },
137 /// Inference server (plan/run)
138 Serve {
139 #[command(subcommand)]
140 command: ServeCommands,
141 },
142 /// Inspect model metadata, vocab, and structure
143 Inspect {
144 /// Path to .apr model file
145 #[arg(value_name = "FILE")]
146 file: PathBuf,
147 /// Show vocabulary details
148 #[arg(long)]
149 vocab: bool,
150 /// Show filter/security details
151 #[arg(long)]
152 filters: bool,
153 /// Show weight statistics
154 #[arg(long)]
155 weights: bool,
156 /// Output as JSON
157 #[arg(long)]
158 json: bool,
159 /// Emit a 0-100 model quality score block.
160 ///
161 /// Per SPEC-SHIP-TWO-001 §84 P3-A (AC-SHIP2-007 quality
162 /// threshold ≥ 90). The score aggregates: physics checks
163 /// (no NaN/Inf, no all-zero tensors), structural
164 /// completeness (architecture / hidden_size / num_layers
165 /// metadata present), provenance (license + data_source +
166 /// data_license non-empty), HF identity (hf_architecture
167 /// stamped per PMAT-690 P0-K), and tokenizer presence
168 /// (has_vocab + embedded merges). A ship-blocking model
169 /// MUST score ≥ 90 by this rubric.
170 #[arg(long)]
171 quality: bool,
172 },
173 /// Simple debugging output ("drama" mode available)
174 Debug {
175 /// Path to .apr model file
176 #[arg(value_name = "FILE")]
177 file: PathBuf,
178 /// Theatrical "drama" mode output
179 #[arg(long)]
180 drama: bool,
181 /// Show hex dump
182 #[arg(long)]
183 hex: bool,
184 /// Extract ASCII strings
185 #[arg(long)]
186 strings: bool,
187 /// Limit output lines
188 #[arg(long, default_value = "256")]
189 limit: usize,
190 },
191 /// Validate model integrity and quality
192 Validate {
193 /// Path to .apr model file
194 #[arg(value_name = "FILE")]
195 file: PathBuf,
196 /// Show 100-point quality assessment
197 #[arg(long)]
198 quality: bool,
199 /// Strict validation (fail on warnings)
200 #[arg(long)]
201 strict: bool,
202 /// Minimum score to pass (0-100)
203 #[arg(long)]
204 min_score: Option<u8>,
205 },
206 /// Validate a publish manifest (FALSIFY-PM-001..006).
207 ///
208 /// Contract: `contracts/publish-manifest-v1.yaml`
209 /// Spec: SPEC-SHIP-TWO-001 §12.3 AC-EX-004
210 ValidateManifest {
211 /// Path to manifest YAML
212 #[arg(value_name = "MANIFEST")]
213 file: PathBuf,
214 /// Optional local .apr artifact to discharge FALSIFY-PM-002 (sha256 match)
215 #[arg(long, value_name = "APR_FILE")]
216 artifact: Option<PathBuf>,
217 /// Discharge FALSIFY-PM-003 via network: HTTP HEAD + streaming sha256.
218 /// Default is DEFERRED (offline-safe). Ignored when --offline is set.
219 /// Closes F-PUBLISH-EXTRA-001::dogfood_ex05 (no Python in ex-05).
220 #[arg(long)]
221 live: bool,
222 },
223 /// Compare two models
224 Diff {
225 /// First model file
226 #[arg(value_name = "FILE1")]
227 file1: PathBuf,
228 /// Second model file
229 #[arg(value_name = "FILE2")]
230 file2: PathBuf,
231 /// Show weight-level differences
232 #[arg(long)]
233 weights: bool,
234 /// Compare actual tensor values with statistical analysis
235 #[arg(long)]
236 values: bool,
237 /// Filter tensors by name pattern (for --values)
238 #[arg(long)]
239 filter: Option<String>,
240 /// Maximum number of tensors to compare (for --values)
241 #[arg(long, default_value = "10")]
242 limit: usize,
243 /// Account for transpose when comparing (GGUF col-major vs APR row-major)
244 #[arg(long)]
245 transpose_aware: bool,
246 /// Output as JSON
247 #[arg(long)]
248 json: bool,
249 /// CRUX-B-20: per-tensor quant roundtrip error report (RMSE / cosine / max_abs).
250 /// FILE1 is the reference (fp16/fp32/bf16); FILE2 is the quantized variant.
251 #[arg(long)]
252 quant_roundtrip: bool,
253 /// CRUX-B-20: cosine threshold for the quant-roundtrip exit-code gate.
254 /// Any tensor with cosine < threshold makes the command exit non-zero.
255 #[arg(long, default_value = "0.95")]
256 threshold: f32,
257 /// CRUX-B-20: suppress the threshold exit-code gate (still emits the report).
258 #[arg(long)]
259 no_threshold: bool,
260 },
261 /// List tensor names and shapes
262 Tensors {
263 /// Path to .apr model file
264 #[arg(value_name = "FILE")]
265 file: PathBuf,
266 /// Show tensor statistics (mean, std, min, max)
267 #[arg(long)]
268 stats: bool,
269 /// Filter tensors by name pattern
270 #[arg(long)]
271 filter: Option<String>,
272 /// Limit number of tensors shown (0 = unlimited)
273 #[arg(long, default_value = "0")]
274 limit: usize,
275 /// Output as JSON
276 #[arg(long)]
277 json: bool,
278 },
279 /// Layer-by-layer trace analysis
280 Trace {
281 /// Path to .apr model file
282 #[arg(value_name = "FILE")]
283 file: PathBuf,
284 /// Filter layers by name pattern
285 #[arg(long)]
286 layer: Option<String>,
287 /// Compare with reference model
288 #[arg(long)]
289 reference: Option<PathBuf>,
290 /// Output as JSON
291 #[arg(long)]
292 json: bool,
293 /// Verbose output with per-layer stats
294 #[arg(short, long)]
295 verbose: bool,
296 /// Trace payload through model
297 #[arg(long)]
298 payload: bool,
299 /// Diff mode
300 #[arg(long)]
301 diff: bool,
302 /// Interactive mode
303 #[arg(long)]
304 interactive: bool,
305 /// Save per-stage F32 tensors during trace for SHIP-007 layer-0
306 /// element-wise diff. Comma-separated stage names from
307 /// `apr-cli-trace-save-tensor-v1.yaml` (e.g.
308 /// `embedding,qkv_matmul,attention`). Pass `all` to save every
309 /// stage. Output goes to `--save-tensor-dir` if provided,
310 /// else `<file_dir>/trace-tensors/<run_id>/`.
311 #[arg(long, value_name = "STAGES")]
312 save_tensor: Option<String>,
313 /// Output directory for `--save-tensor` (default: sibling
314 /// `trace-tensors/<run_id>/`).
315 #[arg(long, value_name = "DIR")]
316 save_tensor_dir: Option<PathBuf>,
317 /// Layer-id range for `--save-tensor` (default: 0..1, i.e.
318 /// layer 0 only). Format: `START..END` (Rust range syntax,
319 /// END exclusive).
320 #[arg(long, value_name = "RANGE", default_value = "0..1")]
321 save_tensor_layers: String,
322 },
323 /// Check for best practices and conventions
324 Lint {
325 /// Path to .apr model file
326 #[arg(value_name = "FILE")]
327 file: PathBuf,
328 },
329 /// Emit a SHA-256 manifest of input files (CRUX-G-05)
330 Manifest {
331 /// Files to include in the manifest (one entry per file)
332 #[arg(value_name = "FILES", num_args = 1..)]
333 files: Vec<PathBuf>,
334 /// Output JSON manifest path
335 #[arg(short, long, value_name = "MAN_JSON")]
336 output: PathBuf,
337 },
338 /// Explain errors, architecture, tensors, and kernel dispatch
339 Explain {
340 /// Error code, model file path, or family name (auto-detected)
341 #[arg(value_name = "CODE_OR_FILE")]
342 code_or_file: Option<String>,
343 /// Path to .apr model file (optional context for --tensor)
344 #[arg(short, long)]
345 file: Option<PathBuf>,
346 /// Explain a specific tensor
347 #[arg(long)]
348 tensor: Option<String>,
349 /// Explain kernel dispatch pipeline for architecture
350 #[arg(long)]
351 kernel: bool,
352 /// Output as JSON
353 #[arg(long)]
354 json: bool,
355 /// Show kernel contract details and proof obligations
356 #[arg(short, long)]
357 verbose: bool,
358 /// Show per-kernel proof status from contract tests
359 #[arg(long)]
360 proof_status: bool,
361 },
362 /// Manage canary tests for regression
363 Canary {
364 #[command(subcommand)]
365 command: CanaryCommands,
366 },
367 /// Export model to other formats
368 Export {
369 /// Path to .apr model file
370 #[arg(value_name = "FILE", required_unless_present = "list_formats")]
371 file: Option<PathBuf>,
372 /// Output format (safetensors, gguf, mlx, onnx, openvino, coreml)
373 #[arg(long, default_value = "safetensors")]
374 format: String,
375 /// Output file/directory path
376 #[arg(short, long)]
377 output: Option<PathBuf>,
378 /// Apply quantization during export (int8, int4, fp16)
379 #[arg(long)]
380 quantize: Option<String>,
381 /// List all supported export formats
382 #[arg(long)]
383 list_formats: bool,
384 /// Batch export to multiple formats (comma-separated: gguf,mlx,safetensors)
385 #[arg(long)]
386 batch: Option<String>,
387 /// Output in JSON format
388 #[arg(long)]
389 json: bool,
390 /// Plan mode (validate inputs, show export plan, no execution)
391 #[arg(long)]
392 plan: bool,
393 },
394 /// Import from external formats (hf://org/repo, local files, URLs)
395 Import {
396 /// Source: hf://org/repo, local file, or URL
397 #[arg(value_name = "SOURCE")]
398 source: String,
399 /// Output .apr file path (default: derived from source name)
400 #[arg(short, long)]
401 output: Option<PathBuf>,
402 /// Model architecture (whisper, llama, bert, qwen2, qwen3, gpt2, starcoder, gpt-neox, opt, phi, gemma, falcon, mamba, t5, auto)
403 #[arg(long, default_value = "auto")]
404 arch: String,
405 /// Quantization (int8, int4, fp16)
406 #[arg(long)]
407 quantize: Option<String>,
408 /// Strict mode: reject unverified architectures and fail on validation errors
409 #[arg(long)]
410 strict: bool,
411 /// Preserve Q4K quantization for fused kernel inference (GGUF only)
412 /// Uses realizar's Q4K converter instead of dequantizing to F32
413 #[arg(long)]
414 preserve_q4k: bool,
415 /// PMAT-232: External tokenizer.json for weights-only GGUF files.
416 /// Required if the GGUF has no embedded tokenizer vocabulary.
417 #[arg(long)]
418 tokenizer: Option<PathBuf>,
419 /// F-GT-001: Enforce provenance chain. Rejects pre-baked GGUF imports
420 /// (only SafeTensors sources allowed). Ensures single-provenance testing.
421 #[arg(long)]
422 enforce_provenance: bool,
423 /// GH-223: Allow import without config.json (default: error).
424 /// Without config.json, hyperparameters like rope_theta are inferred from
425 /// tensor shapes and may be wrong, producing garbage output.
426 #[arg(long)]
427 allow_no_config: bool,
428 },
429 /// Download and cache model OR HuggingFace dataset (Ollama-like UX)
430 Pull {
431 /// Model reference (alias, hf:// URI, or org/repo) OR "dataset"
432 /// asset-type discriminator. When this value is the literal
433 /// string "dataset", the next positional `repo` is the
434 /// HuggingFace dataset repo and dataset-pull semantics apply.
435 #[arg(value_name = "MODEL_OR_ASSET_TYPE")]
436 model_ref: String,
437 /// Dataset repository (used only when model_ref == "dataset").
438 /// Per `apr-cli-pull-dataset-v1.yaml`.
439 #[arg(value_name = "REPO")]
440 repo: Option<String>,
441 /// Force re-download even if cached
442 #[arg(long)]
443 force: bool,
444 /// CRUX-A-01: resolve short name to canonical URL and exit without
445 /// performing any network I/O.
446 #[arg(long)]
447 dry_run: bool,
448 /// CRUX-A-03: pin to a specific branch, tag, or git SHA on the remote
449 /// (HuggingFace Hub). Defaults to "main" when omitted.
450 #[arg(long, value_name = "REV")]
451 revision: Option<String>,
452 /// CRUX-A-20: offline mode — forbid any outbound network I/O.
453 /// Equivalent to APR_OFFLINE=1 or HF_HUB_OFFLINE=1 in the environment.
454 #[arg(long)]
455 offline: bool,
456 /// (dataset mode) Glob pattern for shard selection. May be passed
457 /// multiple times; matches are unioned. fnmatch-compatible
458 /// (`*`, `?`, `[a-z]`). No-match is fail-fast.
459 #[arg(long, value_name = "GLOB")]
460 include: Vec<String>,
461 /// (dataset mode) Output directory. Default:
462 /// `~/.cache/aprender/datasets/<repo>/`.
463 #[arg(short = 'o', long)]
464 output: Option<PathBuf>,
465 },
466 /// Registry operations (CRUX-A-01): inspect alias map, etc.
467 Registry {
468 #[command(subcommand)]
469 command: crate::commands::registry::RegistryCommands,
470 },
471 /// List cached models
472 #[command(name = "list", alias = "ls")]
473 List,
474 /// Remove model from cache
475 #[command(name = "rm", alias = "remove")]
476 Rm {
477 /// Model reference to remove
478 #[arg(value_name = "MODEL")]
479 model_ref: String,
480 },
481 /// Convert/optimize model
482 Convert {
483 /// Path to .apr model file
484 #[arg(value_name = "FILE")]
485 file: PathBuf,
486 /// Quantize to format (int8, int4, fp16, q4k)
487 #[arg(long)]
488 quantize: Option<String>,
489 /// Compress output (none, zstd, zstd-max, lz4)
490 #[arg(long)]
491 compress: Option<String>,
492 /// Output file path
493 #[arg(short, long)]
494 output: PathBuf,
495 /// Force overwrite existing files
496 #[arg(short, long)]
497 force: bool,
498 },
499 /// Stamp provenance fields (license, data_source, data_license) onto an existing .apr file
500 ///
501 /// SHIP-009 full-discharge enabler — patches the three provenance fields on
502 /// a pre-built APR v2 artifact (e.g., the shipped MODEL-1 teacher whose
503 /// fields are all (missing) because it was built before GATE-APR-PROV-001..003
504 /// shipped). Tensor bytes and header flags are preserved verbatim.
505 Stamp {
506 /// Path to input .apr model file
507 #[arg(value_name = "FILE")]
508 file: PathBuf,
509 /// SPDX license identifier (e.g., Apache-2.0)
510 #[arg(long)]
511 license: Option<String>,
512 /// Training-data source (e.g., huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)
513 #[arg(long = "data-source")]
514 data_source: Option<String>,
515 /// SPDX license for data_source (e.g., Apache-2.0)
516 #[arg(long = "data-license")]
517 data_license: Option<String>,
518 /// HuggingFace class name (e.g., Qwen2ForCausalLM, LlamaForCausalLM).
519 ///
520 /// PMAT-690 P0-K extension (SPEC §86): patch the upstream
521 /// `architectures[0]` stamp on a pre-P0-K APR so downstream
522 /// consumers (apr inspect --quality, apr pretrain --init,
523 /// apr export → llama-cli) see the correct HF identity.
524 #[arg(long = "hf-architecture")]
525 hf_architecture: Option<String>,
526 /// HuggingFace model_type slug (e.g., qwen2, llama).
527 ///
528 /// PMAT-690 P0-K extension (SPEC §86).
529 #[arg(long = "hf-model-type")]
530 hf_model_type: Option<String>,
531 /// Lowercase architecture family slug (e.g., qwen2, llama).
532 ///
533 /// PMAT-690 P0-K extension (SPEC §86). This is the field
534 /// `apr pretrain --init` reads for arch dispatch — without
535 /// patching it, pre-P0-K checkpoints with the P0-H "LlamaForCausalLM"
536 /// fallback in this field cannot be loaded as Qwen2 inits.
537 #[arg(long)]
538 architecture: Option<String>,
539 /// Directory containing tokenizer files (vocab.json + merges.txt
540 /// OR tokenizer.json). When provided, embeds the vocabulary +
541 /// BPE merges into the APR's `custom.tokenizer.vocabulary` /
542 /// `custom.tokenizer.merges` JSON metadata AND sets the
543 /// HAS_VOCAB header flag.
544 ///
545 /// PMAT-690 P3-C-prep defect 1 fix (2026-05-17): pre-P0-K APRs
546 /// trained from inits without embedded tokenizers fail `apr run`
547 /// with PMAT-172. This flag lets the §86 salvage recipe embed
548 /// the tokenizer post-hoc so the artifact is self-contained
549 /// for inference (the apr binary's headline use case).
550 #[arg(long = "tokenizer", value_name = "DIR")]
551 tokenizer_dir: Option<PathBuf>,
552 /// Output file path
553 #[arg(short, long)]
554 output: PathBuf,
555 /// Force overwrite existing files
556 #[arg(short, long)]
557 force: bool,
558 },
559 /// Compile model into standalone executable (APR-SPEC §4.16)
560 Compile {
561 /// Input .apr model file
562 #[arg(value_name = "FILE", required_unless_present = "list_targets")]
563 file: Option<PathBuf>,
564 /// Output binary path (default: derived from model name)
565 #[arg(short, long)]
566 output: Option<PathBuf>,
567 /// Target triple (e.g., x86_64-unknown-linux-musl)
568 #[arg(long)]
569 target: Option<String>,
570 /// Quantize weights before embedding (int8, int4, fp16)
571 #[arg(long)]
572 quantize: Option<String>,
573 /// Release mode (optimized)
574 #[arg(long)]
575 release: bool,
576 /// Strip debug symbols
577 #[arg(long)]
578 strip: bool,
579 /// Enable LTO (Link-Time Optimization)
580 #[arg(long)]
581 lto: bool,
582 /// List available compilation targets
583 #[arg(long)]
584 list_targets: bool,
585 },
586 /// Merge multiple models
587 Merge {
588 /// Model files to merge
589 #[arg(value_name = "FILES", num_args = 2..)]
590 files: Vec<PathBuf>,
591 /// Merge strategy (average, weighted, slerp, ties, dare)
592 #[arg(long, default_value = "average")]
593 strategy: String,
594 /// Output file path (optional in --plan mode)
595 #[arg(short, long, required_unless_present = "plan")]
596 output: Option<PathBuf>,
597 /// Weights for weighted merge (comma-separated, e.g., "0.7,0.3")
598 #[arg(long, value_delimiter = ',')]
599 weights: Option<Vec<f32>>,
600 /// Base model for TIES/DARE (task vectors computed as delta from base)
601 #[arg(long)]
602 base_model: Option<PathBuf>,
603 /// DARE drop probability (default: 0.9)
604 #[arg(long, default_value = "0.9")]
605 drop_rate: f32,
606 /// TIES trim density threshold (default: 0.2)
607 #[arg(long, default_value = "0.2")]
608 density: f32,
609 /// RNG seed for DARE (default: 42)
610 #[arg(long, default_value = "42")]
611 seed: u64,
612 /// Plan mode (validate inputs, show merge plan, no execution)
613 #[arg(long)]
614 plan: bool,
615 },
616 /// Quantize model weights (GH-243)
617 Quantize {
618 /// Input model file
619 #[arg(value_name = "FILE")]
620 file: PathBuf,
621 /// Quantization scheme: int8, int4, fp16, q4k
622 #[arg(long, short = 's', default_value = "int4")]
623 scheme: String,
624 /// Output file path (required unless --plan)
625 #[arg(short, long)]
626 output: Option<PathBuf>,
627 /// Output format override (apr, gguf, safetensors)
628 #[arg(long)]
629 format: Option<String>,
630 /// Batch quantization (comma-separated schemes)
631 #[arg(long)]
632 batch: Option<String>,
633 /// Plan mode (estimate only, no execution)
634 #[arg(long)]
635 plan: bool,
636 /// Force overwrite existing files
637 #[arg(short, long)]
638 force: bool,
639 },
640 /// Model optimization commands (fine-tune, prune, distill)
641 #[command(flatten)]
642 ModelOps(ModelOpsCommands),
643 /// Start the MCP (Model Context Protocol) server over stdio
644 ///
645 /// Exposes `apr` as MCP tools for Claude Code, Cursor, Cline, and other
646 /// MCP clients. Configure via `.mcp.json` with `{"command":"apr","args":["mcp"]}`.
647 Mcp {},
648 /// Interactive terminal UI
649 Tui {
650 /// Path to .apr model file
651 #[arg(value_name = "FILE")]
652 file: Option<PathBuf>,
653 },
654 /// Model self-test: 10-stage pipeline integrity check (APR-TRACE-001)
655 Check {
656 /// Path to model file
657 #[arg(value_name = "FILE")]
658 file: PathBuf,
659 /// Disable GPU acceleration
660 #[arg(long)]
661 no_gpu: bool,
662 /// Output as JSON
663 #[arg(long)]
664 json: bool,
665 },
666 /// GPU status and VRAM reservation management (GPU-SHARE-001)
667 #[cfg(feature = "training")]
668 Gpu {
669 /// Show reservations as JSON
670 #[arg(long)]
671 json: bool,
672 },
673 /// Sovereign AI coding assistant — all inference local via realizar (PMAT-182)
674
675 Code {
676 /// Path to local GGUF/APR model file (prefers .apr format)
677 #[arg(long)]
678 model: Option<PathBuf>,
679
680 /// Project directory (loads APR.md/CLAUDE.md from this path)
681 #[arg(long, default_value = ".")]
682 project: PathBuf,
683
684 /// Resume previous session (optionally by ID)
685 #[arg(long)]
686 resume: Option<Option<String>>,
687
688 /// Agent manifest (advanced — overrides defaults)
689 #[arg(long)]
690 manifest: Option<PathBuf>,
691
692 /// Initial prompt (non-interactive: print response and exit)
693 #[arg(short, long)]
694 print: bool,
695
696 /// Prompt text (positional, for -p mode)
697 #[arg(trailing_var_arg = true)]
698 prompt: Vec<String>,
699
700 /// Max turns before stopping
701 #[arg(long, default_value = "50")]
702 max_turns: u32,
703
704 /// Emit a `ccpa-trace.jsonl` describing the run to this path.
705 /// Format mirrors the schema at
706 /// <https://github.com/paiml/claude-code-parity-apr/blob/main/contracts/claude-code-parity-apr-v1.yaml>
707 /// (`§ trace_schema`). Used by `ccpa measure` to score apr-code
708 /// against canonical Claude Code reference fixtures.
709 #[arg(long)]
710 emit_trace: Option<PathBuf>,
711
712 /// Output format for non-interactive (`-p`) mode (PMAT-CODE-OUTPUT-FORMAT-001).
713 /// `text` (default): plain assistant text.
714 /// `json`: structured `{type:"result", subtype:"success", result, session_id, duration_ms}`
715 /// envelope matching Claude Code's `claude -p --output-format json` shape.
716 #[arg(long, value_enum, default_value_t = CodeOutputFormat::Text)]
717 output_format: CodeOutputFormat,
718
719 /// Input format for non-interactive stdin (PMAT-CODE-INPUT-FORMAT-001).
720 /// `text` (default): treat stdin as raw prompt text.
721 /// `json`: parse `{"role":"user","content":"..."}` from stdin and use `content`
722 /// as the prompt. Matches Claude Code's `claude -p --input-format json` shape.
723 #[arg(long, value_enum, default_value_t = CodeInputFormat::Text)]
724 input_format: CodeInputFormat,
725 },
726 /// Extended analysis, profiling, QA, and visualization commands
727 #[command(flatten)]
728 Extended(ExtendedCommands),
729
730 /// Monorepo management (publish, shims, audit, archive) [dev-only]
731 #[cfg(feature = "dev")]
732 #[command(subcommand)]
733 Mono(crate::commands::mono::MonoCommands),
734}