apr_cli/
extended_commands.rs

1/// Extended CLI commands (analysis, profiling, QA, benchmarks, and advanced tools).
2///
3/// Flattened into `Commands` via `#[command(flatten)]` so all subcommands remain
4/// top-level from the user's perspective (e.g., `apr chat`, `apr profile`).
5#[derive(Subcommand, Debug)]
6pub enum ExtendedCommands {
7    /// Interactive chat with language model
     // `file` is the only positional argument; every other field is a `--long` option.
8    Chat {
9        /// Path to .apr model file
10        #[arg(value_name = "FILE")]
11        file: PathBuf,
12        /// Sampling temperature (0 = greedy, higher = more random)
13        #[arg(long, default_value = "0.7")]
14        temperature: f32,
15        /// Nucleus sampling threshold
16        #[arg(long, default_value = "0.9")]
17        top_p: f32,
18        /// Maximum tokens to generate per response
19        #[arg(long, default_value = "512")]
20        max_tokens: usize,
21        /// System prompt to set model behavior
22        #[arg(long)]
23        system: Option<String>,
24        /// Show inspection info (top-k probs, tokens/sec)
25        #[arg(long)]
26        inspect: bool,
          // NOTE(review): `--no-gpu` and `--gpu` are two independent booleans; no
          // `conflicts_with` is declared on either, so as written both can be passed
          // together. Presumably the handler decides precedence — confirm.
27        /// Disable GPU acceleration (use CPU)
28        #[arg(long)]
29        no_gpu: bool,
30        /// Force GPU acceleration (requires CUDA)
31        #[arg(long)]
32        gpu: bool,
33        /// Enable inference tracing (APR-TRACE-001)
34        #[arg(long)]
35        trace: bool,
          // The value is split on ',' by clap (`value_delimiter`); the field is `None`
          // when the flag is not given.
36        /// Trace specific steps only (comma-separated)
37        #[arg(long, value_delimiter = ',')]
38        trace_steps: Option<Vec<String>>,
39        /// Verbose tracing
40        #[arg(long)]
41        trace_verbose: bool,
42        /// Save trace output to JSON file
43        #[arg(long, value_name = "FILE")]
44        trace_output: Option<PathBuf>,
          // NOTE(review): plain `String`; the levels listed in the help text are not
          // restricted by any attribute here — presumably validated downstream.
45        /// Trace detail level (none, basic, layer, payload)
46        #[arg(long, value_name = "LEVEL", default_value = "basic")]
47        trace_level: String,
48        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
49        #[arg(long)]
50        profile: bool,
          // NOTE(review): free-form `Option<String>`; the backend names in the help
          // text (cuda, cpu, wgpu) are not enforced at parse time here.
51        /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
52        #[arg(long, value_name = "BACKEND")]
53        backend: Option<String>,
54    },
55    /// Benchmark throughput (spec H12: >= 10 tok/s)
      // `file` is the single positional argument; the iteration counts and token
      // budget all carry string defaults that clap parses into `usize`.
56    Bench {
57        /// Path to model file
58        #[arg(value_name = "FILE")]
59        file: PathBuf,
60        /// Number of warmup iterations
61        #[arg(long, default_value = "3")]
62        warmup: usize,
63        /// Number of measurement iterations
64        #[arg(long, default_value = "5")]
65        iterations: usize,
66        /// Max tokens to generate per iteration
67        #[arg(long, default_value = "32")]
68        max_tokens: usize,
69        /// Test prompt
70        #[arg(long)]
71        prompt: Option<String>,
72        /// Use realizar for fast inference (vs aprender baseline)
73        #[arg(long)]
74        fast: bool,
75        /// Benchmark specific brick
76        #[arg(long)]
77        brick: Option<String>,
          // The default string "50,95,99" goes through the same ',' splitting as user
          // input, yielding three `f64` entries.
          // NOTE(review): the documented (0, 100] range is not checked by any
          // attribute here — presumably the handler validates it; confirm.
78        /// Comma-separated latency percentile points for JSON output
79        /// (CRUX-E-07). Default: `50,95,99`. Values must be in (0, 100].
80        #[arg(long, value_delimiter = ',', default_value = "50,95,99")]
81        percentiles: Vec<f64>,
82    },
83    /// Evaluate model perplexity (spec H13: PPL <= 20) or classification metrics
      // One variant carries the options for both the perplexity and the
      // classification path; `task` selects between them per its help text.
84    Eval {
85        /// Path to model file or checkpoint directory
86        #[arg(value_name = "FILE")]
87        file: PathBuf,
          // NOTE(review): free-form `String`; the dataset names in the help text are
          // not restricted by an attribute here.
88        /// Dataset: wikitext-2, lambada, or custom
89        #[arg(long, default_value = "wikitext-2")]
90        dataset: String,
          // NOTE(review): nothing declared here ties `--text` to `--dataset custom`;
          // the pairing is presumably checked by the handler.
91        /// Custom text (when dataset=custom)
92        #[arg(long)]
93        text: Option<String>,
94        /// Maximum tokens to evaluate
95        #[arg(long, default_value = "512")]
96        max_tokens: usize,
          // Default 20.0 matches the "PPL <= 20" figure in the variant summary.
97        /// Perplexity threshold for pass/fail
98        #[arg(long, default_value = "20.0")]
99        threshold: f32,
100        /// Task type: omit for perplexity, "classify" for classification eval
101        #[arg(long)]
102        task: Option<String>,
103        /// Test data file (JSONL) for classification evaluation
104        #[arg(long, value_name = "FILE")]
105        data: Option<PathBuf>,
106        /// Model size hint: "0.5B", "tiny" (for classification eval)
107        #[arg(long)]
108        model_size: Option<String>,
109        /// Number of output classes (default: 5)
110        #[arg(long, default_value = "5")]
111        num_classes: usize,
112        /// Generate HuggingFace model card (README.md) in checkpoint dir
113        #[arg(long)]
114        generate_card: bool,
           // NOTE(review): `String`, not an enum — values other than "cpu"/"cuda"
           // pass clap parsing as declared here.
115        /// Device for inference: "cpu" (default) or "cuda" (GPU-accelerated, ALB-089)
116        #[arg(long, default_value = "cpu")]
117        device: String,
118        /// Number of samples per problem for pass@k (ALB-088, default: 1)
119        #[arg(long, default_value = "1")]
120        samples: usize,
121        /// Sampling temperature (0.0 = greedy, 0.8 = standard for pass@k>1)
122        #[arg(long, default_value = "0.0")]
123        temperature: f32,
124    },
125    /// Deep profiling with Roofline analysis
       // Fields fall into two groups: analysis/report options first, then the CI
       // assertion and measurement options introduced by the PMAT-192 comment below.
126    Profile {
127        /// Path to model file
128        #[arg(value_name = "FILE")]
129        file: PathBuf,
130        /// Layer-by-layer granular analysis
131        #[arg(long)]
132        granular: bool,
           // NOTE(review): free-form `String`; the formats named in the help text are
           // not enforced by an attribute here.
133        /// Output format (human, json, flamegraph)
134        #[arg(long, default_value = "human")]
135        format: String,
136        /// Focus on specific operation
137        #[arg(long)]
138        focus: Option<String>,
139        /// Detect naive implementations
140        #[arg(long)]
141        detect_naive: bool,
142        /// GFLOPS threshold for naive detection
143        #[arg(long, default_value = "10.0")]
144        threshold: f64,
145        /// Compare against HuggingFace baseline
146        #[arg(long)]
147        compare_hf: Option<String>,
148        /// Measure energy consumption (requires RAPL)
149        #[arg(long)]
150        energy: bool,
151        /// Compute performance grade (vs Ollama baseline)
152        #[arg(long)]
153        perf_grade: bool,
154        /// Show call graph
155        #[arg(long)]
156        callgraph: bool,
157        /// Exit non-zero if naive implementation detected
158        #[arg(long)]
159        fail_on_naive: bool,
           // The only option in this variant with a short form (`-o`).
160        /// Output file path for flamegraph SVG (GH-174, PMAT-182)
161        #[arg(long, short = 'o')]
162        output: Option<PathBuf>,
163
164        // PMAT-192: CI Assertion Mode (GH-180)
165        /// Enable CI mode with assertion checks (exits 1 on failure)
166        #[arg(long)]
167        ci: bool,
           // NOTE(review): the three `assert_*` options are not declared as requiring
           // `--ci`; whether they take effect without it is decided outside this view.
168        /// Minimum throughput in tok/s (CI assertion, exits 1 if below)
169        #[arg(long)]
170        assert_throughput: Option<f64>,
171        /// Maximum p99 latency in ms (CI assertion, exits 1 if above)
172        #[arg(long)]
173        assert_p99: Option<f64>,
174        /// Maximum p50 latency in ms (CI assertion, exits 1 if above)
175        #[arg(long)]
176        assert_p50: Option<f64>,
177        /// Warmup passes before measurement (default: 3)
178        #[arg(long, default_value = "3")]
179        warmup: usize,
180        /// Measurement passes (default: 10)
181        #[arg(long, default_value = "10")]
182        measure: usize,
183        /// Number of tokens to generate per measurement pass (default: 32)
184        #[arg(long, default_value = "32")]
185        tokens: usize,
186        /// Compare against Ollama baseline (runs ollama for comparison)
187        #[arg(long)]
188        ollama: bool,
189        /// Disable GPU (force CPU-only profiling)
190        #[arg(long)]
191        no_gpu: bool,
192        /// Compare against another model format (F-PROFILE-011)
193        #[arg(long, value_name = "FILE")]
194        compare: Option<PathBuf>,
195    },
196    /// Falsifiable QA checklist for model releases
       // Options come in three kinds: `assert_*` thresholds, `skip_*` switches that
       // disable individual gates, and benchmark/reporting settings.
197    Qa {
198        /// Path to model file
199        #[arg(value_name = "FILE")]
200        file: PathBuf,
201        /// Minimum throughput threshold in tok/s
202        #[arg(long, value_name = "TPS")]
203        assert_tps: Option<f64>,
204        /// Minimum speedup vs Ollama
205        #[arg(long, value_name = "SPEEDUP")]
206        assert_speedup: Option<f64>,
207        /// Minimum GPU vs CPU speedup (F-PERF-042)
208        #[arg(long, value_name = "SPEEDUP")]
209        assert_gpu_speedup: Option<f64>,
210        /// Skip golden output test
211        #[arg(long)]
212        skip_golden: bool,
213        /// Skip throughput benchmark
214        #[arg(long)]
215        skip_throughput: bool,
216        /// Skip Ollama parity comparison
217        #[arg(long)]
218        skip_ollama: bool,
219        /// Skip GPU vs CPU speedup test (F-PERF-042)
220        #[arg(long)]
221        skip_gpu_speedup: bool,
222        /// Skip tensor contract validation (PMAT-235)
223        #[arg(long)]
224        skip_contract: bool,
225        /// Skip cross-format parity test (F-QUAL-032)
226        #[arg(long)]
227        skip_format_parity: bool,
228        /// Skip PTX parity validation (GH-219)
229        #[arg(long)]
230        skip_ptx_parity: bool,
231        /// SafeTensors model path for cross-format parity test (F-QUAL-032)
232        #[arg(long, value_name = "PATH")]
233        safetensors_path: Option<PathBuf>,
234        /// Number of benchmark iterations
235        #[arg(long, default_value = "10")]
236        iterations: usize,
237        /// Number of warmup iterations
238        #[arg(long, default_value = "3")]
239        warmup: usize,
240        /// Maximum tokens to generate
241        #[arg(long, default_value = "32")]
242        max_tokens: usize,
243        /// Output as JSON (for CI integration)
244        #[arg(long)]
245        json: bool,
           // `short` with no value: clap derives the short flag from the field name (`-v`).
246        /// Verbose output
247        #[arg(short, long)]
248        verbose: bool,
249        /// Minimum number of gates that must execute (fail if fewer)
250        #[arg(long, value_name = "N")]
251        min_executed: Option<usize>,
252        /// Previous QA report for regression detection
253        #[arg(long, value_name = "FILE")]
254        previous_report: Option<PathBuf>,
           // NOTE(review): the help text advertises a 0.10 default, but no
           // `default_value` is set — the field is `None` when omitted, so the 0.10
           // fallback must be applied by the consumer. Confirm it is.
255        /// Maximum allowed performance regression ratio (default: 0.10 = 10%)
256        #[arg(long, value_name = "RATIO")]
257        regression_threshold: Option<f64>,
258        /// Skip GPU state isolation test
259        #[arg(long)]
260        skip_gpu_state: bool,
261        /// Skip metadata plausibility validation (Bug 210, GH-222)
262        #[arg(long)]
263        skip_metadata: bool,
264        /// Skip GPU capability match gate (GH-280)
265        #[arg(long)]
266        skip_capability: bool,
267        /// Assert classifier head presence and shape (F-CLASS-004)
268        #[arg(long)]
269        assert_classifier_head: bool,
270    },
271    /// GPU/CPU parity check (PMAT-232: genchi genbutsu — see where GPU diverges)
272    Parity {
273        /// Path to GGUF model file
274        #[arg(value_name = "FILE")]
275        file: PathBuf,
           // Has both a long and a derived short form (`-p`); the default prompt is
           // supplied by clap, so the field is a plain `String` rather than an `Option`.
276        /// Prompt text (default: "What is 2+2?")
277        #[arg(short, long, default_value = "What is 2+2?")]
278        prompt: String,
           // `assert` is a valid field identifier (only `assert!` is a macro); the flag
           // is exposed as `--assert`.
279        /// Assert parity (exit non-zero on divergence)
280        #[arg(long)]
281        assert: bool,
282    },
283    /// Model-to-PTX source mapping (Mieruka: make GPU kernel dispatch visible)
       // The subcommand name is pinned explicitly to `ptx-map` by the attribute below.
284    #[command(name = "ptx-map")]
285    PtxMap {
286        /// Path to GGUF model file
287        #[arg(value_name = "FILE")]
288        file: PathBuf,
289        /// Filter to specific kernel (e.g., --kernel Q4KGemv)
290        #[arg(long)]
291        kernel: Option<String>,
           // NOTE(review): `--kernel` and `--reverse` both take a kernel name and are
           // not marked mutually exclusive here — confirm how the handler combines them.
292        /// Reverse lookup: kernel name -> which layers/steps use it
293        #[arg(long)]
294        reverse: Option<String>,
295        /// Output as JSON
296        #[arg(long)]
297        json: bool,
298        /// Full PTX snippets and detailed analysis
299        #[arg(short, long)]
300        verbose: bool,
301        /// Show batched prefill kernel variants instead of decode
302        #[arg(long)]
303        prefill: bool,
304    },
305    /// PTX analysis and bug detection (register pressure, roofline)
306    #[command(name = "ptx")]
307    Ptx {
           // NOTE(review): both input sources (`file` positional and `--kernel`) are
           // `Option`s with no `required_unless`/group attribute, so clap accepts an
           // invocation with neither; presumably the handler reports that case.
308        /// Path to a PTX source file
309        #[arg(value_name = "FILE")]
310        file: Option<PathBuf>,
311        /// Analyze a named kernel from trueno-gpu
312        #[arg(long, short)]
313        kernel: Option<String>,
314        /// Strict mode (no performance whitelist)
315        #[arg(long)]
316        strict: bool,
317        /// Show only bug analysis (skip register/memory/roofline)
318        #[arg(long)]
319        bugs: bool,
320        /// Output as JSON
321        #[arg(long)]
322        json: bool,
323        /// Verbose output (include PTX source listing)
324        #[arg(short, long)]
325        verbose: bool,
326    },
327    /// ML tuning: LoRA/QLoRA configuration, memory planning, and HPO (GH-176, SPEC-TUNE-2026-001)
       // Compiled only when the `training` cargo feature is enabled.
328    #[cfg(feature = "training")]
329    Tune {
330        /// Path to model file (optional if using --model)
331        #[arg(value_name = "FILE")]
332        file: Option<PathBuf>,
           // NOTE(review): free-form `String`; the method names in the help text are
           // not enforced by an attribute here.
333        /// Tuning method: auto, full, lora, qlora
334        #[arg(long, short = 'm', default_value = "auto")]
335        method: String,
336        /// LoRA rank (default: auto-selected)
337        #[arg(long, short = 'r')]
338        rank: Option<u32>,
339        /// Available VRAM in GB
340        #[arg(long, default_value = "16.0")]
341        vram: f64,
342        /// Only plan configuration, don't train
343        #[arg(long)]
344        plan: bool,
345        /// Model size for planning (e.g., "7B", "1.5B")
346        #[arg(long, value_name = "SIZE")]
347        model: Option<String>,
348        /// Freeze base model weights
349        #[arg(long)]
350        freeze_base: bool,
351        /// Training data file (JSONL format)
352        #[arg(long, value_name = "FILE")]
353        train_data: Option<PathBuf>,
354        /// Output as JSON (for CI integration)
355        #[arg(long)]
356        json: bool,
357        /// Task type for HPO: classify (SPEC-TUNE-2026-001)
358        #[arg(long)]
359        task: Option<String>,
360        /// Number of HPO trials (default: 10)
361        #[arg(long, default_value = "10")]
362        budget: usize,
363        /// HPO search strategy: tpe, grid, random
364        #[arg(long, default_value = "tpe")]
365        strategy: String,
366        /// HPO scheduler: asha, median, none
367        #[arg(long, default_value = "asha")]
368        scheduler: String,
369        /// Scout mode: 1 epoch per trial for fast exploration
370        #[arg(long)]
371        scout: bool,
           // NOTE(review): `--data` and `--train-data` (above) are both described as
           // JSONL training data; per the help text this one feeds the HPO path.
           // Confirm the two are not expected to be interchangeable.
372        /// Training data file for HPO (JSONL format)
373        #[arg(long, value_name = "FILE")]
374        data: Option<PathBuf>,
375        /// Number of output classes for classification
376        #[arg(long, default_value = "5")]
377        num_classes: usize,
           // Separate from `--model` above, which is documented as the planning size.
378        /// Model size hint for HPO (e.g., "0.5B", "1.5B")
379        #[arg(long)]
380        model_size: Option<String>,
381        /// Warm-start from scout phase results directory
382        #[arg(long, value_name = "DIR")]
383        from_scout: Option<PathBuf>,
384        /// Maximum epochs per trial (full mode, default: 20)
385        #[arg(long, default_value = "20")]
386        max_epochs: usize,
           // Kept as a raw string (e.g. "8h"); duration parsing is not done by clap here.
387        /// Maximum wall-clock time (e.g., "8h", "30m")
388        #[arg(long)]
389        time_limit: Option<String>,
390    },
391    /// Attach live TUI to a running training session
       // Compiled only when the `training` cargo feature is enabled.
392    #[cfg(feature = "training")]
393    Monitor {
           // Optional positional; behaviour when omitted is decided by the handler.
394        /// Experiment output directory (same as finetune -o)
395        #[arg(value_name = "DIR")]
396        dir: Option<PathBuf>,
397        /// Refresh interval in milliseconds
398        #[arg(long, default_value = "500")]
399        refresh_ms: u64,
400        /// Compact display mode
401        #[arg(long)]
402        compact: bool,
           // NOTE(review): `--json` overlaps with `--format json` below and no
           // precedence/conflict is declared between them — confirm which wins when
           // they disagree (e.g. `--json --format text`).
403        /// Output JSON lines instead of TUI (for LLM agents and CI)
404        #[arg(long)]
405        json: bool,
406        /// Output format: tui (default), json, text
407        #[arg(long, default_value = "tui")]
408        format: String,
409    },
410    /// List, show, and compare training experiment runs
       // Feature-gated on `training`. Carries no arguments of its own: it only
       // wraps the nested `RunsCommands` subcommand set (defined outside this view).
411    #[cfg(feature = "training")]
412    Runs {
413        #[command(subcommand)]
414        command: RunsCommands,
415    },
416    /// Interactive experiment browser (TUI with loss curves)
       // Feature-gated on `training`. Pure wrapper around the nested
       // `ExperimentCommands` subcommand set (defined outside this view).
417    #[cfg(feature = "training")]
418    Experiment {
419        #[command(subcommand)]
420        command: ExperimentCommands,
421    },
422    /// ComputeBrick pipeline monitor (cbtop)
       // Unlike most sibling variants there is no positional argument: the model is
       // selected through `--model`, `--attach` or `--model-path`, all optional.
423    Cbtop {
424        /// Model name (e.g., qwen2.5-coder-1.5b)
425        #[arg(long)]
426        model: Option<String>,
427        /// Attach to running realizar process
428        #[arg(long)]
429        attach: Option<String>,
430        /// Path to GGUF model file for real profiling
431        #[arg(long, value_name = "MODEL")]
432        model_path: Option<PathBuf>,
433        /// Run in headless mode (no TUI, for CI/automation)
434        #[arg(long)]
435        headless: bool,
           // NOTE(review): the help text for `--json` and `--output` says "requires
           // --headless", but no `requires = "headless"` attribute is declared, so
           // clap does not enforce it. Presumably the handler checks — confirm.
436        /// Output JSON format (requires --headless)
437        #[arg(long)]
438        json: bool,
439        /// Output file path (requires --headless)
440        #[arg(long, value_name = "FILE")]
441        output: Option<PathBuf>,
442        /// CI mode: exit with code 1 if thresholds not met
443        #[arg(long)]
444        ci: bool,
445        /// Minimum throughput threshold in tok/s (for --ci)
446        #[arg(long, value_name = "TOK_S")]
447        throughput: Option<f64>,
           // NOTE(review): typed `u32`, so the documented 0-100 range is not bounded
           // at parse time here.
448        /// Minimum brick score threshold 0-100 (for --ci)
449        #[arg(long, value_name = "SCORE")]
450        brick_score: Option<u32>,
451        /// Number of warmup iterations before measurement
452        #[arg(long, default_value = "10")]
453        warmup: usize,
454        /// Number of measurement iterations
455        #[arg(long, default_value = "100")]
456        iterations: usize,
457        /// PAR-100: Enable speculative decoding benchmark
458        #[arg(long)]
459        speculative: bool,
460        /// PAR-100: Number of tokens to draft speculatively (default: 4)
461        #[arg(long, default_value = "4")]
462        speculation_k: usize,
463        /// PAR-099: Path to draft model for speculative decoding
464        #[arg(long, value_name = "DRAFT_MODEL")]
465        draft_model: Option<PathBuf>,
466        /// PAR-102: Number of concurrent requests
467        #[arg(long, default_value = "1")]
468        concurrent: usize,
469        /// Use simulated data (for CI testing only)
470        #[arg(long)]
471        simulated: bool,
472    },
473    /// Probar testing framework (GH-876 — visual regression, replay, more).
474    ///
475    /// GH-876 Milestone 1: `apr probar tensor` migrates the existing flat
476    /// `apr probar <FILE>` behavior (PMAT-481 tensor visual regression).
477    /// The remaining probador subcommands (test, record, coverage, playbook,
478    /// comply, av-sync, audio, video, animation, stress, llm) land in
479    /// follow-up PRs that delegate to the probador library.
       // Pure wrapper: all arguments live on the nested `ProbarSubcommand` enum
       // (defined outside this view).
480    Probar {
481        #[command(subcommand)]
482        command: ProbarSubcommand,
483    },
484    /// Compare APR model against HuggingFace source
       // Subcommand name is pinned explicitly to `compare-hf`.
485    #[command(name = "compare-hf")]
486    CompareHf {
487        /// Path to .apr model file
488        #[arg(value_name = "FILE")]
489        file: PathBuf,
           // Plain `String` with no default: `--hf` is a mandatory named option.
490        /// HuggingFace repo ID (e.g., openai/whisper-tiny)
491        #[arg(long)]
492        hf: String,
493        /// Filter tensors by name pattern
494        #[arg(long)]
495        tensor: Option<String>,
           // The default is given in scientific notation and parsed into `f64`.
496        /// Comparison threshold (default: 1e-5)
497        #[arg(long, default_value = "1e-5")]
498        threshold: f64,
499        /// Output as JSON
500        #[arg(long)]
501        json: bool,
502    },
503    /// CRUX-K-11: parse Ollama-style Modelfile DSL into apr config.
       // Pure wrapper around the nested `ModelfileSubcommand` enum (defined outside
       // this view).
504    Modelfile {
505        #[command(subcommand)]
506        command: ModelfileSubcommand,
507    },
508    /// Format-aware binary forensics (10X better than xxd)
       // The boolean switches below (`--stats`, `--list`, `--header`, `--blocks`,
       // `--distribution`, `--contract`, `--entropy`, `--raw`) each select a view.
       // NOTE(review): none are declared mutually exclusive, so how combinations
       // are handled is up to the handler — confirm.
509    Hex {
510        /// Path to model file (APR, GGUF, or SafeTensors)
511        #[arg(value_name = "FILE")]
512        file: PathBuf,
513        /// Filter tensors by name pattern
514        #[arg(long)]
515        tensor: Option<String>,
516        /// Limit bytes/values to display
517        #[arg(long, default_value = "64")]
518        limit: usize,
519        /// Show tensor statistics
520        #[arg(long)]
521        stats: bool,
522        /// List tensor names only
523        #[arg(long)]
524        list: bool,
525        /// Output as JSON
526        #[arg(long)]
527        json: bool,
528        /// Annotated file header (magic, version, tensor count, metadata)
529        #[arg(long)]
530        header: bool,
531        /// Q4K/Q6K/Q8_0 super-block structure with field annotations
532        #[arg(long)]
533        blocks: bool,
534        /// Value histogram + entropy + kurtosis analysis
535        #[arg(long)]
536        distribution: bool,
537        /// Layout contract verification overlay per tensor
538        #[arg(long)]
539        contract: bool,
540        /// Per-region byte entropy analysis
541        #[arg(long)]
542        entropy: bool,
543        /// Raw bytes (like xxd but format-aware, with ASCII column)
544        #[arg(long)]
545        raw: bool,
           // Held as a `String`, not an integer, so clap does no numeric parsing;
           // the "0x" handling mentioned in the help text presumably happens in the
           // handler.
546        /// Start at byte offset (supports 0x prefix for hex)
547        #[arg(long, default_value = "0")]
548        offset: String,
549        /// Bytes per row for raw output (default: 16)
550        #[arg(long, default_value = "16")]
551        width: usize,
           // Raw "start:end" text; the range syntax is not parsed by clap here.
552        /// Slice range for partial tensor reads (e.g., 0:3 for first 3 elements)
553        #[arg(long)]
554        slice: Option<String>,
555    },
556    /// Model architecture tree view
557    Tree {
558        /// Path to .apr model file
559        #[arg(value_name = "FILE")]
560        file: PathBuf,
561        /// Filter by component pattern
562        #[arg(long)]
563        filter: Option<String>,
           // NOTE(review): free-form `String`; the formats named in the help text are
           // not enforced by an attribute here.
564        /// Output format: ascii, dot, mermaid, json
565        #[arg(long, default_value = "ascii")]
566        format: String,
567        /// Show tensor sizes
568        #[arg(long)]
569        sizes: bool,
           // `None` when the flag is omitted, i.e. no depth limit is passed along.
570        /// Maximum tree depth
571        #[arg(long)]
572        depth: Option<usize>,
573    },
574    /// Data flow visualization
575    Flow {
576        /// Path to .apr model file
577        #[arg(value_name = "FILE")]
578        file: PathBuf,
579        /// Filter by layer pattern
580        #[arg(long)]
581        layer: Option<String>,
           // NOTE(review): free-form `String`; the help text's "etc." leaves the full
           // set of accepted component names unspecified here.
582        /// Component to visualize: full, encoder, decoder, etc.
583        #[arg(long, default_value = "full")]
584        component: String,
           // Short form `-v` is derived from the field name.
585        /// Verbose output with statistics
586        #[arg(short, long)]
587        verbose: bool,
588        /// Output as JSON
589        #[arg(long)]
590        json: bool,
591    },
592    /// Cross-subcommand smoke test (does every tool handle this model?)
593    Qualify {
594        /// Path to model file (APR, GGUF, or SafeTensors)
595        #[arg(value_name = "FILE")]
596        file: PathBuf,
           // NOTE(review): free-form `String`; the tier names in the help text are not
           // enforced by an attribute here.
597        /// Testing tier: smoke (Phase 1), standard (+contracts), full (+playbook)
598        #[arg(long, default_value = "smoke")]
599        tier: String,
600        /// Timeout per gate in seconds
601        #[arg(long, default_value = "120")]
602        timeout: u64,
603        /// Output as JSON
604        #[arg(long)]
605        json: bool,
606        /// Show subcommand output (disable stdout suppression)
607        #[arg(short, long)]
608        verbose: bool,
           // Split on ',' by clap; `None` when `--skip` is not given.
609        /// Skip specific gates (comma-separated)
610        #[arg(long, value_delimiter = ',')]
611        skip: Option<Vec<String>>,
612    },
613    /// Training pipeline (plan/apply) — forjar-style pre-flight validation
       // Feature-gated on `training`. Pure wrapper around the nested
       // `TrainCommands` subcommand set (defined outside this view).
614    #[cfg(feature = "training")]
615    Train {
616        #[command(subcommand)]
617        command: TrainCommands,
618    },
619    /// Pretraining loop driver (SHIP-TWO-001 MODEL-2).
620    ///
621    /// Wires the pretraining loop shape defined by
622    /// `contracts/training-loop-pretrain-v1.yaml`. Executes a synthetic
623    /// decreasing-loss drive by default so GATE-TRAIN-005 / -007 / -008
624    /// divergence-and-NaN guards can be exercised without an actual
625    /// 370M compute run. Real corpus wiring is a follow-up ticket.
       // Feature-gated on `training`. This variant has no positional argument:
       // `dataset`, `tokenizer` and `run_dir` are non-optional `PathBuf`s declared
       // with `long` and no default, so all three must be passed as named options.
       // NOTE(review): the summary above says the synthetic drive runs "by default",
       // while the `--synthetic` help text below says an absent flag means real
       // compute — the two descriptions disagree; confirm which is current.
626    #[cfg(feature = "training")]
627    Pretrain {
628        /// Dataset path (tokenized shard index or raw corpus).
629        #[arg(long, value_name = "PATH")]
630        dataset: PathBuf,
631        /// Tokenizer directory (vocab.json + merges.txt).
632        #[arg(long, value_name = "DIR")]
633        tokenizer: PathBuf,
634        /// Run output directory — checkpoints + metadata go to `{run_dir}/ckpt/`.
635        #[arg(long, value_name = "DIR")]
636        run_dir: PathBuf,
           // `value_enum` restricts the accepted values to the variants of
           // `PretrainMode` (declared outside this view).
637        /// Training regime — finetune (MODEL-1) or from-scratch (MODEL-2 cold start).
638        /// Per contract training-loop-pretrain-v1 §hyperparameter_defaults,
639        /// this atomically flips (regime, lr_max, warmup_steps, target_val_loss)
640        /// unless explicit --lr / --warmup-steps / --target-val-loss override.
641        #[arg(long, value_enum, default_value = "finetune")]
642        mode: PretrainMode,
           // `lr`, `warmup_steps` and `target_val_loss` are `Option`s precisely so
           // that "not given" is distinguishable from a value; the mode-dependent
           // defaults quoted in their help text are not encoded in this declaration.
643        /// Peak learning rate after warmup. Omit to inherit mode default
644        /// (finetune: 5e-5, from-scratch: 3e-4).
645        #[arg(long)]
646        lr: Option<f32>,
647        /// Warmup + cosine decay total steps.
648        #[arg(long, default_value = "1000")]
649        num_steps: usize,
650        /// Number of warmup steps. Omit to inherit mode default
651        /// (finetune: 100, from-scratch: 1000).
652        #[arg(long)]
653        warmup_steps: Option<usize>,
654        /// Micro-batch size.
655        #[arg(long, default_value = "16")]
656        batch_size: usize,
657        /// Sequence length per example.
658        #[arg(long, default_value = "1024")]
659        seq_length: usize,
660        /// Steps per epoch — controls per-epoch artifact cadence.
661        #[arg(long, default_value = "100")]
662        steps_per_epoch: usize,
663        /// GATE-TRAIN-006 fixed RNG seed.
664        #[arg(long, default_value = "42")]
665        seed: u64,
666        /// Target val_loss. Omit to inherit mode default
667        /// (finetune: 2.2, from-scratch: 3.0).
668        #[arg(long)]
669        target_val_loss: Option<f32>,
           // NOTE(review): the help text calls this "required" for from-scratch mode,
           // but it always has a default (50257), so clap never reports it missing.
670        /// Vocabulary size (required for `--mode from-scratch` INV-TRAIN-005
671        /// regime-dependent cap: 2·ln(vocab_size)). MODEL-2 uses 50257.
672        #[arg(long, default_value = "50257")]
673        vocab_size: u32,
           // The `SetTrue` action is spelled out explicitly here (and on
           // `force_under_provisioned` below); the flag takes no value.
674        /// Synthetic-drive only — do not attempt real compute, exercise loop gates only.
675        /// INV-TRAIN-010: absent = real compute (drive_real), present = synthetic (drive_synthetic).
676        #[arg(long, action = clap::ArgAction::SetTrue)]
677        synthetic: bool,
           // NOTE(review): typed `String`; the grammar quoted in the help text is not
           // enforced by any attribute here — presumably validated by the consumer.
678        /// Training backend. Grammar (contract gpu-training-backend-v1
679        /// INV-GPUTRAIN-001): `^(cpu|cuda(:[0-9]|:1[0-5])?|auto)$`.
680        /// Default `auto` uses CUDA if available, else CPU (the only
681        /// spelling that may fall back silently — all other values
682        /// hard-fail on missing runtime per GATE-GPUTRAIN-002).
683        #[arg(long, default_value = "auto")]
684        device: String,
685        /// Initial weights from a pretrained APR file
686        /// (contract `apr-pretrain-from-init-v1`). Per spec §49's
687        /// MODEL-2 pretrained-init pivot: when present, load weights
688        /// from `<PATH>` instead of random-init. Composes with
689        /// `--mode finetune` (canonical) or `--mode from-scratch`
690        /// (allowed but non-canonical — emits a warning). Missing,
691        /// corrupted, or arch-mismatched APR files exit non-zero
692        /// before step 1 (no silent random-init fallback).
693        #[arg(long, value_name = "PATH")]
694        init: Option<PathBuf>,
695        /// SPEC §83 P0-J: bypass the Chinchilla compute-optimal hard
696        /// gate (`chinchilla-gate-v1`). Default is fail-fast when
697        /// D/N < 10× (severely under-provisioned per Hoffmann et al.
698        /// 2022). Pass this flag to acknowledge the under-provisioning
699        /// and proceed anyway (e.g. for ablation studies, resumed
700        /// runs, or smoke tests).
701        #[arg(long, action = clap::ArgAction::SetTrue)]
702        force_under_provisioned: bool,
703        /// SPEC §84 P2-F: shared held-out validation shard.
704        ///
705        /// When provided, the val-loss eval reads `HELD_OUT_BATCHES`
706        /// batches from this separate `.bin`-shards directory instead
707        /// of stealing the first 16 batches of `--dataset`. This makes
708        /// `val_loss` comparable across runs whose `--dataset`
709        /// composition changes (P2-C's audit-falsified result was
710        /// confounded by val sets being drawn from different corpus
711        /// distributions — qwen-v2 = codeparrot only, qwen-v3 =
712        /// codeparrot + the-stack-dedup).
713        ///
714        /// Path semantics: directory of `.bin` shards (same format as
715        /// `--dataset`). Operator tokenizes the held-out corpus
716        /// independently via `apr tokenize encode-corpus --max-docs N`
717        /// to a separate output dir, then passes that dir here. The
718        /// shard contract is `contracts/dataset-thestack-python-v1.yaml`.
719        ///
720        /// When omitted, falls back to the historical "first 16
721        /// batches of --dataset" behaviour for backwards compatibility.
722        #[arg(long, value_name = "DIR")]
723        val_shard: Option<PathBuf>,
724    },
725    /// Tokenizer training pipeline (plan/apply) — BPE vocabulary learning
       // Not feature-gated (no `#[cfg]`), unlike `Train`/`Pretrain`. Pure wrapper
       // around the nested `TokenizeCommands` enum (defined outside this view).
726    Tokenize {
727        #[command(subcommand)]
728        command: TokenizeCommands,
729    },
730    /// Data quality pipeline (audit, split, balance) — powered by alimentar
       // Pure wrapper around the nested `DataCommands` enum (defined outside this view).
731    Data {
732        #[command(subcommand)]
733        command: DataCommands,
734    },
735    /// Pipeline orchestration (plan/apply/status) — wraps forjar DAG engine
       // Pure wrapper around the nested `PipelineCommands` enum (defined outside
       // this view).
736    Pipeline {
737        #[command(subcommand)]
738        command: PipelineCommands,
739    },
740    /// Automated Five Whys diagnosis on a training checkpoint
       // Not feature-gated, even though it operates on training checkpoints.
       // `data`, `model_size` and `num_classes` mirror the same-named options on `Eval`.
741    Diagnose {
742        /// Path to checkpoint directory
743        #[arg(value_name = "CHECKPOINT_DIR")]
744        checkpoint_dir: PathBuf,
745        /// Test data file (JSONL) for evaluation
746        #[arg(long, value_name = "FILE")]
747        data: Option<PathBuf>,
748        /// Model size hint: "0.5B", "tiny"
749        #[arg(long)]
750        model_size: Option<String>,
751        /// Number of output classes (default: 5)
752        #[arg(long, default_value = "5")]
753        num_classes: usize,
754    },
755    /// Lint an Ollama /api/chat response for schema + NDJSON invariants (CRUX-C-04)
       // No `#[command(name = ...)]` override, so the subcommand name comes from
       // clap's default renaming of the variant identifier.
756    OllamaChatLint {
           // Declared with `long` (not positional) and no default: a mandatory
           // `--response-file` option.
757        /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
758        #[arg(long, value_name = "FILE")]
759        response_file: PathBuf,
760        /// Treat input as NDJSON stream (one frame per line)
761        #[arg(long)]
762        stream: bool,
763    },
764    /// Lint an Ollama /api/chat function-calling response (CRUX-I-04)
       // Same shape as `OllamaChatLint` plus an optional `--request-file`.
765    OllamaToolsLint {
           // Mandatory named option (`long`, non-`Option`, no default).
766        /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
767        #[arg(long, value_name = "FILE")]
768        response_file: PathBuf,
           // `None` when omitted; per the help text the allowlist gate is then not enabled.
769        /// Optional captured request JSON — enables tool-name allowlist gate
770        /// (every called tool name must appear in request.tools[*].function.name)
771        #[arg(long, value_name = "FILE")]
772        request_file: Option<PathBuf>,
773        /// Treat input as NDJSON stream (one frame per line)
774        #[arg(long)]
775        stream: bool,
776    },
777    /// Lint a captured DRY-sampling observation (CRUX-C-23)
778    DrySamplingLint {
779        /// Path to observation JSON
780        #[arg(long, value_name = "FILE")]
781        observation_file: PathBuf,
782    },
783    /// Lint a captured AWQ quality/compression/flags observation (CRUX-B-08)
784    AwqLint {
785        /// Path to captured AWQ observation JSON
786        #[arg(long, value_name = "FILE")]
787        observation_file: PathBuf,
788    },
789    /// Lint a captured FP8 (E4M3) round-trip + SM-capability observation (CRUX-B-11)
790    Fp8Lint {
791        /// Path to captured observation JSON (frobenius, capability blocks)
792        #[arg(long, value_name = "FILE")]
793        observation_file: PathBuf,
794    },
795    /// Lint a captured NF4 codebook/roundtrip/storage/parity observation (CRUX-B-10)
796    Nf4Lint {
797        /// Path to captured NF4 observation JSON
798        #[arg(long, value_name = "FILE")]
799        observation_file: PathBuf,
800    },
801    /// Lint a captured GPTQ compression/cosine/flags observation (CRUX-B-09)
802    GptqLint {
803        /// Path to captured GPTQ observation JSON
804        #[arg(long, value_name = "FILE")]
805        observation_file: PathBuf,
806    },
807    /// Lint a captured CUDA OOM postmortem report (CRUX-F-13)
808    OomLint {
809        /// Path to captured OOM postmortem JSON (e.g. /tmp/apr-oom-<ts>.json)
810        #[arg(long, value_name = "FILE")]
811        report_file: PathBuf,
812        /// Optional captured stderr log to verify the OOM_REPORT breadcrumb
813        #[arg(long, value_name = "FILE")]
814        stderr_file: Option<PathBuf>,
815    },
816    /// Lint a captured NCCL failure-diagnostics JSON from stderr (CRUX-F-15)
817    NcclDiagLint {
818        /// Path to captured stderr JSON diagnostic
819        #[arg(long, value_name = "FILE")]
820        diag_file: PathBuf,
821        /// Optional observed exit code (gate: >= 128 = NCCL class)
822        #[arg(long, value_name = "I32")]
823        exit_code: Option<i32>,
824        /// Require the `suggest` field to cite an nvidia.com / NVIDIA/nccl URL
825        #[arg(long)]
826        require_doc_link: bool,
827    },
828    /// Lint a captured `apr agent --trace` ReAct loop trace (CRUX-I-06)
829    ReactTraceLint {
830        /// Path to captured trace JSON
831        #[arg(long, value_name = "FILE")]
832        trace_file: PathBuf,
833        /// Optional max_iterations budget the trace was produced under
834        #[arg(long, value_name = "N")]
835        max_iterations: Option<i64>,
836        /// Require the scratchpad to parse cleanly as Thought/Action/Observation blocks
837        #[arg(long)]
838        require_grammar: bool,
839    },
840    /// Lint a captured `$APR_TRACE_DIR` hang stack-dump directory (CRUX-F-14)
841    HangTraceLint {
842        /// Path to the captured trace directory
843        #[arg(long, value_name = "DIR")]
844        trace_dir: PathBuf,
845        /// Inspection mode: `timeout` (expects per-rank dumps) or `success` (expects empty dir)
846        #[arg(long, value_name = "MODE", default_value = "timeout")]
847        mode: String,
848        /// Expected world_size when mode=timeout (number of rank{N}.py.txt files)
849        #[arg(long, value_name = "N", default_value_t = 2)]
850        world_size: usize,
851        /// Actual exit code from the run under inspection (for exit-code gate)
852        #[arg(long, value_name = "I32")]
853        exit_code: Option<i32>,
854        /// Expected exit code (typically 124 for timeout, 1 for other error, 0 for success)
855        #[arg(long, value_name = "I32")]
856        expected_exit_code: Option<i32>,
857    },
858    /// Lint two captured `apr finetune --parallel ddp --json` outputs (N=1, N=k) (CRUX-D-11)
859    DdpMetricsLint {
860        /// Path to N=1 metrics JSON
861        #[arg(long, value_name = "FILE")]
862        metrics_1gpu_file: PathBuf,
863        /// Path to N=world_size metrics JSON
864        #[arg(long, value_name = "FILE")]
865        metrics_ngpu_file: PathBuf,
866        /// World size used for --metrics-ngpu-file run (>= 2)
867        #[arg(long, value_name = "N")]
868        world_size: i64,
869        /// Scaling-efficiency floor (default 0.85, PyTorch DDP convention)
870        #[arg(long, value_name = "F", default_value_t = 0.85)]
871        scaling_floor: f64,
872        /// Loss-parity relative tolerance (default 0.01)
873        #[arg(long, value_name = "F", default_value_t = 0.01)]
874        loss_tolerance: f64,
875    },
876    /// Lint a captured `apr dataset audio-inspect --format json` body (CRUX-H-13)
877    AudioInspectLint {
878        /// Path to captured JSON body
879        #[arg(long, value_name = "FILE")]
880        json_file: PathBuf,
881        /// Optional expected sample_rate (typically the `--resample-to` arg)
882        #[arg(long, value_name = "U32")]
883        expected_sample_rate: Option<u32>,
884        /// Optional expected channel count (1 = mono after --mono)
885        #[arg(long, value_name = "U32")]
886        expected_channels: Option<u32>,
887    },
888    /// Lint captured flash-attn2 parity + provenance JSON outputs (CRUX-L-02)
889    AttnParityLint {
890        /// Path to captured `apr kernel parity --impl flash2 --ref naive --json` body
891        #[arg(long, value_name = "FILE")]
892        parity_file: Option<PathBuf>,
893        /// Path to captured `apr run --attn flash2 --json` body for provenance check
894        #[arg(long, value_name = "FILE")]
895        provenance_file: Option<PathBuf>,
896        /// Path to captured head_dim error JSON
897        #[arg(long, value_name = "FILE")]
898        head_dim_error_file: Option<PathBuf>,
899        /// Max absolute diff tolerance (default 5e-3, FlashAttention-2 bound)
900        #[arg(long, value_name = "F", default_value_t = 5e-3)]
901        tol_abs: f64,
902        /// Min cosine similarity floor (default 0.9999)
903        #[arg(long, value_name = "F", default_value_t = 0.9999)]
904        tol_cos: f64,
905    },
906    /// Lint a captured `apr attn-viz` attention dump (CRUX-F-17)
907    AttnVizLint {
908        /// Path to attention dump in JSON form (4-D [layers][heads][rows][cols] floats)
909        #[arg(long, value_name = "FILE")]
910        attn_file: Option<PathBuf>,
911        /// Path to HTML heatmap output
912        #[arg(long, value_name = "FILE")]
913        html_file: Option<PathBuf>,
914        /// Minimum <svg|<canvas open-tag count expected in HTML (|layers|*|heads|)
915        #[arg(long, value_name = "N", default_value_t = 1)]
916        expected_heatmaps: usize,
917        /// Row-softmax normalization tolerance (default 1e-5)
918        #[arg(long, value_name = "F64", default_value_t = 1e-5)]
919        tolerance: f64,
920        /// Causal-mask zero epsilon (default 1e-9)
921        #[arg(long, value_name = "F64", default_value_t = 1e-9)]
922        epsilon: f64,
923    },
924    /// Lint a captured `apr trace --check-finite` error JSON and/or `--list` coverage JSON (CRUX-F-11)
925    CheckFiniteLint {
926        /// Captured stderr JSON from `apr trace --check-finite` on a poisoned model
927        #[arg(long, value_name = "FILE")]
928        error_file: Option<PathBuf>,
929        /// Captured stdout JSON from `apr trace --check-finite --list`
930        #[arg(long, value_name = "FILE")]
931        list_file: Option<PathBuf>,
932        /// Minimum layer-coverage count when `--list-file` is supplied (default 100)
933        #[arg(long, value_name = "N", default_value_t = 100)]
934        min_layers: usize,
935    },
936    /// Lint a captured `apr debug embed-viz` CSV (CRUX-F-18)
937    EmbedVizLint {
938        /// Path to captured embed-viz CSV (token_id,token_str,x,y)
939        #[arg(long, value_name = "FILE")]
940        csv_file: PathBuf,
941        /// Expected row count == vocab_size (optional)
942        #[arg(long, value_name = "N")]
943        expected_vocab_size: Option<usize>,
944        /// Second CSV captured under the same seed for determinism check (optional)
945        #[arg(long, value_name = "FILE")]
946        csv_file_b: Option<PathBuf>,
947    },
948    /// Lint a captured `apr explain --format jsonl` token-selection trace (CRUX-F-19)
949    ExplainTokenLint {
950        /// Path to captured JSONL body (one sampled-token record per line)
951        #[arg(long, value_name = "FILE")]
952        jsonl_file: PathBuf,
953        /// Tolerance for `Σ post_prob ≈ 1.0` (default 1e-5)
954        #[arg(long, value_name = "F64", default_value_t = 1e-5)]
955        tolerance: f64,
956        /// Assert greedy decoding: sampled_id must equal argmax(pre_prob)
957        #[arg(long)]
958        require_greedy: bool,
959    },
960    /// Lint a captured GPU memory Chrome Trace Event Format JSON (CRUX-F-07)
961    GpuMemtraceLint {
962        /// Path to captured Chrome Trace JSON from `apr profile --gpu-memory-trace`
963        #[arg(long, value_name = "FILE")]
964        trace_file: PathBuf,
965    },
966    /// Lint a captured KV-cache utilization timeline (CRUX-F-06)
967    KvTimelineLint {
968        /// Path to captured `apr profile --kv-timeline --json` body
969        #[arg(long, value_name = "FILE")]
970        timeline_file: PathBuf,
971        /// Preemption threshold (default 0.95, vLLM canonical)
972        #[arg(long, value_name = "FRACTION", default_value_t = 0.95)]
973        preempt_threshold: f64,
974    },
975    /// Lint a captured OTLP/JSON ExportTraceServiceRequest body (CRUX-K-08)
976    OtlpLint {
977        /// Path to captured OTLP/JSON export body
978        #[arg(long, value_name = "FILE")]
979        otlp_file: PathBuf,
980        /// Require at least one `apr.inference` span to be present
981        #[arg(long)]
982        require_apr_span: bool,
983        /// Require gen_ai.* and apr.tokens.* attribute keys on some span
984        #[arg(long)]
985        require_genai_attrs: bool,
986        /// Verify W3C trace-context propagation: expect this 32-hex traceId
987        #[arg(long, value_name = "HEX32")]
988        expect_trace_id: Option<String>,
989    },
990    /// Lint a captured Prometheus /metrics response (CRUX-K-07)
991    PrometheusLint {
992        /// Path to captured /metrics response body (text/plain; version=0.0.4)
993        #[arg(long, value_name = "FILE")]
994        metrics_file: PathBuf,
995        /// Optional captured Content-Type header to verify against version=0.0.4
996        #[arg(long, value_name = "HEADER")]
997        content_type: Option<String>,
998        /// Require the K-07 metric set (apr_num_requests_running, ...) to be present
999        #[arg(long)]
1000        require_k07_metrics: bool,
1001    },
1002    /// Lint a captured OpenAI tool-use response (CRUX-C-11)
1003    ToolUseLint {
1004        /// Path to captured OpenAI tool-use response JSON
1005        #[arg(long, value_name = "FILE")]
1006        observation_file: PathBuf,
1007    },
1008    /// Lint a GBNF grammar-constrained observation (CRUX-C-10)
1009    GbnfLint {
1010        /// Path to captured GBNF observation JSON
1011        #[arg(long, value_name = "FILE")]
1012        observation_file: PathBuf,
1013    },
1014    /// Lint a typical-p sampling observation (CRUX-C-22)
1015    TypicalPLint {
1016        #[arg(long, value_name = "FILE")]
1017        observation_file: PathBuf,
1018    },
1019    /// Gradient-norm telemetry analysis (CRUX-F-09)
1020    GradNorm {
1021        /// Path to JSON file of per-step grad-norm records
1022        #[arg(long, value_name = "FILE")]
1023        history_file: PathBuf,
1024        /// Maximum allowed clipped grad-norm (for cap-violation check)
1025        #[arg(long, value_name = "M")]
1026        max_grad_norm: Option<f64>,
1027        /// Rolling-median window size for spike detection (in steps)
1028        #[arg(long, default_value = "16")]
1029        spike_window: usize,
1030        /// Multiplier threshold for spike detection
1031        #[arg(long, default_value = "10.0")]
1032        spike_multiplier: f64,
1033    },
1034    /// Lint a captured registry byte-quota observation (CRUX-A-22)
1035    RegistryQuotaLint {
1036        /// Path to captured quota/atomic/ceiling observation JSON
1037        #[arg(long, value_name = "FILE")]
1038        observation_file: PathBuf,
1039    },
1040    /// Lint a captured imatrix calibration observation (CRUX-B-07)
1041    ImatrixLint {
1042        /// Path to captured imatrix observation JSON
1043        #[arg(long, value_name = "FILE")]
1044        observation_file: PathBuf,
1045    },
1046    /// Lint a captured /v1/embeddings observation (CRUX-C-13)
1047    EmbeddingsLint {
1048        #[arg(long, value_name = "FILE")]
1049        observation_file: PathBuf,
1050    },
1051    /// Lint a captured Hub+local unified-search merge observation (CRUX-A-23)
1052    UnifiedSearchLint {
1053        /// Path to captured unified-search observation JSON
1054        #[arg(long, value_name = "FILE")]
1055        observation_file: PathBuf,
1056    },
1057    /// Lint a captured `apr rm` / `apr gc` blob-GC observation (CRUX-A-25)
1058    RmGcLint {
1059        /// Path to captured rm/gc observation JSON
1060        #[arg(long, value_name = "FILE")]
1061        observation_file: PathBuf,
1062    },
1063    /// Lint a captured APR_MODELS shared-cache observation (CRUX-A-21)
1064    SharedCacheLint {
1065        /// Path to captured dedup/permission observation JSON
1066        #[arg(long, value_name = "FILE")]
1067        observation_file: PathBuf,
1068    },
1069    /// Perplexity classifier (CRUX-E-02)
1070    Ppl {
1071        /// JSON file containing an array of per-token natural-log
1072        /// probabilities (e.g. `[-1.2, -0.5, -2.1, ...]`). Required.
1073        #[arg(long, value_name = "FILE")]
1074        log_probs_file: PathBuf,
1075    },
1076    /// Validate dequant→requant metadata preservation (CRUX-B-19)
1077    QuantPreservationLint {
1078        /// Reference GGUF (pre-roundtrip)
1079        #[arg(long, value_name = "REF.gguf")]
1080        reference: PathBuf,
1081        /// Requantized GGUF (post-roundtrip)
1082        #[arg(long, value_name = "REQ.gguf")]
1083        requant: PathBuf,
1084    },
1085    /// Split a safetensors file into shards + weight-map index (CRUX-B-05)
1086    Shard {
1087        /// Single-file safetensors model to split
1088        #[arg(value_name = "FILE")]
1089        file: PathBuf,
1090        /// Maximum size of each shard (e.g. 5GB, 500MB, 1.5GiB)
1091        #[arg(long, value_name = "SIZE", default_value = "5GB")]
1092        max_shard_size: String,
1093        /// Output directory for shards + model.safetensors.index.json
1094        #[arg(short, long, value_name = "DIR")]
1095        output: PathBuf,
1096    },
1097    /// Reconstruct a single safetensors file from a sharded directory (CRUX-B-05)
1098    Unshard {
1099        /// Sharded directory containing model.safetensors.index.json
1100        #[arg(value_name = "DIR")]
1101        input: PathBuf,
1102        /// Output single-file safetensors path
1103        #[arg(short, long, value_name = "FILE")]
1104        output: PathBuf,
1105    },
1106    /// Publishing, conversion, and analysis tools
1107    #[command(flatten)]
1108    Tools(ToolCommands),
1109    /// Score a query/passage pair (or rank multiple passages) with a BERT
1110    /// cross-encoder loaded from an APR v2 file (GH-326 Phase 3).
1111    ///
1112    /// Wraps `aprender_core::models::bert::CrossEncoder::load_from_reader`
1113    /// + `score()`. The APR must contain the canonical HF BERT tensor
1114    /// names (see `models::bert::expected_bert_tensor_names`).
1115    ///
1116    /// Tokenisation is NOT applied here — caller passes pre-tokenised
1117    /// `input_ids` + `token_type_ids` as comma-delimited u32 lists. A
1118    /// dedicated tokeniser-aware mode is Phase 3b follow-up scope.
1119    Rerank {
1120        /// Path to the APR file containing the cross-encoder weights.
1121        #[arg(value_name = "MODEL")]
1122        model: PathBuf,
1123        /// Pre-tokenised input ids (comma-separated `u32`s). Mutually
1124        /// exclusive with `--query`+`--passage`+`--vocab` (Phase 3b).
1125        /// Example: `--input-ids 101,2024,102,3456,102` for `[CLS] q [SEP] p [SEP]`.
1126        #[arg(long, value_name = "IDS")]
1127        input_ids: Option<String>,
1128        /// Pre-tokenised token-type ids (comma-separated `u32`s).
1129        /// Same length as `--input-ids`. 0 for query side, 1 for passage.
1130        #[arg(long, value_name = "IDS")]
1131        token_type_ids: Option<String>,
1132        /// Phase 3b — query text. Pair with `--passage` + `--vocab` to enable
1133        /// in-process WordPiece tokenisation. The tokeniser builds
1134        /// `[CLS] query [SEP] passage [SEP]` with `token_type_ids = 0` for
1135        /// the query side and `1` for the passage side.
1136        #[arg(long, value_name = "TEXT")]
1137        query: Option<String>,
1138        /// Phase 3b — passage text. Required when `--query` is supplied
1139        /// in single-pair mode (use `--passages` for batch ranking).
1140        #[arg(long, value_name = "TEXT")]
1141        passage: Option<String>,
1142        /// Phase 5 — batch ranking mode (#326). Passage candidates to
1143        /// score against `--query`. May be supplied multiple times:
1144        /// `apr rerank model.apr --query "..." --passages "p1" --passages "p2"`.
1145        /// Mutually exclusive with `--passage`. Output is one
1146        /// `score[i]` line per passage in input order, OR a JSON array
1147        /// of `{passage, logit, score}` objects sorted by descending
1148        /// score when `--sort` is set.
1149        #[arg(long, value_name = "TEXT")]
1150        passages: Vec<String>,
1151        /// Phase 5 — sort batch output by descending score (highest
1152        /// relevance first). Only meaningful with `--passages` and
1153        /// `--json`. Default: preserve input order.
1154        #[arg(long)]
1155        sort: bool,
1156        /// Phase 5 — limit to top-K passages after sorting. Implies
1157        /// `--sort`. Default 0 (no limit).
1158        #[arg(long, default_value_t = 0)]
1159        top_k: usize,
1160        /// Phase 3b — path to a WordPiece `vocab.txt` (one token per line,
1161        /// line index = token id). Required when `--query` is supplied.
1162        /// Must contain entries for `[CLS]`, `[SEP]`, and `[UNK]`.
1163        /// Phase 4 accepts HuggingFace `tokenizer.json` (extension-detected).
1164        #[arg(long, value_name = "FILE")]
1165        vocab: Option<PathBuf>,
1166        /// Override hidden_dim (default: 384 / MiniLM-L-6).
1167        #[arg(long, default_value_t = 384)]
1168        hidden_dim: usize,
1169        /// Override num_layers (default: 6 / MiniLM-L-6).
1170        #[arg(long, default_value_t = 6)]
1171        num_layers: usize,
1172        /// Override num_heads (default: 12 / MiniLM-L-6).
1173        #[arg(long, default_value_t = 12)]
1174        num_heads: usize,
1175        /// Override intermediate_dim (default: 1536 / MiniLM-L-6).
1176        #[arg(long, default_value_t = 1536)]
1177        intermediate_dim: usize,
1178        /// Override vocab_size (default: 30522 / bert-base-uncased).
1179        #[arg(long, default_value_t = 30522)]
1180        vocab_size: usize,
1181        /// Override max_position_embeddings (default: 512).
1182        #[arg(long, default_value_t = 512)]
1183        max_position_embeddings: usize,
1184        /// Override type_vocab_size (default: 2).
1185        #[arg(long, default_value_t = 2)]
1186        type_vocab_size: usize,
1187        /// Number of labels in the classifier head (default: 1 for
1188        /// regression-style relevance scoring).
1189        #[arg(long, default_value_t = 1)]
1190        num_labels: usize,
1191        /// Load the optional BERT pooler dense layer (default: true).
1192        /// Cross-encoders that skip the pooler should pass `--with-pooler false`.
1193        #[arg(long, default_value_t = true)]
1194        with_pooler: bool,
1195        /// Emit the raw logit instead of the sigmoid-mapped relevance score.
1196        #[arg(long)]
1197        raw_logit: bool,
1198        /// Output as JSON.
1199        #[arg(long)]
1200        json: bool,
1201    },
1202    /// Produce sentence embeddings from a BERT bi-encoder (GH-326 Phase 6).
1203    ///
1204    /// First-stage dense retrieval companion to `apr rerank`. Loads an
1205    /// encoder-only BertModel (e.g. `sentence-transformers/all-MiniLM-L6-v2`),
1206    /// tokenises the input text with WordPiece, runs the full encoder
1207    /// forward, then pools the hidden states with one of:
1208    ///   `--pool cls`  — take the [CLS] hidden state
1209    ///   `--pool mean` — mean over non-padding token positions (default;
1210    ///                   sentence-transformers convention)
1211    /// Optionally L2-normalises the result (`--normalize`, default true,
1212    /// matches sentence-transformers).
1213    Embed {
1214        /// Path to the APR file containing the encoder weights (BertModel).
1215        #[arg(value_name = "MODEL")]
1216        model: PathBuf,
1217        /// Text to encode. Repeatable: `apr embed model.apr --text "a" --text "b" --vocab tok.json`.
1218        #[arg(long, value_name = "TEXT")]
1219        text: Vec<String>,
1220        /// Phase 7 (GH-326) — read texts from a file, one per line.
1221        /// Concatenated with `--text` inputs in order: `--text` first,
1222        /// then `--text-file` rows. Blank lines and lines starting
1223        /// with `#` are skipped. Useful for RAG-style first-stage
1224        /// retrieval where the second-stage rerank candidate set
1225        /// (50-100 documents) is the embed input.
1226        #[arg(long, value_name = "FILE")]
1227        text_file: Option<PathBuf>,
1228        /// Path to a WordPiece `vocab.txt` or HF `tokenizer.json`.
1229        #[arg(long, value_name = "FILE")]
1230        vocab: PathBuf,
1231        /// Pooling strategy (`cls` or `mean`). Default: `mean`
1232        /// (matches sentence-transformers convention).
1233        #[arg(long, default_value = "mean")]
1234        pool: String,
1235        /// L2-normalise the output embedding. Default: true (matches
1236        /// sentence-transformers convention). Pass `--normalize false`
1237        /// to keep raw magnitudes.
1238        #[arg(long, default_value_t = true)]
1239        normalize: bool,
1240        /// Override hidden_dim (default: 384 / MiniLM).
1241        #[arg(long, default_value_t = 384)]
1242        hidden_dim: usize,
1243        /// Override num_layers (default: 6 / MiniLM-L-6).
1244        #[arg(long, default_value_t = 6)]
1245        num_layers: usize,
1246        /// Override num_heads.
1247        #[arg(long, default_value_t = 12)]
1248        num_heads: usize,
1249        /// Override intermediate_dim.
1250        #[arg(long, default_value_t = 1536)]
1251        intermediate_dim: usize,
1252        /// Override vocab_size.
1253        #[arg(long, default_value_t = 30522)]
1254        vocab_size: usize,
1255        /// Override max_position_embeddings.
1256        #[arg(long, default_value_t = 512)]
1257        max_position_embeddings: usize,
1258        /// Override type_vocab_size.
1259        #[arg(long, default_value_t = 2)]
1260        type_vocab_size: usize,
1261        /// Output as JSON.
1262        #[arg(long)]
1263        json: bool,
1264    },
1265}
1266
// Experiment-run management surface, compiled only when the `training`
// feature is enabled. All three variants accept `--dir`, `--global`, and
// `--json`.
// NOTE: clap renders the `///` comments on variants and fields as `--help`
// text, so treat them as user-facing strings when editing.
#[cfg(feature = "training")]
/// Subcommands for `apr runs` — experiment run management (ALB-050/051)
#[derive(Subcommand, Debug)]
pub enum RunsCommands {
    /// List all training experiment runs (with inline loss sparklines)
    Ls {
        /// Directory to scan for experiments (default: current dir)
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,
        /// Filter by status: running, completed, failed, all
        #[arg(long, default_value = "all")]
        status: String,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Maximum number of runs to show
        // Typed default: checked at compile time instead of being parsed from a
        // string at startup; the rendered help (`[default: 50]`) is unchanged.
        #[arg(long, default_value_t = 50)]
        limit: usize,
    },
    /// Show detailed metrics for a specific run (with braille loss curve)
    Show {
        /// Run ID
        #[arg(value_name = "RUN_ID")]
        run_id: String,
        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global registry
        #[arg(long)]
        global: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Compare two runs side-by-side (loss curves, config diff, metrics)
    Diff {
        /// First run ID
        #[arg(value_name = "RUN_A")]
        run_a: String,
        /// Second run ID
        #[arg(value_name = "RUN_B")]
        run_b: String,
        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global registry
        #[arg(long)]
        global: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
}
1323
// Experiment browser surface, compiled only when the `training` feature is
// enabled. Currently a single `View` variant whose three options are all
// optional (`--db`, `--global`, `--json`).
// NOTE: clap renders the `///` comments on variants and fields as `--help`
// text, so treat them as user-facing strings when editing.
#[cfg(feature = "training")]
/// Subcommands for `apr experiment` — interactive experiment browser (ALB-024)
#[derive(Debug, Subcommand)]
pub enum ExperimentCommands {
    /// Browse experiment history with interactive TUI (loss curves, params)
    View {
        /// Path to experiment database file
        #[arg(value_name = "FILE", long)]
        db: Option<PathBuf>,

        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,

        /// Output as JSON (non-interactive)
        #[arg(long)]
        json: bool,
    },
}
1341
1342/// CRUX-K-11: Subcommands for `apr modelfile`.
1343#[derive(Subcommand, Debug)]
1344pub enum ModelfileSubcommand {
1345    /// Parse an Ollama-style Modelfile and emit the parsed config.
1346    ///
1347    /// Grammar: `FROM`, `PARAMETER`, `TEMPLATE`, `SYSTEM`, `LICENSE`,
1348    /// `MESSAGE`, `ADAPTER` directives. Triple-quoted blocks supported.
1349    /// Directive names are case-insensitive. Unknown directives raise
1350    /// `file:line:col` errors.
1351    Parse {
1352        /// Path to the Modelfile
1353        #[arg(value_name = "FILE")]
1354        file: PathBuf,
1355        /// Output format: `json` or `human`
1356        #[arg(long, default_value = "json")]
1357        format: String,
1358    },
1359}
1360
1361/// GH-876: Subcommands for `apr probar` — consolidates the probador testing
1362/// framework under `apr`. Milestone 1 ships only `tensor` (the migrated
1363/// existing behavior). Subsequent milestones add the remaining 14 probador
1364/// subcommands as separate PRs that delegate to the probador library.
1365#[derive(Subcommand, Debug)]
1366pub enum ProbarSubcommand {
1367    /// Export tensor activations for visual regression testing (PMAT-481).
1368    ///
1369    /// Generates JSON/PNG per-layer test artifacts that can be compared
1370    /// against a golden reference directory to detect regressions in
1371    /// model behavior after weight updates, quantization, or refactors.
1372    Tensor {
1373        /// Path to .apr model file
1374        #[arg(value_name = "FILE")]
1375        file: PathBuf,
1376        /// Output directory for test artifacts
1377        #[arg(short, long, default_value = "./probar-export")]
1378        output: PathBuf,
1379        /// Export format: json, png, or both
1380        #[arg(long, default_value = "both")]
1381        format: String,
1382        /// Golden reference directory for comparison
1383        #[arg(long)]
1384        golden: Option<PathBuf>,
1385        /// Filter layers by name pattern
1386        #[arg(long)]
1387        layer: Option<String>,
1388        /// Exit non-zero on golden divergence (CI mode, PMAT-481)
1389        #[arg(long)]
1390        assert: bool,
1391        /// Cosine similarity threshold for golden comparison (default: 0.98)
1392        #[arg(long, default_value = "0.98")]
1393        tolerance: f32,
1394    },
1395}
apr_cli/extended_commands.rs

apr_cli/
extended_commands.rs