// apr_cli/extended_commands.rs
/// Extended CLI commands (analysis, profiling, QA, benchmarks, and advanced tools).
///
/// Flattened into `Commands` via `#[command(flatten)]` so all subcommands remain
/// top-level from the user's perspective (e.g., `apr chat`, `apr profile`).
//
// NOTE(review): clap derives the user-visible `--help` text from the `///` doc
// comments on every variant and field below, so any doc-comment edit here is a
// runtime-visible help-string change. Plain `//` comments (like this one) are
// invisible to clap. Field names map directly to flag names (snake_case ->
// --kebab-case), and `default_value` strings are parsed into the field type.
#[derive(Subcommand, Debug)]
pub enum ExtendedCommands {
    /// Interactive chat with language model
    Chat {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Sampling temperature (0 = greedy, higher = more random)
        #[arg(long, default_value = "0.7")]
        temperature: f32,
        /// Nucleus sampling threshold
        #[arg(long, default_value = "0.9")]
        top_p: f32,
        /// Maximum tokens to generate per response
        #[arg(long, default_value = "512")]
        max_tokens: usize,
        /// System prompt to set model behavior
        #[arg(long)]
        system: Option<String>,
        /// Show inspection info (top-k probs, tokens/sec)
        #[arg(long)]
        inspect: bool,
        /// Disable GPU acceleration (use CPU)
        #[arg(long)]
        no_gpu: bool,
        // NOTE(review): --gpu and --no-gpu are independent flags (no
        // `conflicts_with` declared), so clap accepts both at once; precedence
        // is presumably resolved by the command handler — TODO confirm.
        /// Force GPU acceleration (requires CUDA)
        #[arg(long)]
        gpu: bool,
        /// Enable inference tracing (APR-TRACE-001)
        #[arg(long)]
        trace: bool,
        /// Trace specific steps only (comma-separated)
        #[arg(long, value_delimiter = ',')]
        trace_steps: Option<Vec<String>>,
        /// Verbose tracing
        #[arg(long)]
        trace_verbose: bool,
        /// Save trace output to JSON file
        #[arg(long, value_name = "FILE")]
        trace_output: Option<PathBuf>,
        /// Trace detail level (none, basic, layer, payload)
        #[arg(long, value_name = "LEVEL", default_value = "basic")]
        trace_level: String,
        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
        #[arg(long)]
        profile: bool,
        /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
        #[arg(long, value_name = "BACKEND")]
        backend: Option<String>,
    },
    /// Benchmark throughput (spec H12: >= 10 tok/s)
    Bench {
        /// Path to model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Number of warmup iterations
        #[arg(long, default_value = "3")]
        warmup: usize,
        /// Number of measurement iterations
        #[arg(long, default_value = "5")]
        iterations: usize,
        /// Max tokens to generate per iteration
        #[arg(long, default_value = "32")]
        max_tokens: usize,
        /// Test prompt
        #[arg(long)]
        prompt: Option<String>,
        /// Use realizar for fast inference (vs aprender baseline)
        #[arg(long)]
        fast: bool,
        /// Benchmark specific brick
        #[arg(long)]
        brick: Option<String>,
        /// Comma-separated latency percentile points for JSON output
        /// (CRUX-E-07). Default: `50,95,99`. Values must be in (0, 100].
        #[arg(long, value_delimiter = ',', default_value = "50,95,99")]
        percentiles: Vec<f64>,
    },
    /// Evaluate model perplexity (spec H13: PPL <= 20) or classification metrics
    Eval {
        /// Path to model file or checkpoint directory
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Dataset: wikitext-2, lambada, or custom
        #[arg(long, default_value = "wikitext-2")]
        dataset: String,
        /// Custom text (when dataset=custom)
        #[arg(long)]
        text: Option<String>,
        /// Maximum tokens to evaluate
        #[arg(long, default_value = "512")]
        max_tokens: usize,
        /// Perplexity threshold for pass/fail
        #[arg(long, default_value = "20.0")]
        threshold: f32,
        /// Task type: omit for perplexity, "classify" for classification eval
        #[arg(long)]
        task: Option<String>,
        /// Test data file (JSONL) for classification evaluation
        #[arg(long, value_name = "FILE")]
        data: Option<PathBuf>,
        /// Model size hint: "0.5B", "tiny" (for classification eval)
        #[arg(long)]
        model_size: Option<String>,
        /// Number of output classes (default: 5)
        #[arg(long, default_value = "5")]
        num_classes: usize,
        /// Generate HuggingFace model card (README.md) in checkpoint dir
        #[arg(long)]
        generate_card: bool,
        /// Device for inference: "cpu" (default) or "cuda" (GPU-accelerated, ALB-089)
        #[arg(long, default_value = "cpu")]
        device: String,
        /// Number of samples per problem for pass@k (ALB-088, default: 1)
        #[arg(long, default_value = "1")]
        samples: usize,
        /// Sampling temperature (0.0 = greedy, 0.8 = standard for pass@k>1)
        #[arg(long, default_value = "0.0")]
        temperature: f32,
    },
    /// Deep profiling with Roofline analysis
    Profile {
        /// Path to model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Layer-by-layer granular analysis
        #[arg(long)]
        granular: bool,
        /// Output format (human, json, flamegraph)
        #[arg(long, default_value = "human")]
        format: String,
        /// Focus on specific operation
        #[arg(long)]
        focus: Option<String>,
        /// Detect naive implementations
        #[arg(long)]
        detect_naive: bool,
        /// GFLOPS threshold for naive detection
        #[arg(long, default_value = "10.0")]
        threshold: f64,
        /// Compare against HuggingFace baseline
        #[arg(long)]
        compare_hf: Option<String>,
        /// Measure energy consumption (requires RAPL)
        #[arg(long)]
        energy: bool,
        /// Compute performance grade (vs Ollama baseline)
        #[arg(long)]
        perf_grade: bool,
        /// Show call graph
        #[arg(long)]
        callgraph: bool,
        /// Exit non-zero if naive implementation detected
        #[arg(long)]
        fail_on_naive: bool,
        /// Output file path for flamegraph SVG (GH-174, PMAT-182)
        #[arg(long, short = 'o')]
        output: Option<PathBuf>,

        // PMAT-192: CI Assertion Mode (GH-180)
        /// Enable CI mode with assertion checks (exits 1 on failure)
        #[arg(long)]
        ci: bool,
        /// Minimum throughput in tok/s (CI assertion, exits 1 if below)
        #[arg(long)]
        assert_throughput: Option<f64>,
        /// Maximum p99 latency in ms (CI assertion, exits 1 if above)
        #[arg(long)]
        assert_p99: Option<f64>,
        /// Maximum p50 latency in ms (CI assertion, exits 1 if above)
        #[arg(long)]
        assert_p50: Option<f64>,
        /// Warmup passes before measurement (default: 3)
        #[arg(long, default_value = "3")]
        warmup: usize,
        /// Measurement passes (default: 10)
        #[arg(long, default_value = "10")]
        measure: usize,
        /// Number of tokens to generate per measurement pass (default: 32)
        #[arg(long, default_value = "32")]
        tokens: usize,
        /// Compare against Ollama baseline (runs ollama for comparison)
        #[arg(long)]
        ollama: bool,
        /// Disable GPU (force CPU-only profiling)
        #[arg(long)]
        no_gpu: bool,
        /// Compare against another model format (F-PROFILE-011)
        #[arg(long, value_name = "FILE")]
        compare: Option<PathBuf>,
    },
    /// Falsifiable QA checklist for model releases
    Qa {
        /// Path to model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Minimum throughput threshold in tok/s
        #[arg(long, value_name = "TPS")]
        assert_tps: Option<f64>,
        /// Minimum speedup vs Ollama
        #[arg(long, value_name = "SPEEDUP")]
        assert_speedup: Option<f64>,
        /// Minimum GPU vs CPU speedup (F-PERF-042)
        #[arg(long, value_name = "SPEEDUP")]
        assert_gpu_speedup: Option<f64>,
        /// Skip golden output test
        #[arg(long)]
        skip_golden: bool,
        /// Skip throughput benchmark
        #[arg(long)]
        skip_throughput: bool,
        /// Skip Ollama parity comparison
        #[arg(long)]
        skip_ollama: bool,
        /// Skip GPU vs CPU speedup test (F-PERF-042)
        #[arg(long)]
        skip_gpu_speedup: bool,
        /// Skip tensor contract validation (PMAT-235)
        #[arg(long)]
        skip_contract: bool,
        /// Skip cross-format parity test (F-QUAL-032)
        #[arg(long)]
        skip_format_parity: bool,
        /// Skip PTX parity validation (GH-219)
        #[arg(long)]
        skip_ptx_parity: bool,
        /// SafeTensors model path for cross-format parity test (F-QUAL-032)
        #[arg(long, value_name = "PATH")]
        safetensors_path: Option<PathBuf>,
        /// Number of benchmark iterations
        #[arg(long, default_value = "10")]
        iterations: usize,
        /// Number of warmup iterations
        #[arg(long, default_value = "3")]
        warmup: usize,
        /// Maximum tokens to generate
        #[arg(long, default_value = "32")]
        max_tokens: usize,
        /// Output as JSON (for CI integration)
        #[arg(long)]
        json: bool,
        /// Verbose output
        #[arg(short, long)]
        verbose: bool,
        /// Minimum number of gates that must execute (fail if fewer)
        #[arg(long, value_name = "N")]
        min_executed: Option<usize>,
        /// Previous QA report for regression detection
        #[arg(long, value_name = "FILE")]
        previous_report: Option<PathBuf>,
        /// Maximum allowed performance regression ratio (default: 0.10 = 10%)
        #[arg(long, value_name = "RATIO")]
        regression_threshold: Option<f64>,
        /// Skip GPU state isolation test
        #[arg(long)]
        skip_gpu_state: bool,
        /// Skip metadata plausibility validation (Bug 210, GH-222)
        #[arg(long)]
        skip_metadata: bool,
        /// Skip GPU capability match gate (GH-280)
        #[arg(long)]
        skip_capability: bool,
        /// Assert classifier head presence and shape (F-CLASS-004)
        #[arg(long)]
        assert_classifier_head: bool,
    },
    /// GPU/CPU parity check (PMAT-232: genchi genbutsu — see where GPU diverges)
    Parity {
        /// Path to GGUF model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Prompt text (default: "What is 2+2?")
        #[arg(short, long, default_value = "What is 2+2?")]
        prompt: String,
        // `assert` is a plain identifier in Rust (only `assert!` the macro is
        // special), so this field name is legal and yields the flag --assert.
        /// Assert parity (exit non-zero on divergence)
        #[arg(long)]
        assert: bool,
    },
    /// Model-to-PTX source mapping (Mieruka: make GPU kernel dispatch visible)
    #[command(name = "ptx-map")]
    PtxMap {
        /// Path to GGUF model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter to specific kernel (e.g., --kernel Q4KGemv)
        #[arg(long)]
        kernel: Option<String>,
        /// Reverse lookup: kernel name -> which layers/steps use it
        #[arg(long)]
        reverse: Option<String>,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Full PTX snippets and detailed analysis
        #[arg(short, long)]
        verbose: bool,
        /// Show batched prefill kernel variants instead of decode
        #[arg(long)]
        prefill: bool,
    },
    /// PTX analysis and bug detection (register pressure, roofline)
    #[command(name = "ptx")]
    Ptx {
        /// Path to a PTX source file
        #[arg(value_name = "FILE")]
        file: Option<PathBuf>,
        /// Analyze a named kernel from trueno-gpu
        #[arg(long, short)]
        kernel: Option<String>,
        /// Strict mode (no performance whitelist)
        #[arg(long)]
        strict: bool,
        /// Show only bug analysis (skip register/memory/roofline)
        #[arg(long)]
        bugs: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Verbose output (include PTX source listing)
        #[arg(short, long)]
        verbose: bool,
    },
    /// ML tuning: LoRA/QLoRA configuration, memory planning, and HPO (GH-176, SPEC-TUNE-2026-001)
    #[cfg(feature = "training")]
    Tune {
        /// Path to model file (optional if using --model)
        #[arg(value_name = "FILE")]
        file: Option<PathBuf>,
        /// Tuning method: auto, full, lora, qlora
        #[arg(long, short = 'm', default_value = "auto")]
        method: String,
        /// LoRA rank (default: auto-selected)
        #[arg(long, short = 'r')]
        rank: Option<u32>,
        /// Available VRAM in GB
        #[arg(long, default_value = "16.0")]
        vram: f64,
        /// Only plan configuration, don't train
        #[arg(long)]
        plan: bool,
        /// Model size for planning (e.g., "7B", "1.5B")
        #[arg(long, value_name = "SIZE")]
        model: Option<String>,
        /// Freeze base model weights
        #[arg(long)]
        freeze_base: bool,
        /// Training data file (JSONL format)
        #[arg(long, value_name = "FILE")]
        train_data: Option<PathBuf>,
        /// Output as JSON (for CI integration)
        #[arg(long)]
        json: bool,
        /// Task type for HPO: classify (SPEC-TUNE-2026-001)
        #[arg(long)]
        task: Option<String>,
        /// Number of HPO trials (default: 10)
        #[arg(long, default_value = "10")]
        budget: usize,
        /// HPO search strategy: tpe, grid, random
        #[arg(long, default_value = "tpe")]
        strategy: String,
        /// HPO scheduler: asha, median, none
        #[arg(long, default_value = "asha")]
        scheduler: String,
        /// Scout mode: 1 epoch per trial for fast exploration
        #[arg(long)]
        scout: bool,
        /// Training data file for HPO (JSONL format)
        #[arg(long, value_name = "FILE")]
        data: Option<PathBuf>,
        /// Number of output classes for classification
        #[arg(long, default_value = "5")]
        num_classes: usize,
        /// Model size hint for HPO (e.g., "0.5B", "1.5B")
        #[arg(long)]
        model_size: Option<String>,
        /// Warm-start from scout phase results directory
        #[arg(long, value_name = "DIR")]
        from_scout: Option<PathBuf>,
        /// Maximum epochs per trial (full mode, default: 20)
        #[arg(long, default_value = "20")]
        max_epochs: usize,
        /// Maximum wall-clock time (e.g., "8h", "30m")
        #[arg(long)]
        time_limit: Option<String>,
    },
    /// Attach live TUI to a running training session
    #[cfg(feature = "training")]
    Monitor {
        /// Experiment output directory (same as finetune -o)
        #[arg(value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Refresh interval in milliseconds
        #[arg(long, default_value = "500")]
        refresh_ms: u64,
        /// Compact display mode
        #[arg(long)]
        compact: bool,
        /// Output JSON lines instead of TUI (for LLM agents and CI)
        #[arg(long)]
        json: bool,
        /// Output format: tui (default), json, text
        #[arg(long, default_value = "tui")]
        format: String,
    },
    /// List, show, and compare training experiment runs
    #[cfg(feature = "training")]
    Runs {
        #[command(subcommand)]
        command: RunsCommands,
    },
    /// Interactive experiment browser (TUI with loss curves)
    #[cfg(feature = "training")]
    Experiment {
        #[command(subcommand)]
        command: ExperimentCommands,
    },
    /// ComputeBrick pipeline monitor (cbtop)
    Cbtop {
        /// Model name (e.g., qwen2.5-coder-1.5b)
        #[arg(long)]
        model: Option<String>,
        /// Attach to running realizar process
        #[arg(long)]
        attach: Option<String>,
        /// Path to GGUF model file for real profiling
        #[arg(long, value_name = "MODEL")]
        model_path: Option<PathBuf>,
        /// Run in headless mode (no TUI, for CI/automation)
        #[arg(long)]
        headless: bool,
        /// Output JSON format (requires --headless)
        #[arg(long)]
        json: bool,
        /// Output file path (requires --headless)
        #[arg(long, value_name = "FILE")]
        output: Option<PathBuf>,
        /// CI mode: exit with code 1 if thresholds not met
        #[arg(long)]
        ci: bool,
        /// Minimum throughput threshold in tok/s (for --ci)
        #[arg(long, value_name = "TOK_S")]
        throughput: Option<f64>,
        /// Minimum brick score threshold 0-100 (for --ci)
        #[arg(long, value_name = "SCORE")]
        brick_score: Option<u32>,
        /// Number of warmup iterations before measurement
        #[arg(long, default_value = "10")]
        warmup: usize,
        /// Number of measurement iterations
        #[arg(long, default_value = "100")]
        iterations: usize,
        /// PAR-100: Enable speculative decoding benchmark
        #[arg(long)]
        speculative: bool,
        /// PAR-100: Number of tokens to draft speculatively (default: 4)
        #[arg(long, default_value = "4")]
        speculation_k: usize,
        /// PAR-099: Path to draft model for speculative decoding
        #[arg(long, value_name = "DRAFT_MODEL")]
        draft_model: Option<PathBuf>,
        /// PAR-102: Number of concurrent requests
        #[arg(long, default_value = "1")]
        concurrent: usize,
        /// Use simulated data (for CI testing only)
        #[arg(long)]
        simulated: bool,
    },
    /// Export for probar visual regression testing (PMAT-481)
    Probar {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Output directory for test artifacts
        #[arg(short, long, default_value = "./probar-export")]
        output: PathBuf,
        /// Export format: json, png, or both
        #[arg(long, default_value = "both")]
        format: String,
        /// Golden reference directory for comparison
        #[arg(long)]
        golden: Option<PathBuf>,
        /// Filter layers by name pattern
        #[arg(long)]
        layer: Option<String>,
        /// Exit non-zero on golden divergence (CI mode, PMAT-481)
        #[arg(long)]
        assert: bool,
        /// Cosine similarity threshold for golden comparison (default: 0.98)
        #[arg(long, default_value = "0.98")]
        tolerance: f32,
    },
    /// Compare APR model against HuggingFace source
    #[command(name = "compare-hf")]
    CompareHf {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// HuggingFace repo ID (e.g., openai/whisper-tiny)
        #[arg(long)]
        hf: String,
        /// Filter tensors by name pattern
        #[arg(long)]
        tensor: Option<String>,
        /// Comparison threshold (default: 1e-5)
        #[arg(long, default_value = "1e-5")]
        threshold: f64,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Format-aware binary forensics (10X better than xxd)
    Hex {
        /// Path to model file (APR, GGUF, or SafeTensors)
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter tensors by name pattern
        #[arg(long)]
        tensor: Option<String>,
        /// Limit bytes/values to display
        #[arg(long, default_value = "64")]
        limit: usize,
        /// Show tensor statistics
        #[arg(long)]
        stats: bool,
        /// List tensor names only
        #[arg(long)]
        list: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Annotated file header (magic, version, tensor count, metadata)
        #[arg(long)]
        header: bool,
        /// Q4K/Q6K/Q8_0 super-block structure with field annotations
        #[arg(long)]
        blocks: bool,
        /// Value histogram + entropy + kurtosis analysis
        #[arg(long)]
        distribution: bool,
        /// Layout contract verification overlay per tensor
        #[arg(long)]
        contract: bool,
        /// Per-region byte entropy analysis
        #[arg(long)]
        entropy: bool,
        /// Raw bytes (like xxd but format-aware, with ASCII column)
        #[arg(long)]
        raw: bool,
        // String (not usize) so callers may pass hex offsets like "0x40";
        // parsing of the optional 0x prefix happens in the handler.
        /// Start at byte offset (supports 0x prefix for hex)
        #[arg(long, default_value = "0")]
        offset: String,
        /// Bytes per row for raw output (default: 16)
        #[arg(long, default_value = "16")]
        width: usize,
        /// Slice range for partial tensor reads (e.g., 0:3 for first 3 elements)
        #[arg(long)]
        slice: Option<String>,
    },
    /// Model architecture tree view
    Tree {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter by component pattern
        #[arg(long)]
        filter: Option<String>,
        /// Output format: ascii, dot, mermaid, json
        #[arg(long, default_value = "ascii")]
        format: String,
        /// Show tensor sizes
        #[arg(long)]
        sizes: bool,
        /// Maximum tree depth
        #[arg(long)]
        depth: Option<usize>,
    },
    /// Data flow visualization
    Flow {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter by layer pattern
        #[arg(long)]
        layer: Option<String>,
        /// Component to visualize: full, encoder, decoder, etc.
        #[arg(long, default_value = "full")]
        component: String,
        /// Verbose output with statistics
        #[arg(short, long)]
        verbose: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Cross-subcommand smoke test (does every tool handle this model?)
    Qualify {
        /// Path to model file (APR, GGUF, or SafeTensors)
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Testing tier: smoke (Phase 1), standard (+contracts), full (+playbook)
        #[arg(long, default_value = "smoke")]
        tier: String,
        /// Timeout per gate in seconds
        #[arg(long, default_value = "120")]
        timeout: u64,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Show subcommand output (disable stdout suppression)
        #[arg(short, long)]
        verbose: bool,
        /// Skip specific gates (comma-separated)
        #[arg(long, value_delimiter = ',')]
        skip: Option<Vec<String>>,
    },
    /// Training pipeline (plan/apply) — forjar-style pre-flight validation
    #[cfg(feature = "training")]
    Train {
        #[command(subcommand)]
        command: TrainCommands,
    },
    /// Pretraining loop driver (SHIP-TWO-001 MODEL-2).
    ///
    /// Wires the pretraining loop shape defined by
    /// `contracts/training-loop-pretrain-v1.yaml`. Executes a synthetic
    /// decreasing-loss drive by default so GATE-TRAIN-005 / -007 / -008
    /// divergence-and-NaN guards can be exercised without an actual
    /// 370M compute run. Real corpus wiring is a follow-up ticket.
    #[cfg(feature = "training")]
    Pretrain {
        /// Dataset path (tokenized shard index or raw corpus).
        #[arg(long, value_name = "PATH")]
        dataset: PathBuf,
        /// Tokenizer directory (vocab.json + merges.txt).
        #[arg(long, value_name = "DIR")]
        tokenizer: PathBuf,
        /// Run output directory — checkpoints + metadata go to `{run_dir}/ckpt/`.
        #[arg(long, value_name = "DIR")]
        run_dir: PathBuf,
        /// Training regime — finetune (MODEL-1) or from-scratch (MODEL-2 cold start).
        /// Per contract training-loop-pretrain-v1 §hyperparameter_defaults,
        /// this atomically flips (regime, lr_max, warmup_steps, target_val_loss)
        /// unless explicit --lr / --warmup-steps / --target-val-loss override.
        #[arg(long, value_enum, default_value = "finetune")]
        mode: PretrainMode,
        /// Peak learning rate after warmup. Omit to inherit mode default
        /// (finetune: 5e-5, from-scratch: 3e-4).
        #[arg(long)]
        lr: Option<f32>,
        /// Warmup + cosine decay total steps.
        #[arg(long, default_value = "1000")]
        num_steps: usize,
        /// Number of warmup steps. Omit to inherit mode default
        /// (finetune: 100, from-scratch: 1000).
        #[arg(long)]
        warmup_steps: Option<usize>,
        /// Micro-batch size.
        #[arg(long, default_value = "16")]
        batch_size: usize,
        /// Sequence length per example.
        #[arg(long, default_value = "1024")]
        seq_length: usize,
        /// Steps per epoch — controls per-epoch artifact cadence.
        #[arg(long, default_value = "100")]
        steps_per_epoch: usize,
        /// GATE-TRAIN-006 fixed RNG seed.
        #[arg(long, default_value = "42")]
        seed: u64,
        /// Target val_loss. Omit to inherit mode default
        /// (finetune: 2.2, from-scratch: 3.0).
        #[arg(long)]
        target_val_loss: Option<f32>,
        /// Vocabulary size (required for `--mode from-scratch` INV-TRAIN-005
        /// regime-dependent cap: 2·ln(vocab_size)). MODEL-2 uses 50257.
        #[arg(long, default_value = "50257")]
        vocab_size: u32,
        // Explicit SetTrue is redundant for a bare `bool` field (clap's
        // default for bool flags) but kept to make the INV-TRAIN-010
        // absent/present contract visible at the declaration site.
        /// Synthetic-drive only — do not attempt real compute, exercise loop gates only.
        /// INV-TRAIN-010: absent = real compute (drive_real), present = synthetic (drive_synthetic).
        #[arg(long, action = clap::ArgAction::SetTrue)]
        synthetic: bool,
        /// Training backend. Grammar (contract gpu-training-backend-v1
        /// INV-GPUTRAIN-001): `^(cpu|cuda(:[0-9]|:1[0-5])?|auto)$`.
        /// Default `auto` uses CUDA if available, else CPU (the only
        /// spelling that may fall back silently — all other values
        /// hard-fail on missing runtime per GATE-GPUTRAIN-002).
        #[arg(long, default_value = "auto")]
        device: String,
        /// Initial weights from a pretrained APR file
        /// (contract `apr-pretrain-from-init-v1`). Per spec §49's
        /// MODEL-2 pretrained-init pivot: when present, load weights
        /// from `<PATH>` instead of random-init. Composes with
        /// `--mode finetune` (canonical) or `--mode from-scratch`
        /// (allowed but non-canonical — emits a warning). Missing,
        /// corrupted, or arch-mismatched APR files exit non-zero
        /// before step 1 (no silent random-init fallback).
        #[arg(long, value_name = "PATH")]
        init: Option<PathBuf>,
    },
    /// Tokenizer training pipeline (plan/apply) — BPE vocabulary learning
    Tokenize {
        #[command(subcommand)]
        command: TokenizeCommands,
    },
    /// Data quality pipeline (audit, split, balance) — powered by alimentar
    Data {
        #[command(subcommand)]
        command: DataCommands,
    },
    /// Pipeline orchestration (plan/apply/status) — wraps forjar DAG engine
    Pipeline {
        #[command(subcommand)]
        command: PipelineCommands,
    },
    /// Automated Five Whys diagnosis on a training checkpoint
    Diagnose {
        /// Path to checkpoint directory
        #[arg(value_name = "CHECKPOINT_DIR")]
        checkpoint_dir: PathBuf,
        /// Test data file (JSONL) for evaluation
        #[arg(long, value_name = "FILE")]
        data: Option<PathBuf>,
        /// Model size hint: "0.5B", "tiny"
        #[arg(long)]
        model_size: Option<String>,
        /// Number of output classes (default: 5)
        #[arg(long, default_value = "5")]
        num_classes: usize,
    },
    // --- Contract-lint family (CRUX-*): each variant validates a captured
    // --- observation/response JSON file against its spec's invariants.
    /// Lint an Ollama /api/chat response for schema + NDJSON invariants (CRUX-C-04)
    OllamaChatLint {
        /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
        #[arg(long, value_name = "FILE")]
        response_file: PathBuf,
        /// Treat input as NDJSON stream (one frame per line)
        #[arg(long)]
        stream: bool,
    },
    /// Lint an Ollama /api/chat function-calling response (CRUX-I-04)
    OllamaToolsLint {
        /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
        #[arg(long, value_name = "FILE")]
        response_file: PathBuf,
        /// Optional captured request JSON — enables tool-name allowlist gate
        /// (every called tool name must appear in request.tools[*].function.name)
        #[arg(long, value_name = "FILE")]
        request_file: Option<PathBuf>,
        /// Treat input as NDJSON stream (one frame per line)
        #[arg(long)]
        stream: bool,
    },
    /// Lint a captured DRY-sampling observation (CRUX-C-23)
    DrySamplingLint {
        /// Path to observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured AWQ quality/compression/flags observation (CRUX-B-08)
    AwqLint {
        /// Path to captured AWQ observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured FP8 (E4M3) round-trip + SM-capability observation (CRUX-B-11)
    Fp8Lint {
        /// Path to captured observation JSON (frobenius, capability blocks)
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured NF4 codebook/roundtrip/storage/parity observation (CRUX-B-10)
    Nf4Lint {
        /// Path to captured NF4 observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured GPTQ compression/cosine/flags observation (CRUX-B-09)
    GptqLint {
        /// Path to captured GPTQ observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured CUDA OOM postmortem report (CRUX-F-13)
    OomLint {
        /// Path to captured OOM postmortem JSON (e.g. /tmp/apr-oom-<ts>.json)
        #[arg(long, value_name = "FILE")]
        report_file: PathBuf,
        /// Optional captured stderr log to verify the OOM_REPORT breadcrumb
        #[arg(long, value_name = "FILE")]
        stderr_file: Option<PathBuf>,
    },
    /// Lint a captured OpenAI tool-use response (CRUX-C-11)
    ToolUseLint {
        /// Path to captured OpenAI tool-use response JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a GBNF grammar-constrained observation (CRUX-C-10)
    GbnfLint {
        /// Path to captured GBNF observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a typical-p sampling observation (CRUX-C-22)
    TypicalPLint {
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Gradient-norm telemetry analysis (CRUX-F-09)
    GradNorm {
        /// Path to JSON file of per-step grad-norm records
        #[arg(long, value_name = "FILE")]
        history_file: PathBuf,
        /// Maximum allowed clipped grad-norm (for cap-violation check)
        #[arg(long, value_name = "M")]
        max_grad_norm: Option<f64>,
        /// Rolling-median window size for spike detection (in steps)
        #[arg(long, default_value = "16")]
        spike_window: usize,
        /// Multiplier threshold for spike detection
        #[arg(long, default_value = "10.0")]
        spike_multiplier: f64,
    },
    /// Lint a captured registry byte-quota observation (CRUX-A-22)
    RegistryQuotaLint {
        /// Path to captured quota/atomic/ceiling observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured imatrix calibration observation (CRUX-B-07)
    ImatrixLint {
        /// Path to captured imatrix observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured /v1/embeddings observation (CRUX-C-13)
    EmbeddingsLint {
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured Hub+local unified-search merge observation (CRUX-A-23)
    UnifiedSearchLint {
        /// Path to captured unified-search observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured `apr rm` / `apr gc` blob-GC observation (CRUX-A-25)
    RmGcLint {
        /// Path to captured rm/gc observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured APR_MODELS shared-cache observation (CRUX-A-21)
    SharedCacheLint {
        /// Path to captured dedup/permission observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    // Flattened, so ToolCommands variants surface as top-level subcommands.
    /// Publishing, conversion, and analysis tools
    #[command(flatten)]
    Tools(ToolCommands),
}
866
/// Subcommands for `apr runs` — experiment run management (ALB-050/051)
// Consistency fix: the doc comment now precedes `#[cfg]`, matching the
// convention used everywhere else in this file (e.g. the `Tune`, `Monitor`,
// and `Train` variants). Outer-attribute order is semantically identical;
// this is purely a style normalization. Note: clap renders the field `///`
// comments below as --help text, so they are runtime-visible strings.
#[cfg(feature = "training")]
#[derive(Subcommand, Debug)]
pub enum RunsCommands {
    /// List all training experiment runs (with inline loss sparklines)
    Ls {
        /// Directory to scan for experiments (default: current dir)
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,
        /// Filter by status: running, completed, failed, all
        #[arg(long, default_value = "all")]
        status: String,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Maximum number of runs to show
        #[arg(long, default_value = "50")]
        limit: usize,
    },
    /// Show detailed metrics for a specific run (with braille loss curve)
    Show {
        /// Run ID
        #[arg(value_name = "RUN_ID")]
        run_id: String,
        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global registry
        #[arg(long)]
        global: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Compare two runs side-by-side (loss curves, config diff, metrics)
    Diff {
        /// First run ID
        #[arg(value_name = "RUN_A")]
        run_a: String,
        /// Second run ID
        #[arg(value_name = "RUN_B")]
        run_b: String,
        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global registry
        #[arg(long)]
        global: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
}
923
/// Subcommands for `apr experiment` — interactive experiment browser (ALB-024)
// Consistency fix: the doc comment now precedes `#[cfg]`, matching the
// doc-first attribute order used throughout this file (e.g. `Tune`,
// `Monitor`, `Train`). Outer-attribute order is semantically identical;
// this is purely a style normalization. Field `///` comments below become
// clap --help text, so they are runtime-visible strings.
#[cfg(feature = "training")]
#[derive(Subcommand, Debug)]
pub enum ExperimentCommands {
    /// Browse experiment history with interactive TUI (loss curves, params)
    View {
        /// Path to experiment database file
        #[arg(long, value_name = "FILE")]
        db: Option<PathBuf>,
        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,
        /// Output as JSON (non-interactive)
        #[arg(long)]
        json: bool,
    },
}