// apr_cli/extended_commands.rs
/// Extended CLI commands (analysis, profiling, QA, benchmarks, and advanced tools).
///
/// Flattened into `Commands` via `#[command(flatten)]` so all subcommands remain
/// top-level from the user's perspective (e.g., `apr chat`, `apr profile`).
//
// NOTE(review): clap derives the user-visible `--help` text from the `///` doc
// comments on every variant and field below, so any doc-comment edit here is a
// runtime-visible help-string change. Plain `//` comments (like this one) are
// invisible to clap. Field names map directly to flag names (snake_case ->
// --kebab-case), and `default_value` strings are parsed into the field type.
#[derive(Subcommand, Debug)]
pub enum ExtendedCommands {
    /// Interactive chat with language model
    Chat {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Sampling temperature (0 = greedy, higher = more random)
        #[arg(long, default_value = "0.7")]
        temperature: f32,
        /// Nucleus sampling threshold
        #[arg(long, default_value = "0.9")]
        top_p: f32,
        /// Maximum tokens to generate per response
        #[arg(long, default_value = "512")]
        max_tokens: usize,
        /// System prompt to set model behavior
        #[arg(long)]
        system: Option<String>,
        /// Show inspection info (top-k probs, tokens/sec)
        #[arg(long)]
        inspect: bool,
        /// Disable GPU acceleration (use CPU)
        #[arg(long)]
        no_gpu: bool,
        // NOTE(review): --gpu and --no-gpu are independent flags (no
        // `conflicts_with` declared), so clap accepts both at once; precedence
        // is presumably resolved by the command handler — TODO confirm.
        /// Force GPU acceleration (requires CUDA)
        #[arg(long)]
        gpu: bool,
        /// Enable inference tracing (APR-TRACE-001)
        #[arg(long)]
        trace: bool,
        /// Trace specific steps only (comma-separated)
        #[arg(long, value_delimiter = ',')]
        trace_steps: Option<Vec<String>>,
        /// Verbose tracing
        #[arg(long)]
        trace_verbose: bool,
        /// Save trace output to JSON file
        #[arg(long, value_name = "FILE")]
        trace_output: Option<PathBuf>,
        /// Trace detail level (none, basic, layer, payload)
        #[arg(long, value_name = "LEVEL", default_value = "basic")]
        trace_level: String,
        /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
        #[arg(long)]
        profile: bool,
        /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
        #[arg(long, value_name = "BACKEND")]
        backend: Option<String>,
    },
    /// Benchmark throughput (spec H12: >= 10 tok/s)
    Bench {
        /// Path to model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Number of warmup iterations
        #[arg(long, default_value = "3")]
        warmup: usize,
        /// Number of measurement iterations
        #[arg(long, default_value = "5")]
        iterations: usize,
        /// Max tokens to generate per iteration
        #[arg(long, default_value = "32")]
        max_tokens: usize,
        /// Test prompt
        #[arg(long)]
        prompt: Option<String>,
        /// Use realizar for fast inference (vs aprender baseline)
        #[arg(long)]
        fast: bool,
        /// Benchmark specific brick
        #[arg(long)]
        brick: Option<String>,
        /// Comma-separated latency percentile points for JSON output
        /// (CRUX-E-07). Default: `50,95,99`. Values must be in (0, 100].
        #[arg(long, value_delimiter = ',', default_value = "50,95,99")]
        percentiles: Vec<f64>,
    },
    /// Evaluate model perplexity (spec H13: PPL <= 20) or classification metrics
    Eval {
        /// Path to model file or checkpoint directory
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Dataset: wikitext-2, lambada, or custom
        #[arg(long, default_value = "wikitext-2")]
        dataset: String,
        /// Custom text (when dataset=custom)
        #[arg(long)]
        text: Option<String>,
        /// Maximum tokens to evaluate
        #[arg(long, default_value = "512")]
        max_tokens: usize,
        /// Perplexity threshold for pass/fail
        #[arg(long, default_value = "20.0")]
        threshold: f32,
        /// Task type: omit for perplexity, "classify" for classification eval
        #[arg(long)]
        task: Option<String>,
        /// Test data file (JSONL) for classification evaluation
        #[arg(long, value_name = "FILE")]
        data: Option<PathBuf>,
        /// Model size hint: "0.5B", "tiny" (for classification eval)
        #[arg(long)]
        model_size: Option<String>,
        /// Number of output classes (default: 5)
        #[arg(long, default_value = "5")]
        num_classes: usize,
        /// Generate HuggingFace model card (README.md) in checkpoint dir
        #[arg(long)]
        generate_card: bool,
        /// Device for inference: "cpu" (default) or "cuda" (GPU-accelerated, ALB-089)
        #[arg(long, default_value = "cpu")]
        device: String,
        /// Number of samples per problem for pass@k (ALB-088, default: 1)
        #[arg(long, default_value = "1")]
        samples: usize,
        /// Sampling temperature (0.0 = greedy, 0.8 = standard for pass@k>1)
        #[arg(long, default_value = "0.0")]
        temperature: f32,
    },
    /// Deep profiling with Roofline analysis
    Profile {
        /// Path to model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Layer-by-layer granular analysis
        #[arg(long)]
        granular: bool,
        /// Output format (human, json, flamegraph)
        #[arg(long, default_value = "human")]
        format: String,
        /// Focus on specific operation
        #[arg(long)]
        focus: Option<String>,
        /// Detect naive implementations
        #[arg(long)]
        detect_naive: bool,
        /// GFLOPS threshold for naive detection
        #[arg(long, default_value = "10.0")]
        threshold: f64,
        /// Compare against HuggingFace baseline
        #[arg(long)]
        compare_hf: Option<String>,
        /// Measure energy consumption (requires RAPL)
        #[arg(long)]
        energy: bool,
        /// Compute performance grade (vs Ollama baseline)
        #[arg(long)]
        perf_grade: bool,
        /// Show call graph
        #[arg(long)]
        callgraph: bool,
        /// Exit non-zero if naive implementation detected
        #[arg(long)]
        fail_on_naive: bool,
        /// Output file path for flamegraph SVG (GH-174, PMAT-182)
        #[arg(long, short = 'o')]
        output: Option<PathBuf>,

        // PMAT-192: CI Assertion Mode (GH-180)
        /// Enable CI mode with assertion checks (exits 1 on failure)
        #[arg(long)]
        ci: bool,
        /// Minimum throughput in tok/s (CI assertion, exits 1 if below)
        #[arg(long)]
        assert_throughput: Option<f64>,
        /// Maximum p99 latency in ms (CI assertion, exits 1 if above)
        #[arg(long)]
        assert_p99: Option<f64>,
        /// Maximum p50 latency in ms (CI assertion, exits 1 if above)
        #[arg(long)]
        assert_p50: Option<f64>,
        /// Warmup passes before measurement (default: 3)
        #[arg(long, default_value = "3")]
        warmup: usize,
        /// Measurement passes (default: 10)
        #[arg(long, default_value = "10")]
        measure: usize,
        /// Number of tokens to generate per measurement pass (default: 32)
        #[arg(long, default_value = "32")]
        tokens: usize,
        /// Compare against Ollama baseline (runs ollama for comparison)
        #[arg(long)]
        ollama: bool,
        /// Disable GPU (force CPU-only profiling)
        #[arg(long)]
        no_gpu: bool,
        /// Compare against another model format (F-PROFILE-011)
        #[arg(long, value_name = "FILE")]
        compare: Option<PathBuf>,
    },
    /// Falsifiable QA checklist for model releases
    Qa {
        /// Path to model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Minimum throughput threshold in tok/s
        #[arg(long, value_name = "TPS")]
        assert_tps: Option<f64>,
        /// Minimum speedup vs Ollama
        #[arg(long, value_name = "SPEEDUP")]
        assert_speedup: Option<f64>,
        /// Minimum GPU vs CPU speedup (F-PERF-042)
        #[arg(long, value_name = "SPEEDUP")]
        assert_gpu_speedup: Option<f64>,
        /// Skip golden output test
        #[arg(long)]
        skip_golden: bool,
        /// Skip throughput benchmark
        #[arg(long)]
        skip_throughput: bool,
        /// Skip Ollama parity comparison
        #[arg(long)]
        skip_ollama: bool,
        /// Skip GPU vs CPU speedup test (F-PERF-042)
        #[arg(long)]
        skip_gpu_speedup: bool,
        /// Skip tensor contract validation (PMAT-235)
        #[arg(long)]
        skip_contract: bool,
        /// Skip cross-format parity test (F-QUAL-032)
        #[arg(long)]
        skip_format_parity: bool,
        /// Skip PTX parity validation (GH-219)
        #[arg(long)]
        skip_ptx_parity: bool,
        /// SafeTensors model path for cross-format parity test (F-QUAL-032)
        #[arg(long, value_name = "PATH")]
        safetensors_path: Option<PathBuf>,
        /// Number of benchmark iterations
        #[arg(long, default_value = "10")]
        iterations: usize,
        /// Number of warmup iterations
        #[arg(long, default_value = "3")]
        warmup: usize,
        /// Maximum tokens to generate
        #[arg(long, default_value = "32")]
        max_tokens: usize,
        /// Output as JSON (for CI integration)
        #[arg(long)]
        json: bool,
        /// Verbose output
        #[arg(short, long)]
        verbose: bool,
        /// Minimum number of gates that must execute (fail if fewer)
        #[arg(long, value_name = "N")]
        min_executed: Option<usize>,
        /// Previous QA report for regression detection
        #[arg(long, value_name = "FILE")]
        previous_report: Option<PathBuf>,
        /// Maximum allowed performance regression ratio (default: 0.10 = 10%)
        #[arg(long, value_name = "RATIO")]
        regression_threshold: Option<f64>,
        /// Skip GPU state isolation test
        #[arg(long)]
        skip_gpu_state: bool,
        /// Skip metadata plausibility validation (Bug 210, GH-222)
        #[arg(long)]
        skip_metadata: bool,
        /// Skip GPU capability match gate (GH-280)
        #[arg(long)]
        skip_capability: bool,
        /// Assert classifier head presence and shape (F-CLASS-004)
        #[arg(long)]
        assert_classifier_head: bool,
    },
    /// GPU/CPU parity check (PMAT-232: genchi genbutsu — see where GPU diverges)
    Parity {
        /// Path to GGUF model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Prompt text (default: "What is 2+2?")
        #[arg(short, long, default_value = "What is 2+2?")]
        prompt: String,
        // `assert` is a plain identifier in Rust (only `assert!` the macro is
        // special), so this field name is legal and yields the flag --assert.
        /// Assert parity (exit non-zero on divergence)
        #[arg(long)]
        assert: bool,
    },
    /// Model-to-PTX source mapping (Mieruka: make GPU kernel dispatch visible)
    #[command(name = "ptx-map")]
    PtxMap {
        /// Path to GGUF model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter to specific kernel (e.g., --kernel Q4KGemv)
        #[arg(long)]
        kernel: Option<String>,
        /// Reverse lookup: kernel name -> which layers/steps use it
        #[arg(long)]
        reverse: Option<String>,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Full PTX snippets and detailed analysis
        #[arg(short, long)]
        verbose: bool,
        /// Show batched prefill kernel variants instead of decode
        #[arg(long)]
        prefill: bool,
    },
    /// PTX analysis and bug detection (register pressure, roofline)
    #[command(name = "ptx")]
    Ptx {
        /// Path to a PTX source file
        #[arg(value_name = "FILE")]
        file: Option<PathBuf>,
        /// Analyze a named kernel from trueno-gpu
        #[arg(long, short)]
        kernel: Option<String>,
        /// Strict mode (no performance whitelist)
        #[arg(long)]
        strict: bool,
        /// Show only bug analysis (skip register/memory/roofline)
        #[arg(long)]
        bugs: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Verbose output (include PTX source listing)
        #[arg(short, long)]
        verbose: bool,
    },
    /// ML tuning: LoRA/QLoRA configuration, memory planning, and HPO (GH-176, SPEC-TUNE-2026-001)
    #[cfg(feature = "training")]
    Tune {
        /// Path to model file (optional if using --model)
        #[arg(value_name = "FILE")]
        file: Option<PathBuf>,
        /// Tuning method: auto, full, lora, qlora
        #[arg(long, short = 'm', default_value = "auto")]
        method: String,
        /// LoRA rank (default: auto-selected)
        #[arg(long, short = 'r')]
        rank: Option<u32>,
        /// Available VRAM in GB
        #[arg(long, default_value = "16.0")]
        vram: f64,
        /// Only plan configuration, don't train
        #[arg(long)]
        plan: bool,
        /// Model size for planning (e.g., "7B", "1.5B")
        #[arg(long, value_name = "SIZE")]
        model: Option<String>,
        /// Freeze base model weights
        #[arg(long)]
        freeze_base: bool,
        /// Training data file (JSONL format)
        #[arg(long, value_name = "FILE")]
        train_data: Option<PathBuf>,
        /// Output as JSON (for CI integration)
        #[arg(long)]
        json: bool,
        /// Task type for HPO: classify (SPEC-TUNE-2026-001)
        #[arg(long)]
        task: Option<String>,
        /// Number of HPO trials (default: 10)
        #[arg(long, default_value = "10")]
        budget: usize,
        /// HPO search strategy: tpe, grid, random
        #[arg(long, default_value = "tpe")]
        strategy: String,
        /// HPO scheduler: asha, median, none
        #[arg(long, default_value = "asha")]
        scheduler: String,
        /// Scout mode: 1 epoch per trial for fast exploration
        #[arg(long)]
        scout: bool,
        /// Training data file for HPO (JSONL format)
        #[arg(long, value_name = "FILE")]
        data: Option<PathBuf>,
        /// Number of output classes for classification
        #[arg(long, default_value = "5")]
        num_classes: usize,
        /// Model size hint for HPO (e.g., "0.5B", "1.5B")
        #[arg(long)]
        model_size: Option<String>,
        /// Warm-start from scout phase results directory
        #[arg(long, value_name = "DIR")]
        from_scout: Option<PathBuf>,
        /// Maximum epochs per trial (full mode, default: 20)
        #[arg(long, default_value = "20")]
        max_epochs: usize,
        /// Maximum wall-clock time (e.g., "8h", "30m")
        #[arg(long)]
        time_limit: Option<String>,
    },
    /// Attach live TUI to a running training session
    #[cfg(feature = "training")]
    Monitor {
        /// Experiment output directory (same as finetune -o)
        #[arg(value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Refresh interval in milliseconds
        #[arg(long, default_value = "500")]
        refresh_ms: u64,
        /// Compact display mode
        #[arg(long)]
        compact: bool,
        /// Output JSON lines instead of TUI (for LLM agents and CI)
        #[arg(long)]
        json: bool,
        /// Output format: tui (default), json, text
        #[arg(long, default_value = "tui")]
        format: String,
    },
    /// List, show, and compare training experiment runs
    #[cfg(feature = "training")]
    Runs {
        #[command(subcommand)]
        command: RunsCommands,
    },
    /// Interactive experiment browser (TUI with loss curves)
    #[cfg(feature = "training")]
    Experiment {
        #[command(subcommand)]
        command: ExperimentCommands,
    },
    /// ComputeBrick pipeline monitor (cbtop)
    Cbtop {
        /// Model name (e.g., qwen2.5-coder-1.5b)
        #[arg(long)]
        model: Option<String>,
        /// Attach to running realizar process
        #[arg(long)]
        attach: Option<String>,
        /// Path to GGUF model file for real profiling
        #[arg(long, value_name = "MODEL")]
        model_path: Option<PathBuf>,
        /// Run in headless mode (no TUI, for CI/automation)
        #[arg(long)]
        headless: bool,
        /// Output JSON format (requires --headless)
        #[arg(long)]
        json: bool,
        /// Output file path (requires --headless)
        #[arg(long, value_name = "FILE")]
        output: Option<PathBuf>,
        /// CI mode: exit with code 1 if thresholds not met
        #[arg(long)]
        ci: bool,
        /// Minimum throughput threshold in tok/s (for --ci)
        #[arg(long, value_name = "TOK_S")]
        throughput: Option<f64>,
        /// Minimum brick score threshold 0-100 (for --ci)
        #[arg(long, value_name = "SCORE")]
        brick_score: Option<u32>,
        /// Number of warmup iterations before measurement
        #[arg(long, default_value = "10")]
        warmup: usize,
        /// Number of measurement iterations
        #[arg(long, default_value = "100")]
        iterations: usize,
        /// PAR-100: Enable speculative decoding benchmark
        #[arg(long)]
        speculative: bool,
        /// PAR-100: Number of tokens to draft speculatively (default: 4)
        #[arg(long, default_value = "4")]
        speculation_k: usize,
        /// PAR-099: Path to draft model for speculative decoding
        #[arg(long, value_name = "DRAFT_MODEL")]
        draft_model: Option<PathBuf>,
        /// PAR-102: Number of concurrent requests
        #[arg(long, default_value = "1")]
        concurrent: usize,
        /// Use simulated data (for CI testing only)
        #[arg(long)]
        simulated: bool,
    },
    /// Export for probar visual regression testing (PMAT-481)
    Probar {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Output directory for test artifacts
        #[arg(short, long, default_value = "./probar-export")]
        output: PathBuf,
        /// Export format: json, png, or both
        #[arg(long, default_value = "both")]
        format: String,
        /// Golden reference directory for comparison
        #[arg(long)]
        golden: Option<PathBuf>,
        /// Filter layers by name pattern
        #[arg(long)]
        layer: Option<String>,
        /// Exit non-zero on golden divergence (CI mode, PMAT-481)
        #[arg(long)]
        assert: bool,
        /// Cosine similarity threshold for golden comparison (default: 0.98)
        #[arg(long, default_value = "0.98")]
        tolerance: f32,
    },
    /// Compare APR model against HuggingFace source
    #[command(name = "compare-hf")]
    CompareHf {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// HuggingFace repo ID (e.g., openai/whisper-tiny)
        #[arg(long)]
        hf: String,
        /// Filter tensors by name pattern
        #[arg(long)]
        tensor: Option<String>,
        /// Comparison threshold (default: 1e-5)
        #[arg(long, default_value = "1e-5")]
        threshold: f64,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Format-aware binary forensics (10X better than xxd)
    Hex {
        /// Path to model file (APR, GGUF, or SafeTensors)
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter tensors by name pattern
        #[arg(long)]
        tensor: Option<String>,
        /// Limit bytes/values to display
        #[arg(long, default_value = "64")]
        limit: usize,
        /// Show tensor statistics
        #[arg(long)]
        stats: bool,
        /// List tensor names only
        #[arg(long)]
        list: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Annotated file header (magic, version, tensor count, metadata)
        #[arg(long)]
        header: bool,
        /// Q4K/Q6K/Q8_0 super-block structure with field annotations
        #[arg(long)]
        blocks: bool,
        /// Value histogram + entropy + kurtosis analysis
        #[arg(long)]
        distribution: bool,
        /// Layout contract verification overlay per tensor
        #[arg(long)]
        contract: bool,
        /// Per-region byte entropy analysis
        #[arg(long)]
        entropy: bool,
        /// Raw bytes (like xxd but format-aware, with ASCII column)
        #[arg(long)]
        raw: bool,
        // String (not usize) so callers may pass hex offsets like "0x40";
        // parsing of the optional 0x prefix happens in the handler.
        /// Start at byte offset (supports 0x prefix for hex)
        #[arg(long, default_value = "0")]
        offset: String,
        /// Bytes per row for raw output (default: 16)
        #[arg(long, default_value = "16")]
        width: usize,
        /// Slice range for partial tensor reads (e.g., 0:3 for first 3 elements)
        #[arg(long)]
        slice: Option<String>,
    },
    /// Model architecture tree view
    Tree {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter by component pattern
        #[arg(long)]
        filter: Option<String>,
        /// Output format: ascii, dot, mermaid, json
        #[arg(long, default_value = "ascii")]
        format: String,
        /// Show tensor sizes
        #[arg(long)]
        sizes: bool,
        /// Maximum tree depth
        #[arg(long)]
        depth: Option<usize>,
    },
    /// Data flow visualization
    Flow {
        /// Path to .apr model file
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Filter by layer pattern
        #[arg(long)]
        layer: Option<String>,
        /// Component to visualize: full, encoder, decoder, etc.
        #[arg(long, default_value = "full")]
        component: String,
        /// Verbose output with statistics
        #[arg(short, long)]
        verbose: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Cross-subcommand smoke test (does every tool handle this model?)
    Qualify {
        /// Path to model file (APR, GGUF, or SafeTensors)
        #[arg(value_name = "FILE")]
        file: PathBuf,
        /// Testing tier: smoke (Phase 1), standard (+contracts), full (+playbook)
        #[arg(long, default_value = "smoke")]
        tier: String,
        /// Timeout per gate in seconds
        #[arg(long, default_value = "120")]
        timeout: u64,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Show subcommand output (disable stdout suppression)
        #[arg(short, long)]
        verbose: bool,
        /// Skip specific gates (comma-separated)
        #[arg(long, value_delimiter = ',')]
        skip: Option<Vec<String>>,
    },
    /// Training pipeline (plan/apply) — forjar-style pre-flight validation
    #[cfg(feature = "training")]
    Train {
        #[command(subcommand)]
        command: TrainCommands,
    },
    /// Pretraining loop driver (SHIP-TWO-001 MODEL-2).
    ///
    /// Wires the pretraining loop shape defined by
    /// `contracts/training-loop-pretrain-v1.yaml`. Executes a synthetic
    /// decreasing-loss drive by default so GATE-TRAIN-005 / -007 / -008
    /// divergence-and-NaN guards can be exercised without an actual
    /// 370M compute run. Real corpus wiring is a follow-up ticket.
    #[cfg(feature = "training")]
    Pretrain {
        /// Dataset path (tokenized shard index or raw corpus).
        #[arg(long, value_name = "PATH")]
        dataset: PathBuf,
        /// Tokenizer directory (vocab.json + merges.txt).
        #[arg(long, value_name = "DIR")]
        tokenizer: PathBuf,
        /// Run output directory — checkpoints + metadata go to `{run_dir}/ckpt/`.
        #[arg(long, value_name = "DIR")]
        run_dir: PathBuf,
        /// Training regime — finetune (MODEL-1) or from-scratch (MODEL-2 cold start).
        /// Per contract training-loop-pretrain-v1 §hyperparameter_defaults,
        /// this atomically flips (regime, lr_max, warmup_steps, target_val_loss)
        /// unless explicit --lr / --warmup-steps / --target-val-loss override.
        #[arg(long, value_enum, default_value = "finetune")]
        mode: PretrainMode,
        /// Peak learning rate after warmup. Omit to inherit mode default
        /// (finetune: 5e-5, from-scratch: 3e-4).
        #[arg(long)]
        lr: Option<f32>,
        /// Warmup + cosine decay total steps.
        #[arg(long, default_value = "1000")]
        num_steps: usize,
        /// Number of warmup steps. Omit to inherit mode default
        /// (finetune: 100, from-scratch: 1000).
        #[arg(long)]
        warmup_steps: Option<usize>,
        /// Micro-batch size.
        #[arg(long, default_value = "16")]
        batch_size: usize,
        /// Sequence length per example.
        #[arg(long, default_value = "1024")]
        seq_length: usize,
        /// Steps per epoch — controls per-epoch artifact cadence.
        #[arg(long, default_value = "100")]
        steps_per_epoch: usize,
        /// GATE-TRAIN-006 fixed RNG seed.
        #[arg(long, default_value = "42")]
        seed: u64,
        /// Target val_loss. Omit to inherit mode default
        /// (finetune: 2.2, from-scratch: 3.0).
        #[arg(long)]
        target_val_loss: Option<f32>,
        /// Vocabulary size (required for `--mode from-scratch` INV-TRAIN-005
        /// regime-dependent cap: 2·ln(vocab_size)). MODEL-2 uses 50257.
        #[arg(long, default_value = "50257")]
        vocab_size: u32,
        // Explicit SetTrue is redundant for a bare `bool` field (clap's
        // default for bool flags) but kept to make the INV-TRAIN-010
        // absent/present contract visible at the declaration site.
        /// Synthetic-drive only — do not attempt real compute, exercise loop gates only.
        /// INV-TRAIN-010: absent = real compute (drive_real), present = synthetic (drive_synthetic).
        #[arg(long, action = clap::ArgAction::SetTrue)]
        synthetic: bool,
        /// Training backend. Grammar (contract gpu-training-backend-v1
        /// INV-GPUTRAIN-001): `^(cpu|cuda(:[0-9]|:1[0-5])?|auto)$`.
        /// Default `auto` uses CUDA if available, else CPU (the only
        /// spelling that may fall back silently — all other values
        /// hard-fail on missing runtime per GATE-GPUTRAIN-002).
        #[arg(long, default_value = "auto")]
        device: String,
        /// Initial weights from a pretrained APR file
        /// (contract `apr-pretrain-from-init-v1`). Per spec §49's
        /// MODEL-2 pretrained-init pivot: when present, load weights
        /// from `<PATH>` instead of random-init. Composes with
        /// `--mode finetune` (canonical) or `--mode from-scratch`
        /// (allowed but non-canonical — emits a warning). Missing,
        /// corrupted, or arch-mismatched APR files exit non-zero
        /// before step 1 (no silent random-init fallback).
        #[arg(long, value_name = "PATH")]
        init: Option<PathBuf>,
    },
    /// Tokenizer training pipeline (plan/apply) — BPE vocabulary learning
    Tokenize {
        #[command(subcommand)]
        command: TokenizeCommands,
    },
    /// Data quality pipeline (audit, split, balance) — powered by alimentar
    Data {
        #[command(subcommand)]
        command: DataCommands,
    },
    /// Pipeline orchestration (plan/apply/status) — wraps forjar DAG engine
    Pipeline {
        #[command(subcommand)]
        command: PipelineCommands,
    },
    /// Automated Five Whys diagnosis on a training checkpoint
    Diagnose {
        /// Path to checkpoint directory
        #[arg(value_name = "CHECKPOINT_DIR")]
        checkpoint_dir: PathBuf,
        /// Test data file (JSONL) for evaluation
        #[arg(long, value_name = "FILE")]
        data: Option<PathBuf>,
        /// Model size hint: "0.5B", "tiny"
        #[arg(long)]
        model_size: Option<String>,
        /// Number of output classes (default: 5)
        #[arg(long, default_value = "5")]
        num_classes: usize,
    },
    // --- Contract-lint family (CRUX-*): each variant validates a captured
    // --- observation/response JSON file against its spec's invariants.
    /// Lint an Ollama /api/chat response for schema + NDJSON invariants (CRUX-C-04)
    OllamaChatLint {
        /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
        #[arg(long, value_name = "FILE")]
        response_file: PathBuf,
        /// Treat input as NDJSON stream (one frame per line)
        #[arg(long)]
        stream: bool,
    },
    /// Lint an Ollama /api/chat function-calling response (CRUX-I-04)
    OllamaToolsLint {
        /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
        #[arg(long, value_name = "FILE")]
        response_file: PathBuf,
        /// Optional captured request JSON — enables tool-name allowlist gate
        /// (every called tool name must appear in request.tools[*].function.name)
        #[arg(long, value_name = "FILE")]
        request_file: Option<PathBuf>,
        /// Treat input as NDJSON stream (one frame per line)
        #[arg(long)]
        stream: bool,
    },
    /// Lint a captured DRY-sampling observation (CRUX-C-23)
    DrySamplingLint {
        /// Path to observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured AWQ quality/compression/flags observation (CRUX-B-08)
    AwqLint {
        /// Path to captured AWQ observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured FP8 (E4M3) round-trip + SM-capability observation (CRUX-B-11)
    Fp8Lint {
        /// Path to captured observation JSON (frobenius, capability blocks)
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured NF4 codebook/roundtrip/storage/parity observation (CRUX-B-10)
    Nf4Lint {
        /// Path to captured NF4 observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured GPTQ compression/cosine/flags observation (CRUX-B-09)
    GptqLint {
        /// Path to captured GPTQ observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured CUDA OOM postmortem report (CRUX-F-13)
    OomLint {
        /// Path to captured OOM postmortem JSON (e.g. /tmp/apr-oom-<ts>.json)
        #[arg(long, value_name = "FILE")]
        report_file: PathBuf,
        /// Optional captured stderr log to verify the OOM_REPORT breadcrumb
        #[arg(long, value_name = "FILE")]
        stderr_file: Option<PathBuf>,
    },
    /// Lint a captured OpenAI tool-use response (CRUX-C-11)
    ToolUseLint {
        /// Path to captured OpenAI tool-use response JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a GBNF grammar-constrained observation (CRUX-C-10)
    GbnfLint {
        /// Path to captured GBNF observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a typical-p sampling observation (CRUX-C-22)
    TypicalPLint {
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Gradient-norm telemetry analysis (CRUX-F-09)
    GradNorm {
        /// Path to JSON file of per-step grad-norm records
        #[arg(long, value_name = "FILE")]
        history_file: PathBuf,
        /// Maximum allowed clipped grad-norm (for cap-violation check)
        #[arg(long, value_name = "M")]
        max_grad_norm: Option<f64>,
        /// Rolling-median window size for spike detection (in steps)
        #[arg(long, default_value = "16")]
        spike_window: usize,
        /// Multiplier threshold for spike detection
        #[arg(long, default_value = "10.0")]
        spike_multiplier: f64,
    },
    /// Lint a captured registry byte-quota observation (CRUX-A-22)
    RegistryQuotaLint {
        /// Path to captured quota/atomic/ceiling observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured imatrix calibration observation (CRUX-B-07)
    ImatrixLint {
        /// Path to captured imatrix observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured /v1/embeddings observation (CRUX-C-13)
    EmbeddingsLint {
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured Hub+local unified-search merge observation (CRUX-A-23)
    UnifiedSearchLint {
        /// Path to captured unified-search observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured `apr rm` / `apr gc` blob-GC observation (CRUX-A-25)
    RmGcLint {
        /// Path to captured rm/gc observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    /// Lint a captured APR_MODELS shared-cache observation (CRUX-A-21)
    SharedCacheLint {
        /// Path to captured dedup/permission observation JSON
        #[arg(long, value_name = "FILE")]
        observation_file: PathBuf,
    },
    // Flattened, so ToolCommands variants surface as top-level subcommands.
    /// Publishing, conversion, and analysis tools
    #[command(flatten)]
    Tools(ToolCommands),
}
866
/// Subcommands for `apr runs` — experiment run management (ALB-050/051)
// Consistency fix: the doc comment now precedes `#[cfg]`, matching the
// convention used everywhere else in this file (e.g. the `Tune`, `Monitor`,
// and `Train` variants). Outer-attribute order is semantically identical;
// this is purely a style normalization. Note: clap renders the field `///`
// comments below as --help text, so they are runtime-visible strings.
#[cfg(feature = "training")]
#[derive(Subcommand, Debug)]
pub enum RunsCommands {
    /// List all training experiment runs (with inline loss sparklines)
    Ls {
        /// Directory to scan for experiments (default: current dir)
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,
        /// Filter by status: running, completed, failed, all
        #[arg(long, default_value = "all")]
        status: String,
        /// Output as JSON
        #[arg(long)]
        json: bool,
        /// Maximum number of runs to show
        #[arg(long, default_value = "50")]
        limit: usize,
    },
    /// Show detailed metrics for a specific run (with braille loss curve)
    Show {
        /// Run ID
        #[arg(value_name = "RUN_ID")]
        run_id: String,
        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global registry
        #[arg(long)]
        global: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Compare two runs side-by-side (loss curves, config diff, metrics)
    Diff {
        /// First run ID
        #[arg(value_name = "RUN_A")]
        run_a: String,
        /// Second run ID
        #[arg(value_name = "RUN_B")]
        run_b: String,
        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,
        /// Read from global registry
        #[arg(long)]
        global: bool,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
}
923
/// Subcommands for `apr experiment` — interactive experiment browser (ALB-024)
// Consistency fix: the doc comment now precedes `#[cfg]`, matching the
// doc-first attribute order used throughout this file (e.g. `Tune`,
// `Monitor`, `Train`). Outer-attribute order is semantically identical;
// this is purely a style normalization. Field `///` comments below become
// clap --help text, so they are runtime-visible strings.
#[cfg(feature = "training")]
#[derive(Subcommand, Debug)]
pub enum ExperimentCommands {
    /// Browse experiment history with interactive TUI (loss curves, params)
    View {
        /// Path to experiment database file
        #[arg(long, value_name = "FILE")]
        db: Option<PathBuf>,
        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,
        /// Output as JSON (non-interactive)
        #[arg(long)]
        json: bool,
    },
}