apr_cli/extended_commands.rs
1/// Extended CLI commands (analysis, profiling, QA, benchmarks, and advanced tools).
2///
3/// Flattened into `Commands` via `#[command(flatten)]` so all subcommands remain
4/// top-level from the user's perspective (e.g., `apr chat`, `apr profile`).
5#[derive(Subcommand, Debug)]
6pub enum ExtendedCommands {
// `Chat`: one positional model path plus sampling, device, tracing, and
// profiling options. Every option other than `file` either declares a
// default, is a boolean flag, or is an `Option`.
7 /// Interactive chat with language model
8 Chat {
9 /// Path to .apr model file
10 #[arg(value_name = "FILE")]
11 file: PathBuf,
12 /// Sampling temperature (0 = greedy, higher = more random)
13 #[arg(long, default_value = "0.7")]
14 temperature: f32,
15 /// Nucleus sampling threshold
16 #[arg(long, default_value = "0.9")]
17 top_p: f32,
18 /// Maximum tokens to generate per response
19 #[arg(long, default_value = "512")]
20 max_tokens: usize,
21 /// System prompt to set model behavior
22 #[arg(long)]
23 system: Option<String>,
24 /// Show inspection info (top-k probs, tokens/sec)
25 #[arg(long)]
26 inspect: bool,
// NOTE(review): `no_gpu` and `gpu` are declared as independent flags; no
// `conflicts_with` relation is visible, so supplying both is not rejected
// by this definition. Presumably the handler resolves it — confirm.
27 /// Disable GPU acceleration (use CPU)
28 #[arg(long)]
29 no_gpu: bool,
30 /// Force GPU acceleration (requires CUDA)
31 #[arg(long)]
32 gpu: bool,
33 /// Enable inference tracing (APR-TRACE-001)
34 #[arg(long)]
35 trace: bool,
// Comma-separated list split into a Vec by `value_delimiter`.
36 /// Trace specific steps only (comma-separated)
37 #[arg(long, value_delimiter = ',')]
38 trace_steps: Option<Vec<String>>,
39 /// Verbose tracing
40 #[arg(long)]
41 trace_verbose: bool,
42 /// Save trace output to JSON file
43 #[arg(long, value_name = "FILE")]
44 trace_output: Option<PathBuf>,
// NOTE(review): stored as a free-form String; the level names listed in the
// help text are not enforced by this attribute.
45 /// Trace detail level (none, basic, layer, payload)
46 #[arg(long, value_name = "LEVEL", default_value = "basic")]
47 trace_level: String,
48 /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
49 #[arg(long)]
50 profile: bool,
// NOTE(review): free-form String as well; how it interacts with
// `gpu` / `no_gpu` is not visible here.
51 /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
52 #[arg(long, value_name = "BACKEND")]
53 backend: Option<String>,
54 },
// `Bench`: positional model path plus benchmark sizing options. `warmup`,
// `iterations`, `max_tokens`, and `percentiles` declare defaults; `prompt`
// and `brick` are optional; `fast` is a flag.
55 /// Benchmark throughput (spec H12: >= 10 tok/s)
56 Bench {
57 /// Path to model file
58 #[arg(value_name = "FILE")]
59 file: PathBuf,
60 /// Number of warmup iterations
61 #[arg(long, default_value = "3")]
62 warmup: usize,
63 /// Number of measurement iterations
64 #[arg(long, default_value = "5")]
65 iterations: usize,
66 /// Max tokens to generate per iteration
67 #[arg(long, default_value = "32")]
68 max_tokens: usize,
69 /// Test prompt
70 #[arg(long)]
71 prompt: Option<String>,
72 /// Use realizar for fast inference (vs aprender baseline)
73 #[arg(long)]
74 fast: bool,
75 /// Benchmark specific brick
76 #[arg(long)]
77 brick: Option<String>,
// The default string "50,95,99" is split on ',' by `value_delimiter`.
// NOTE(review): the "(0, 100]" range stated in the help text is not enforced
// here (no `value_parser` is declared); presumably checked by the handler —
// TODO confirm.
78 /// Comma-separated latency percentile points for JSON output
79 /// (CRUX-E-07). Default: `50,95,99`. Values must be in (0, 100].
80 #[arg(long, value_delimiter = ',', default_value = "50,95,99")]
81 percentiles: Vec<f64>,
82 },
// `Eval`: a single variant that carries options for both perplexity and
// classification evaluation (per the help text, `task`, `data`,
// `model_size`, and `num_classes` relate to the classification path).
// NOTE(review): `dataset`, `task`, and `device` are free-form Strings; the
// values named in their help text are not enforced by these attributes.
83 /// Evaluate model perplexity (spec H13: PPL <= 20) or classification metrics
84 Eval {
85 /// Path to model file or checkpoint directory
86 #[arg(value_name = "FILE")]
87 file: PathBuf,
88 /// Dataset: wikitext-2, lambada, or custom
89 #[arg(long, default_value = "wikitext-2")]
90 dataset: String,
91 /// Custom text (when dataset=custom)
92 #[arg(long)]
93 text: Option<String>,
94 /// Maximum tokens to evaluate
95 #[arg(long, default_value = "512")]
96 max_tokens: usize,
97 /// Perplexity threshold for pass/fail
98 #[arg(long, default_value = "20.0")]
99 threshold: f32,
100 /// Task type: omit for perplexity, "classify" for classification eval
101 #[arg(long)]
102 task: Option<String>,
103 /// Test data file (JSONL) for classification evaluation
104 #[arg(long, value_name = "FILE")]
105 data: Option<PathBuf>,
106 /// Model size hint: "0.5B", "tiny" (for classification eval)
107 #[arg(long)]
108 model_size: Option<String>,
109 /// Number of output classes (default: 5)
110 #[arg(long, default_value = "5")]
111 num_classes: usize,
112 /// Generate HuggingFace model card (README.md) in checkpoint dir
113 #[arg(long)]
114 generate_card: bool,
115 /// Device for inference: "cpu" (default) or "cuda" (GPU-accelerated, ALB-089)
116 #[arg(long, default_value = "cpu")]
117 device: String,
118 /// Number of samples per problem for pass@k (ALB-088, default: 1)
119 #[arg(long, default_value = "1")]
120 samples: usize,
121 /// Sampling temperature (0.0 = greedy, 0.8 = standard for pass@k>1)
122 #[arg(long, default_value = "0.0")]
123 temperature: f32,
124 },
// `Profile`: positional model path, analysis toggles, and (after the
// PMAT-192 marker below) CI assertion and measurement-sizing options.
// `output` is the only option in this variant with a short alias (`-o`).
125 /// Deep profiling with Roofline analysis
126 Profile {
127 /// Path to model file
128 #[arg(value_name = "FILE")]
129 file: PathBuf,
130 /// Layer-by-layer granular analysis
131 #[arg(long)]
132 granular: bool,
// NOTE(review): free-form String; the three format names in the help text
// are not enforced here.
133 /// Output format (human, json, flamegraph)
134 #[arg(long, default_value = "human")]
135 format: String,
136 /// Focus on specific operation
137 #[arg(long)]
138 focus: Option<String>,
139 /// Detect naive implementations
140 #[arg(long)]
141 detect_naive: bool,
142 /// GFLOPS threshold for naive detection
143 #[arg(long, default_value = "10.0")]
144 threshold: f64,
145 /// Compare against HuggingFace baseline
146 #[arg(long)]
147 compare_hf: Option<String>,
148 /// Measure energy consumption (requires RAPL)
149 #[arg(long)]
150 energy: bool,
151 /// Compute performance grade (vs Ollama baseline)
152 #[arg(long)]
153 perf_grade: bool,
154 /// Show call graph
155 #[arg(long)]
156 callgraph: bool,
157 /// Exit non-zero if naive implementation detected
158 #[arg(long)]
159 fail_on_naive: bool,
160 /// Output file path for flamegraph SVG (GH-174, PMAT-182)
161 #[arg(long, short = 'o')]
162 output: Option<PathBuf>,
163
164 // PMAT-192: CI Assertion Mode (GH-180)
// NOTE(review): the `assert_*` options below are independent `Option`s; no
// `requires = "ci"` relation is declared, so whether they take effect
// without `--ci` is decided outside this definition.
165 /// Enable CI mode with assertion checks (exits 1 on failure)
166 #[arg(long)]
167 ci: bool,
168 /// Minimum throughput in tok/s (CI assertion, exits 1 if below)
169 #[arg(long)]
170 assert_throughput: Option<f64>,
171 /// Maximum p99 latency in ms (CI assertion, exits 1 if above)
172 #[arg(long)]
173 assert_p99: Option<f64>,
174 /// Maximum p50 latency in ms (CI assertion, exits 1 if above)
175 #[arg(long)]
176 assert_p50: Option<f64>,
177 /// Warmup passes before measurement (default: 3)
178 #[arg(long, default_value = "3")]
179 warmup: usize,
180 /// Measurement passes (default: 10)
181 #[arg(long, default_value = "10")]
182 measure: usize,
183 /// Number of tokens to generate per measurement pass (default: 32)
184 #[arg(long, default_value = "32")]
185 tokens: usize,
186 /// Compare against Ollama baseline (runs ollama for comparison)
187 #[arg(long)]
188 ollama: bool,
189 /// Disable GPU (force CPU-only profiling)
190 #[arg(long)]
191 no_gpu: bool,
192 /// Compare against another model format (F-PROFILE-011)
193 #[arg(long, value_name = "FILE")]
194 compare: Option<PathBuf>,
195 },
// `Qa`: positional model path plus three broad families of options —
// `assert_*` thresholds, `skip_*` gate toggles, and benchmark sizing
// (`iterations`, `warmup`, `max_tokens`) — alongside reporting and
// regression-detection options.
196 /// Falsifiable QA checklist for model releases
197 Qa {
198 /// Path to model file
199 #[arg(value_name = "FILE")]
200 file: PathBuf,
201 /// Minimum throughput threshold in tok/s
202 #[arg(long, value_name = "TPS")]
203 assert_tps: Option<f64>,
204 /// Minimum speedup vs Ollama
205 #[arg(long, value_name = "SPEEDUP")]
206 assert_speedup: Option<f64>,
207 /// Minimum GPU vs CPU speedup (F-PERF-042)
208 #[arg(long, value_name = "SPEEDUP")]
209 assert_gpu_speedup: Option<f64>,
210 /// Skip golden output test
211 #[arg(long)]
212 skip_golden: bool,
213 /// Skip throughput benchmark
214 #[arg(long)]
215 skip_throughput: bool,
216 /// Skip Ollama parity comparison
217 #[arg(long)]
218 skip_ollama: bool,
219 /// Skip GPU vs CPU speedup test (F-PERF-042)
220 #[arg(long)]
221 skip_gpu_speedup: bool,
222 /// Skip tensor contract validation (PMAT-235)
223 #[arg(long)]
224 skip_contract: bool,
225 /// Skip cross-format parity test (F-QUAL-032)
226 #[arg(long)]
227 skip_format_parity: bool,
228 /// Skip PTX parity validation (GH-219)
229 #[arg(long)]
230 skip_ptx_parity: bool,
231 /// SafeTensors model path for cross-format parity test (F-QUAL-032)
232 #[arg(long, value_name = "PATH")]
233 safetensors_path: Option<PathBuf>,
234 /// Number of benchmark iterations
235 #[arg(long, default_value = "10")]
236 iterations: usize,
237 /// Number of warmup iterations
238 #[arg(long, default_value = "3")]
239 warmup: usize,
240 /// Maximum tokens to generate
241 #[arg(long, default_value = "32")]
242 max_tokens: usize,
243 /// Output as JSON (for CI integration)
244 #[arg(long)]
245 json: bool,
246 /// Verbose output
247 #[arg(short, long)]
248 verbose: bool,
249 /// Minimum number of gates that must execute (fail if fewer)
250 #[arg(long, value_name = "N")]
251 min_executed: Option<usize>,
252 /// Previous QA report for regression detection
253 #[arg(long, value_name = "FILE")]
254 previous_report: Option<PathBuf>,
// NOTE(review): the 0.10 default mentioned in the help text is not declared
// here (`Option<f64>` with no `default_value`); presumably applied by the
// handler when the option is absent — TODO confirm.
255 /// Maximum allowed performance regression ratio (default: 0.10 = 10%)
256 #[arg(long, value_name = "RATIO")]
257 regression_threshold: Option<f64>,
258 /// Skip GPU state isolation test
259 #[arg(long)]
260 skip_gpu_state: bool,
261 /// Skip metadata plausibility validation (Bug 210, GH-222)
262 #[arg(long)]
263 skip_metadata: bool,
264 /// Skip GPU capability match gate (GH-280)
265 #[arg(long)]
266 skip_capability: bool,
267 /// Assert classifier head presence and shape (F-CLASS-004)
268 #[arg(long)]
269 assert_classifier_head: bool,
270 },
// `Parity`: positional model path, a prompt, and an `assert` flag.
271 /// GPU/CPU parity check (PMAT-232: genchi genbutsu — see where GPU diverges)
272 Parity {
273 /// Path to GGUF model file
274 #[arg(value_name = "FILE")]
275 file: PathBuf,
// `prompt` has both short and long forms; its declared default is the same
// text the help line quotes.
276 /// Prompt text (default: "What is 2+2?")
277 #[arg(short, long, default_value = "What is 2+2?")]
278 prompt: String,
279 /// Assert parity (exit non-zero on divergence)
280 #[arg(long)]
281 assert: bool,
282 },
// `PtxMap`: the subcommand spelling is pinned explicitly to "ptx-map" by the
// `#[command(name = ...)]` attribute below.
283 /// Model-to-PTX source mapping (Mieruka: make GPU kernel dispatch visible)
284 #[command(name = "ptx-map")]
285 PtxMap {
286 /// Path to GGUF model file
287 #[arg(value_name = "FILE")]
288 file: PathBuf,
// NOTE(review): `kernel` and `reverse` both take a kernel name as a String;
// whether they may be combined is not constrained here.
289 /// Filter to specific kernel (e.g., --kernel Q4KGemv)
290 #[arg(long)]
291 kernel: Option<String>,
292 /// Reverse lookup: kernel name -> which layers/steps use it
293 #[arg(long)]
294 reverse: Option<String>,
295 /// Output as JSON
296 #[arg(long)]
297 json: bool,
298 /// Full PTX snippets and detailed analysis
299 #[arg(short, long)]
300 verbose: bool,
301 /// Show batched prefill kernel variants instead of decode
302 #[arg(long)]
303 prefill: bool,
304 },
// `Ptx`: unlike most variants in this enum, the positional `file` is
// optional (`Option<PathBuf>`).
// NOTE(review): `kernel` is presumably the alternative input when no file is
// given; nothing here requires that at least one of the two is present —
// confirm the handler checks it.
305 /// PTX analysis and bug detection (register pressure, roofline)
306 #[command(name = "ptx")]
307 Ptx {
308 /// Path to a PTX source file
309 #[arg(value_name = "FILE")]
310 file: Option<PathBuf>,
311 /// Analyze a named kernel from trueno-gpu
312 #[arg(long, short)]
313 kernel: Option<String>,
314 /// Strict mode (no performance whitelist)
315 #[arg(long)]
316 strict: bool,
317 /// Show only bug analysis (skip register/memory/roofline)
318 #[arg(long)]
319 bugs: bool,
320 /// Output as JSON
321 #[arg(long)]
322 json: bool,
323 /// Verbose output (include PTX source listing)
324 #[arg(short, long)]
325 verbose: bool,
326 },
// `Tune`: compiled only when the `training` cargo feature is enabled (see
// the `#[cfg]` below). Carries both LoRA/QLoRA planning options and HPO
// options in one variant.
// NOTE(review): `method`, `strategy`, and `scheduler` are free-form Strings;
// the value lists in their help text are not enforced by these attributes.
327 /// ML tuning: LoRA/QLoRA configuration, memory planning, and HPO (GH-176, SPEC-TUNE-2026-001)
328 #[cfg(feature = "training")]
329 Tune {
330 /// Path to model file (optional if using --model)
331 #[arg(value_name = "FILE")]
332 file: Option<PathBuf>,
333 /// Tuning method: auto, full, lora, qlora
334 #[arg(long, short = 'm', default_value = "auto")]
335 method: String,
336 /// LoRA rank (default: auto-selected)
337 #[arg(long, short = 'r')]
338 rank: Option<u32>,
339 /// Available VRAM in GB
340 #[arg(long, default_value = "16.0")]
341 vram: f64,
342 /// Only plan configuration, don't train
343 #[arg(long)]
344 plan: bool,
// NOTE(review): `model` (here) and `model_size` (further down) are both
// described as model-size strings; which path reads which is not visible.
345 /// Model size for planning (e.g., "7B", "1.5B")
346 #[arg(long, value_name = "SIZE")]
347 model: Option<String>,
348 /// Freeze base model weights
349 #[arg(long)]
350 freeze_base: bool,
// NOTE(review): `train_data` (here) and `data` (further down) are both
// described as JSONL training data; confirm they are not redundant.
351 /// Training data file (JSONL format)
352 #[arg(long, value_name = "FILE")]
353 train_data: Option<PathBuf>,
354 /// Output as JSON (for CI integration)
355 #[arg(long)]
356 json: bool,
357 /// Task type for HPO: classify (SPEC-TUNE-2026-001)
358 #[arg(long)]
359 task: Option<String>,
360 /// Number of HPO trials (default: 10)
361 #[arg(long, default_value = "10")]
362 budget: usize,
363 /// HPO search strategy: tpe, grid, random
364 #[arg(long, default_value = "tpe")]
365 strategy: String,
366 /// HPO scheduler: asha, median, none
367 #[arg(long, default_value = "asha")]
368 scheduler: String,
369 /// Scout mode: 1 epoch per trial for fast exploration
370 #[arg(long)]
371 scout: bool,
372 /// Training data file for HPO (JSONL format)
373 #[arg(long, value_name = "FILE")]
374 data: Option<PathBuf>,
375 /// Number of output classes for classification
376 #[arg(long, default_value = "5")]
377 num_classes: usize,
378 /// Model size hint for HPO (e.g., "0.5B", "1.5B")
379 #[arg(long)]
380 model_size: Option<String>,
381 /// Warm-start from scout phase results directory
382 #[arg(long, value_name = "DIR")]
383 from_scout: Option<PathBuf>,
384 /// Maximum epochs per trial (full mode, default: 20)
385 #[arg(long, default_value = "20")]
386 max_epochs: usize,
// Kept as a String (e.g. "8h"); duration parsing happens outside this
// definition.
387 /// Maximum wall-clock time (e.g., "8h", "30m")
388 #[arg(long)]
389 time_limit: Option<String>,
390 },
// `Monitor`: compiled only with the `training` feature. The positional
// experiment directory is optional.
391 /// Attach live TUI to a running training session
392 #[cfg(feature = "training")]
393 Monitor {
394 /// Experiment output directory (same as finetune -o)
395 #[arg(value_name = "DIR")]
396 dir: Option<PathBuf>,
397 /// Refresh interval in milliseconds
398 #[arg(long, default_value = "500")]
399 refresh_ms: u64,
400 /// Compact display mode
401 #[arg(long)]
402 compact: bool,
// NOTE(review): `--json` and `--format json` appear to overlap; precedence
// between them is not visible here. `format` is a free-form String.
403 /// Output JSON lines instead of TUI (for LLM agents and CI)
404 #[arg(long)]
405 json: bool,
406 /// Output format: tui (default), json, text
407 #[arg(long, default_value = "tui")]
408 format: String,
409 },
// `Runs`: compiled only with the `training` feature. Delegates to a nested
// subcommand enum, `RunsCommands`, which is defined outside this view.
410 /// List, show, and compare training experiment runs
411 #[cfg(feature = "training")]
412 Runs {
413 #[command(subcommand)]
414 command: RunsCommands,
415 },
// `Experiment`: compiled only with the `training` feature. Delegates to the
// nested subcommand enum `ExperimentCommands`, defined outside this view.
416 /// Interactive experiment browser (TUI with loss curves)
417 #[cfg(feature = "training")]
418 Experiment {
419 #[command(subcommand)]
420 command: ExperimentCommands,
421 },
// `Cbtop`: has no positional argument; every input is a long option.
422 /// ComputeBrick pipeline monitor (cbtop)
423 Cbtop {
424 /// Model name (e.g., qwen2.5-coder-1.5b)
425 #[arg(long)]
426 model: Option<String>,
427 /// Attach to running realizar process
428 #[arg(long)]
429 attach: Option<String>,
430 /// Path to GGUF model file for real profiling
431 #[arg(long, value_name = "MODEL")]
432 model_path: Option<PathBuf>,
433 /// Run in headless mode (no TUI, for CI/automation)
434 #[arg(long)]
435 headless: bool,
// NOTE(review): "requires --headless" on `json` and `output` is help text
// only; no `requires` relation is declared on these args, so any
// enforcement must live in the handler — confirm.
436 /// Output JSON format (requires --headless)
437 #[arg(long)]
438 json: bool,
439 /// Output file path (requires --headless)
440 #[arg(long, value_name = "FILE")]
441 output: Option<PathBuf>,
442 /// CI mode: exit with code 1 if thresholds not met
443 #[arg(long)]
444 ci: bool,
445 /// Minimum throughput threshold in tok/s (for --ci)
446 #[arg(long, value_name = "TOK_S")]
447 throughput: Option<f64>,
// NOTE(review): the 0-100 range in the help text is not enforced by the
// `u32` type or by any attribute here.
448 /// Minimum brick score threshold 0-100 (for --ci)
449 #[arg(long, value_name = "SCORE")]
450 brick_score: Option<u32>,
451 /// Number of warmup iterations before measurement
452 #[arg(long, default_value = "10")]
453 warmup: usize,
454 /// Number of measurement iterations
455 #[arg(long, default_value = "100")]
456 iterations: usize,
457 /// PAR-100: Enable speculative decoding benchmark
458 #[arg(long)]
459 speculative: bool,
460 /// PAR-100: Number of tokens to draft speculatively (default: 4)
461 #[arg(long, default_value = "4")]
462 speculation_k: usize,
463 /// PAR-099: Path to draft model for speculative decoding
464 #[arg(long, value_name = "DRAFT_MODEL")]
465 draft_model: Option<PathBuf>,
466 /// PAR-102: Number of concurrent requests
467 #[arg(long, default_value = "1")]
468 concurrent: usize,
469 /// Use simulated data (for CI testing only)
470 #[arg(long)]
471 simulated: bool,
472 },
// `Probar`: delegates to the nested subcommand enum `ProbarSubcommand`,
// defined outside this view. The whole `///` block below is user-facing
// help text for the subcommand.
473 /// Probar testing framework (GH-876 — visual regression, replay, more).
474 ///
475 /// GH-876 Milestone 1: `apr probar tensor` migrates the existing flat
476 /// `apr probar <FILE>` behavior (PMAT-481 tensor visual regression).
477 /// The remaining probador subcommands (test, record, coverage, playbook,
478 /// comply, av-sync, audio, video, animation, stress, llm) land in
479 /// follow-up PRs that delegate to the probador library.
480 Probar {
481 #[command(subcommand)]
482 command: ProbarSubcommand,
483 },
// `CompareHf`: subcommand spelling pinned to "compare-hf".
484 /// Compare APR model against HuggingFace source
485 #[command(name = "compare-hf")]
486 CompareHf {
487 /// Path to .apr model file
488 #[arg(value_name = "FILE")]
489 file: PathBuf,
// `hf` is a plain `String` with no default and no `Option` wrapper, so by
// clap derive convention `--hf` is a mandatory option.
490 /// HuggingFace repo ID (e.g., openai/whisper-tiny)
491 #[arg(long)]
492 hf: String,
493 /// Filter tensors by name pattern
494 #[arg(long)]
495 tensor: Option<String>,
496 /// Comparison threshold (default: 1e-5)
497 #[arg(long, default_value = "1e-5")]
498 threshold: f64,
499 /// Output as JSON
500 #[arg(long)]
501 json: bool,
502 },
// `Modelfile`: delegates to the nested subcommand enum
// `ModelfileSubcommand`, defined outside this view.
503 /// CRUX-K-11: parse Ollama-style Modelfile DSL into apr config.
504 Modelfile {
505 #[command(subcommand)]
506 command: ModelfileSubcommand,
507 },
// `Hex`: positional model path plus a set of independent boolean view
// toggles (`stats`, `list`, `header`, `blocks`, `distribution`, `contract`,
// `entropy`, `raw`). No relation between the toggles is declared here.
508 /// Format-aware binary forensics (10X better than xxd)
509 Hex {
510 /// Path to model file (APR, GGUF, or SafeTensors)
511 #[arg(value_name = "FILE")]
512 file: PathBuf,
513 /// Filter tensors by name pattern
514 #[arg(long)]
515 tensor: Option<String>,
516 /// Limit bytes/values to display
517 #[arg(long, default_value = "64")]
518 limit: usize,
519 /// Show tensor statistics
520 #[arg(long)]
521 stats: bool,
522 /// List tensor names only
523 #[arg(long)]
524 list: bool,
525 /// Output as JSON
526 #[arg(long)]
527 json: bool,
528 /// Annotated file header (magic, version, tensor count, metadata)
529 #[arg(long)]
530 header: bool,
531 /// Q4K/Q6K/Q8_0 super-block structure with field annotations
532 #[arg(long)]
533 blocks: bool,
534 /// Value histogram + entropy + kurtosis analysis
535 #[arg(long)]
536 distribution: bool,
537 /// Layout contract verification overlay per tensor
538 #[arg(long)]
539 contract: bool,
540 /// Per-region byte entropy analysis
541 #[arg(long)]
542 entropy: bool,
543 /// Raw bytes (like xxd but format-aware, with ASCII column)
544 #[arg(long)]
545 raw: bool,
// `offset` is taken as a String (the help text mentions a 0x prefix), so
// numeric parsing must happen outside this definition.
546 /// Start at byte offset (supports 0x prefix for hex)
547 #[arg(long, default_value = "0")]
548 offset: String,
549 /// Bytes per row for raw output (default: 16)
550 #[arg(long, default_value = "16")]
551 width: usize,
// Likewise a String ("0:3"-style); the range syntax is parsed elsewhere.
552 /// Slice range for partial tensor reads (e.g., 0:3 for first 3 elements)
553 #[arg(long)]
554 slice: Option<String>,
555 },
// `Tree`: positional model path plus display options.
556 /// Model architecture tree view
557 Tree {
558 /// Path to .apr model file
559 #[arg(value_name = "FILE")]
560 file: PathBuf,
561 /// Filter by component pattern
562 #[arg(long)]
563 filter: Option<String>,
// NOTE(review): free-form String; the four format names in the help text
// are not enforced here.
564 /// Output format: ascii, dot, mermaid, json
565 #[arg(long, default_value = "ascii")]
566 format: String,
567 /// Show tensor sizes
568 #[arg(long)]
569 sizes: bool,
570 /// Maximum tree depth
571 #[arg(long)]
572 depth: Option<usize>,
573 },
// `Flow`: positional model path plus filter and output options.
574 /// Data flow visualization
575 Flow {
576 /// Path to .apr model file
577 #[arg(value_name = "FILE")]
578 file: PathBuf,
579 /// Filter by layer pattern
580 #[arg(long)]
581 layer: Option<String>,
// NOTE(review): free-form String; the help text's "etc." leaves the set of
// accepted component names open.
582 /// Component to visualize: full, encoder, decoder, etc.
583 #[arg(long, default_value = "full")]
584 component: String,
585 /// Verbose output with statistics
586 #[arg(short, long)]
587 verbose: bool,
588 /// Output as JSON
589 #[arg(long)]
590 json: bool,
591 },
// `Qualify`: positional model path plus tier, timeout, and output options.
592 /// Cross-subcommand smoke test (does every tool handle this model?)
593 Qualify {
594 /// Path to model file (APR, GGUF, or SafeTensors)
595 #[arg(value_name = "FILE")]
596 file: PathBuf,
// NOTE(review): free-form String; the tier names in the help text are not
// enforced here.
597 /// Testing tier: smoke (Phase 1), standard (+contracts), full (+playbook)
598 #[arg(long, default_value = "smoke")]
599 tier: String,
600 /// Timeout per gate in seconds
601 #[arg(long, default_value = "120")]
602 timeout: u64,
603 /// Output as JSON
604 #[arg(long)]
605 json: bool,
606 /// Show subcommand output (disable stdout suppression)
607 #[arg(short, long)]
608 verbose: bool,
// Comma-separated list split into a Vec by `value_delimiter`.
609 /// Skip specific gates (comma-separated)
610 #[arg(long, value_delimiter = ',')]
611 skip: Option<Vec<String>>,
612 },
// `Train`: compiled only with the `training` feature. Delegates to the
// nested subcommand enum `TrainCommands`, defined outside this view.
613 /// Training pipeline (plan/apply) — forjar-style pre-flight validation
614 #[cfg(feature = "training")]
615 Train {
616 #[command(subcommand)]
617 command: TrainCommands,
618 },
// `Pretrain`: compiled only with the `training` feature. It has no
// positional arguments. `dataset`, `tokenizer`, and `run_dir` are
// non-optional `PathBuf`s declared with `long` and no default, so by clap
// derive convention they are mandatory options.
619 /// Pretraining loop driver (SHIP-TWO-001 MODEL-2).
620 ///
621 /// Wires the pretraining loop shape defined by
622 /// `contracts/training-loop-pretrain-v1.yaml`. Executes a synthetic
623 /// decreasing-loss drive by default so GATE-TRAIN-005 / -007 / -008
624 /// divergence-and-NaN guards can be exercised without an actual
625 /// 370M compute run. Real corpus wiring is a follow-up ticket.
626 #[cfg(feature = "training")]
627 Pretrain {
628 /// Dataset path (tokenized shard index or raw corpus).
629 #[arg(long, value_name = "PATH")]
630 dataset: PathBuf,
631 /// Tokenizer directory (vocab.json + merges.txt).
632 #[arg(long, value_name = "DIR")]
633 tokenizer: PathBuf,
634 /// Run output directory — checkpoints + metadata go to `{run_dir}/ckpt/`.
635 #[arg(long, value_name = "DIR")]
636 run_dir: PathBuf,
// `PretrainMode` is a `value_enum` type defined outside this view; the
// default is given by its "finetune" spelling.
637 /// Training regime — finetune (MODEL-1) or from-scratch (MODEL-2 cold start).
638 /// Per contract training-loop-pretrain-v1 §hyperparameter_defaults,
639 /// this atomically flips (regime, lr_max, warmup_steps, target_val_loss)
640 /// unless explicit --lr / --warmup-steps / --target-val-loss override.
641 #[arg(long, value_enum, default_value = "finetune")]
642 mode: PretrainMode,
// `lr`, `warmup_steps`, and `target_val_loss` are `Option`s with no declared
// default, so "omitted" stays distinguishable from an explicit value. The
// mode-dependent defaults quoted in their help text are applied elsewhere.
643 /// Peak learning rate after warmup. Omit to inherit mode default
644 /// (finetune: 5e-5, from-scratch: 3e-4).
645 #[arg(long)]
646 lr: Option<f32>,
647 /// Warmup + cosine decay total steps.
648 #[arg(long, default_value = "1000")]
649 num_steps: usize,
650 /// Number of warmup steps. Omit to inherit mode default
651 /// (finetune: 100, from-scratch: 1000).
652 #[arg(long)]
653 warmup_steps: Option<usize>,
654 /// Micro-batch size.
655 #[arg(long, default_value = "16")]
656 batch_size: usize,
657 /// Sequence length per example.
658 #[arg(long, default_value = "1024")]
659 seq_length: usize,
660 /// Steps per epoch — controls per-epoch artifact cadence.
661 #[arg(long, default_value = "100")]
662 steps_per_epoch: usize,
663 /// GATE-TRAIN-006 fixed RNG seed.
664 #[arg(long, default_value = "42")]
665 seed: u64,
666 /// Target val_loss. Omit to inherit mode default
667 /// (finetune: 2.2, from-scratch: 3.0).
668 #[arg(long)]
669 target_val_loss: Option<f32>,
670 /// Vocabulary size (required for `--mode from-scratch` INV-TRAIN-005
671 /// regime-dependent cap: 2·ln(vocab_size)). MODEL-2 uses 50257.
672 #[arg(long, default_value = "50257")]
673 vocab_size: u32,
// NOTE(review): the variant-level help above says the synthetic drive runs
// "by default", while this field's help says absent = real compute. The two
// statements read as contradictory — confirm which is current.
674 /// Synthetic-drive only — do not attempt real compute, exercise loop gates only.
675 /// INV-TRAIN-010: absent = real compute (drive_real), present = synthetic (drive_synthetic).
676 #[arg(long, action = clap::ArgAction::SetTrue)]
677 synthetic: bool,
// NOTE(review): `device` is a free-form String; the grammar quoted in the
// help text is not enforced by this attribute, so validation must happen
// in the handler — confirm.
678 /// Training backend. Grammar (contract gpu-training-backend-v1
679 /// INV-GPUTRAIN-001): `^(cpu|cuda(:[0-9]|:1[0-5])?|auto)$`.
680 /// Default `auto` uses CUDA if available, else CPU (the only
681 /// spelling that may fall back silently — all other values
682 /// hard-fail on missing runtime per GATE-GPUTRAIN-002).
683 #[arg(long, default_value = "auto")]
684 device: String,
685 /// Initial weights from a pretrained APR file
686 /// (contract `apr-pretrain-from-init-v1`). Per spec §49's
687 /// MODEL-2 pretrained-init pivot: when present, load weights
688 /// from `<PATH>` instead of random-init. Composes with
689 /// `--mode finetune` (canonical) or `--mode from-scratch`
690 /// (allowed but non-canonical — emits a warning). Missing,
691 /// corrupted, or arch-mismatched APR files exit non-zero
692 /// before step 1 (no silent random-init fallback).
693 #[arg(long, value_name = "PATH")]
694 init: Option<PathBuf>,
695 /// SPEC §83 P0-J: bypass the Chinchilla compute-optimal hard
696 /// gate (`chinchilla-gate-v1`). Default is fail-fast when
697 /// D/N < 10× (severely under-provisioned per Hoffmann et al.
698 /// 2022). Pass this flag to acknowledge the under-provisioning
699 /// and proceed anyway (e.g. for ablation studies, resumed
700 /// runs, or smoke tests).
701 #[arg(long, action = clap::ArgAction::SetTrue)]
702 force_under_provisioned: bool,
703 /// SPEC §84 P2-F: shared held-out validation shard.
704 ///
705 /// When provided, the val-loss eval reads `HELD_OUT_BATCHES`
706 /// batches from this separate `.bin`-shards directory instead
707 /// of stealing the first 16 batches of `--dataset`. This makes
708 /// `val_loss` comparable across runs whose `--dataset`
709 /// composition changes (P2-C's audit-falsified result was
710 /// confounded by val sets being drawn from different corpus
711 /// distributions — qwen-v2 = codeparrot only, qwen-v3 =
712 /// codeparrot + the-stack-dedup).
713 ///
714 /// Path semantics: directory of `.bin` shards (same format as
715 /// `--dataset`). Operator tokenizes the held-out corpus
716 /// independently via `apr tokenize encode-corpus --max-docs N`
717 /// to a separate output dir, then passes that dir here. The
718 /// shard contract is `contracts/dataset-thestack-python-v1.yaml`.
719 ///
720 /// When omitted, falls back to the historical "first 16
721 /// batches of --dataset" behaviour for backwards compatibility.
722 #[arg(long, value_name = "DIR")]
723 val_shard: Option<PathBuf>,
724 },
// `Tokenize`: delegates to the nested subcommand enum `TokenizeCommands`,
// defined outside this view. Unlike `Train`, this variant carries no
// `#[cfg(feature = "training")]` gate.
725 /// Tokenizer training pipeline (plan/apply) — BPE vocabulary learning
726 Tokenize {
727 #[command(subcommand)]
728 command: TokenizeCommands,
729 },
// `Data`: delegates to the nested subcommand enum `DataCommands`, defined
// outside this view.
730 /// Data quality pipeline (audit, split, balance) — powered by alimentar
731 Data {
732 #[command(subcommand)]
733 command: DataCommands,
734 },
// `Pipeline`: delegates to the nested subcommand enum `PipelineCommands`,
// defined outside this view.
735 /// Pipeline orchestration (plan/apply/status) — wraps forjar DAG engine
736 Pipeline {
737 #[command(subcommand)]
738 command: PipelineCommands,
739 },
// `Diagnose`: positional checkpoint directory plus the same `data` /
// `model_size` / `num_classes` trio that `Eval` declares.
740 /// Automated Five Whys diagnosis on a training checkpoint
741 Diagnose {
742 /// Path to checkpoint directory
743 #[arg(value_name = "CHECKPOINT_DIR")]
744 checkpoint_dir: PathBuf,
745 /// Test data file (JSONL) for evaluation
746 #[arg(long, value_name = "FILE")]
747 data: Option<PathBuf>,
748 /// Model size hint: "0.5B", "tiny"
749 #[arg(long)]
750 model_size: Option<String>,
751 /// Number of output classes (default: 5)
752 #[arg(long, default_value = "5")]
753 num_classes: usize,
754 },
// `OllamaChatLint`: the input path is a long option (not positional) and is
// a non-optional `PathBuf`, so by clap derive convention it is mandatory.
755 /// Lint an Ollama /api/chat response for schema + NDJSON invariants (CRUX-C-04)
756 OllamaChatLint {
757 /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
758 #[arg(long, value_name = "FILE")]
759 response_file: PathBuf,
760 /// Treat input as NDJSON stream (one frame per line)
761 #[arg(long)]
762 stream: bool,
763 },
// `OllamaToolsLint`: same shape as `OllamaChatLint` plus an optional
// `request_file`; per its help text, supplying it turns on an additional
// allowlist gate.
764 /// Lint an Ollama /api/chat function-calling response (CRUX-I-04)
765 OllamaToolsLint {
766 /// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
767 #[arg(long, value_name = "FILE")]
768 response_file: PathBuf,
769 /// Optional captured request JSON — enables tool-name allowlist gate
770 /// (every called tool name must appear in request.tools[*].function.name)
771 #[arg(long, value_name = "FILE")]
772 request_file: Option<PathBuf>,
773 /// Treat input as NDJSON stream (one frame per line)
774 #[arg(long)]
775 stream: bool,
776 },
// `DrySamplingLint`: single mandatory long option naming the observation
// JSON. The same one-field shape is repeated by `AwqLint`, `Fp8Lint`,
// `Nf4Lint`, and `GptqLint`.
777 /// Lint a captured DRY-sampling observation (CRUX-C-23)
778 DrySamplingLint {
779 /// Path to observation JSON
780 #[arg(long, value_name = "FILE")]
781 observation_file: PathBuf,
782 },
// `AwqLint`: single non-optional `observation_file` given as a long option.
783 /// Lint a captured AWQ quality/compression/flags observation (CRUX-B-08)
784 AwqLint {
785 /// Path to captured AWQ observation JSON
786 #[arg(long, value_name = "FILE")]
787 observation_file: PathBuf,
788 },
// `Fp8Lint`: single non-optional `observation_file` given as a long option.
789 /// Lint a captured FP8 (E4M3) round-trip + SM-capability observation (CRUX-B-11)
790 Fp8Lint {
791 /// Path to captured observation JSON (frobenius, capability blocks)
792 #[arg(long, value_name = "FILE")]
793 observation_file: PathBuf,
794 },
// `Nf4Lint`: single non-optional `observation_file` given as a long option.
795 /// Lint a captured NF4 codebook/roundtrip/storage/parity observation (CRUX-B-10)
796 Nf4Lint {
797 /// Path to captured NF4 observation JSON
798 #[arg(long, value_name = "FILE")]
799 observation_file: PathBuf,
800 },
// `GptqLint`: single non-optional `observation_file` given as a long option.
801 /// Lint a captured GPTQ compression/cosine/flags observation (CRUX-B-09)
802 GptqLint {
803 /// Path to captured GPTQ observation JSON
804 #[arg(long, value_name = "FILE")]
805 observation_file: PathBuf,
806 },
// `OomLint`: mandatory report path plus an optional stderr capture.
807 /// Lint a captured CUDA OOM postmortem report (CRUX-F-13)
808 OomLint {
809 /// Path to captured OOM postmortem JSON (e.g. /tmp/apr-oom-<ts>.json)
810 #[arg(long, value_name = "FILE")]
811 report_file: PathBuf,
812 /// Optional captured stderr log to verify the OOM_REPORT breadcrumb
813 #[arg(long, value_name = "FILE")]
814 stderr_file: Option<PathBuf>,
815 },
// `NcclDiagLint`: mandatory diagnostic path, an optional observed exit
// code, and a flag that tightens the `suggest`-field check.
816 /// Lint a captured NCCL failure-diagnostics JSON from stderr (CRUX-F-15)
817 NcclDiagLint {
818 /// Path to captured stderr JSON diagnostic
819 #[arg(long, value_name = "FILE")]
820 diag_file: PathBuf,
// The ">= 128" gate is described in the help text only; the comparison
// itself is performed outside this definition.
821 /// Optional observed exit code (gate: >= 128 = NCCL class)
822 #[arg(long, value_name = "I32")]
823 exit_code: Option<i32>,
824 /// Require the `suggest` field to cite an nvidia.com / NVIDIA/nccl URL
825 #[arg(long)]
826 require_doc_link: bool,
827 },
// `ReactTraceLint`: mandatory trace path, an optional iteration budget, and
// a grammar-strictness flag.
828 /// Lint a captured `apr agent --trace` ReAct loop trace (CRUX-I-06)
829 ReactTraceLint {
830 /// Path to captured trace JSON
831 #[arg(long, value_name = "FILE")]
832 trace_file: PathBuf,
// NOTE(review): the budget is a signed `i64`, so negative values are
// accepted by the parser; presumably rejected or ignored downstream —
// confirm.
833 /// Optional max_iterations budget the trace was produced under
834 #[arg(long, value_name = "N")]
835 max_iterations: Option<i64>,
836 /// Require the scratchpad to parse cleanly as Thought/Action/Observation blocks
837 #[arg(long)]
838 require_grammar: bool,
839 },
// `HangTraceLint`: mandatory trace directory, an inspection mode, and
// optional actual/expected exit codes.
840 /// Lint a captured `$APR_TRACE_DIR` hang stack-dump directory (CRUX-F-14)
841 HangTraceLint {
842 /// Path to the captured trace directory
843 #[arg(long, value_name = "DIR")]
844 trace_dir: PathBuf,
// NOTE(review): `mode` is a free-form String; the two mode names in the help
// text are not enforced by this attribute.
845 /// Inspection mode: `timeout` (expects per-rank dumps) or `success` (expects empty dir)
846 #[arg(long, value_name = "MODE", default_value = "timeout")]
847 mode: String,
// Uses the typed `default_value_t` form, unlike the string-literal
// `default_value` used by most variants earlier in this enum.
848 /// Expected world_size when mode=timeout (number of rank{N}.py.txt files)
849 #[arg(long, value_name = "N", default_value_t = 2)]
850 world_size: usize,
851 /// Actual exit code from the run under inspection (for exit-code gate)
852 #[arg(long, value_name = "I32")]
853 exit_code: Option<i32>,
854 /// Expected exit code (typically 124 for timeout, 1 for other error, 0 for success)
855 #[arg(long, value_name = "I32")]
856 expected_exit_code: Option<i32>,
857 },
// `DdpMetricsLint`: two mandatory metrics files and a mandatory world size,
// plus two tolerances with typed defaults.
858 /// Lint two captured `apr finetune --parallel ddp --json` outputs (N=1, N=k) (CRUX-D-11)
859 DdpMetricsLint {
860 /// Path to N=1 metrics JSON
861 #[arg(long, value_name = "FILE")]
862 metrics_1gpu_file: PathBuf,
863 /// Path to N=world_size metrics JSON
864 #[arg(long, value_name = "FILE")]
865 metrics_ngpu_file: PathBuf,
// NOTE(review): `world_size` is a signed `i64` with no default. The ">= 2"
// bound in the help text is not enforced here, and `HangTraceLint` declares
// its `world_size` as `usize` — confirm the type difference is intended.
866 /// World size used for --metrics-ngpu-file run (>= 2)
867 #[arg(long, value_name = "N")]
868 world_size: i64,
869 /// Scaling-efficiency floor (default 0.85, PyTorch DDP convention)
870 #[arg(long, value_name = "F", default_value_t = 0.85)]
871 scaling_floor: f64,
872 /// Loss-parity relative tolerance (default 0.01)
873 #[arg(long, value_name = "F", default_value_t = 0.01)]
874 loss_tolerance: f64,
875 },
// `AudioInspectLint`: mandatory JSON path plus two optional expectations.
// When an expectation is omitted it stays `None`; how that is treated is
// decided by the handler.
876 /// Lint a captured `apr dataset audio-inspect --format json` body (CRUX-H-13)
877 AudioInspectLint {
878 /// Path to captured JSON body
879 #[arg(long, value_name = "FILE")]
880 json_file: PathBuf,
881 /// Optional expected sample_rate (typically the `--resample-to` arg)
882 #[arg(long, value_name = "U32")]
883 expected_sample_rate: Option<u32>,
884 /// Optional expected channel count (1 = mono after --mono)
885 #[arg(long, value_name = "U32")]
886 expected_channels: Option<u32>,
887 },
// `AttnParityLint`: all three input files are `Option`s.
// NOTE(review): nothing in this definition requires at least one of them to
// be supplied; presumably the handler rejects an invocation with no inputs —
// confirm.
888 /// Lint captured flash-attn2 parity + provenance JSON outputs (CRUX-L-02)
889 AttnParityLint {
890 /// Path to captured `apr kernel parity --impl flash2 --ref naive --json` body
891 #[arg(long, value_name = "FILE")]
892 parity_file: Option<PathBuf>,
893 /// Path to captured `apr run --attn flash2 --json` body for provenance check
894 #[arg(long, value_name = "FILE")]
895 provenance_file: Option<PathBuf>,
896 /// Path to captured head_dim error JSON
897 #[arg(long, value_name = "FILE")]
898 head_dim_error_file: Option<PathBuf>,
899 /// Max absolute diff tolerance (default 5e-3, FlashAttention-2 bound)
900 #[arg(long, value_name = "F", default_value_t = 5e-3)]
901 tol_abs: f64,
902 /// Min cosine similarity floor (default 0.9999)
903 #[arg(long, value_name = "F", default_value_t = 0.9999)]
904 tol_cos: f64,
905 },
906 /// Lint a captured `apr attn-viz` attention dump (CRUX-F-17)
907 AttnVizLint {
908 /// Path to attention dump in JSON form (4-D [layers][heads][rows][cols] floats)
909 #[arg(long, value_name = "FILE")]
910 attn_file: Option<PathBuf>,
911 /// Path to HTML heatmap output
912 #[arg(long, value_name = "FILE")]
913 html_file: Option<PathBuf>,
914 /// Minimum <svg|<canvas open-tag count expected in HTML (|layers|*|heads|)
915 #[arg(long, value_name = "N", default_value_t = 1)]
916 expected_heatmaps: usize,
917 /// Row-softmax normalization tolerance (default 1e-5)
918 #[arg(long, value_name = "F64", default_value_t = 1e-5)]
919 tolerance: f64,
920 /// Causal-mask zero epsilon (default 1e-9)
921 #[arg(long, value_name = "F64", default_value_t = 1e-9)]
922 epsilon: f64,
923 },
924 /// Lint a captured `apr trace --check-finite` error JSON and/or `--list` coverage JSON (CRUX-F-11)
925 CheckFiniteLint {
926 /// Captured stderr JSON from `apr trace --check-finite` on a poisoned model
927 #[arg(long, value_name = "FILE")]
928 error_file: Option<PathBuf>,
929 /// Captured stdout JSON from `apr trace --check-finite --list`
930 #[arg(long, value_name = "FILE")]
931 list_file: Option<PathBuf>,
932 /// Minimum layer-coverage count when `--list-file` is supplied (default 100)
933 #[arg(long, value_name = "N", default_value_t = 100)]
934 min_layers: usize,
935 },
936 /// Lint a captured `apr debug embed-viz` CSV (CRUX-F-18)
937 EmbedVizLint {
938 /// Path to captured embed-viz CSV (token_id,token_str,x,y)
939 #[arg(long, value_name = "FILE")]
940 csv_file: PathBuf,
941 /// Expected row count == vocab_size (optional)
942 #[arg(long, value_name = "N")]
943 expected_vocab_size: Option<usize>,
944 /// Second CSV captured under the same seed for determinism check (optional)
945 #[arg(long, value_name = "FILE")]
946 csv_file_b: Option<PathBuf>,
947 },
948 /// Lint a captured `apr explain --format jsonl` token-selection trace (CRUX-F-19)
949 ExplainTokenLint {
950 /// Path to captured JSONL body (one sampled-token record per line)
951 #[arg(long, value_name = "FILE")]
952 jsonl_file: PathBuf,
953 /// Tolerance for `Σ post_prob ≈ 1.0` (default 1e-5)
954 #[arg(long, value_name = "F64", default_value_t = 1e-5)]
955 tolerance: f64,
956 /// Assert greedy decoding: sampled_id must equal argmax(pre_prob)
957 #[arg(long)]
958 require_greedy: bool,
959 },
960 /// Lint a captured GPU memory Chrome Trace Event Format JSON (CRUX-F-07)
961 GpuMemtraceLint {
962 /// Path to captured Chrome Trace JSON from `apr profile --gpu-memory-trace`
963 #[arg(long, value_name = "FILE")]
964 trace_file: PathBuf,
965 },
966 /// Lint a captured KV-cache utilization timeline (CRUX-F-06)
967 KvTimelineLint {
968 /// Path to captured `apr profile --kv-timeline --json` body
969 #[arg(long, value_name = "FILE")]
970 timeline_file: PathBuf,
971 /// Preemption threshold (default 0.95, vLLM canonical)
972 #[arg(long, value_name = "FRACTION", default_value_t = 0.95)]
973 preempt_threshold: f64,
974 },
975 /// Lint a captured OTLP/JSON ExportTraceServiceRequest body (CRUX-K-08)
976 OtlpLint {
977 /// Path to captured OTLP/JSON export body
978 #[arg(long, value_name = "FILE")]
979 otlp_file: PathBuf,
980 /// Require at least one `apr.inference` span to be present
981 #[arg(long)]
982 require_apr_span: bool,
983 /// Require gen_ai.* and apr.tokens.* attribute keys on some span
984 #[arg(long)]
985 require_genai_attrs: bool,
986 /// Verify W3C trace-context propagation: expect this 32-hex traceId
987 #[arg(long, value_name = "HEX32")]
988 expect_trace_id: Option<String>,
989 },
990 /// Lint a captured Prometheus /metrics response (CRUX-K-07)
991 PrometheusLint {
992 /// Path to captured /metrics response body (text/plain; version=0.0.4)
993 #[arg(long, value_name = "FILE")]
994 metrics_file: PathBuf,
995 /// Optional captured Content-Type header to verify against version=0.0.4
996 #[arg(long, value_name = "HEADER")]
997 content_type: Option<String>,
998 /// Require the K-07 metric set (apr_num_requests_running, ...) to be present
999 #[arg(long)]
1000 require_k07_metrics: bool,
1001 },
1002 /// Lint a captured OpenAI tool-use response (CRUX-C-11)
1003 ToolUseLint {
1004 /// Path to captured OpenAI tool-use response JSON
1005 #[arg(long, value_name = "FILE")]
1006 observation_file: PathBuf,
1007 },
1008 /// Lint a GBNF grammar-constrained observation (CRUX-C-10)
1009 GbnfLint {
1010 /// Path to captured GBNF observation JSON
1011 #[arg(long, value_name = "FILE")]
1012 observation_file: PathBuf,
1013 },
1014 /// Lint a typical-p sampling observation (CRUX-C-22)
1015 TypicalPLint {
1016 #[arg(long, value_name = "FILE")]
1017 observation_file: PathBuf,
1018 },
1019 /// Gradient-norm telemetry analysis (CRUX-F-09)
1020 GradNorm {
1021 /// Path to JSON file of per-step grad-norm records
1022 #[arg(long, value_name = "FILE")]
1023 history_file: PathBuf,
1024 /// Maximum allowed clipped grad-norm (for cap-violation check)
1025 #[arg(long, value_name = "M")]
1026 max_grad_norm: Option<f64>,
1027 /// Rolling-median window size for spike detection (in steps)
1028 #[arg(long, default_value = "16")]
1029 spike_window: usize,
1030 /// Multiplier threshold for spike detection
1031 #[arg(long, default_value = "10.0")]
1032 spike_multiplier: f64,
1033 },
1034 /// Lint a captured registry byte-quota observation (CRUX-A-22)
1035 RegistryQuotaLint {
1036 /// Path to captured quota/atomic/ceiling observation JSON
1037 #[arg(long, value_name = "FILE")]
1038 observation_file: PathBuf,
1039 },
1040 /// Lint a captured imatrix calibration observation (CRUX-B-07)
1041 ImatrixLint {
1042 /// Path to captured imatrix observation JSON
1043 #[arg(long, value_name = "FILE")]
1044 observation_file: PathBuf,
1045 },
1046 /// Lint a captured /v1/embeddings observation (CRUX-C-13)
1047 EmbeddingsLint {
1048 #[arg(long, value_name = "FILE")]
1049 observation_file: PathBuf,
1050 },
1051 /// Lint a captured Hub+local unified-search merge observation (CRUX-A-23)
1052 UnifiedSearchLint {
1053 /// Path to captured unified-search observation JSON
1054 #[arg(long, value_name = "FILE")]
1055 observation_file: PathBuf,
1056 },
1057 /// Lint a captured `apr rm` / `apr gc` blob-GC observation (CRUX-A-25)
1058 RmGcLint {
1059 /// Path to captured rm/gc observation JSON
1060 #[arg(long, value_name = "FILE")]
1061 observation_file: PathBuf,
1062 },
1063 /// Lint a captured APR_MODELS shared-cache observation (CRUX-A-21)
1064 SharedCacheLint {
1065 /// Path to captured dedup/permission observation JSON
1066 #[arg(long, value_name = "FILE")]
1067 observation_file: PathBuf,
1068 },
1069 /// Perplexity classifier (CRUX-E-02)
1070 Ppl {
1071 /// JSON file containing an array of per-token natural-log
1072 /// probabilities (e.g. `[-1.2, -0.5, -2.1, ...]`). Required.
1073 #[arg(long, value_name = "FILE")]
1074 log_probs_file: PathBuf,
1075 },
1076 /// Validate dequant→requant metadata preservation (CRUX-B-19)
1077 QuantPreservationLint {
1078 /// Reference GGUF (pre-roundtrip)
1079 #[arg(long, value_name = "REF.gguf")]
1080 reference: PathBuf,
1081 /// Requantized GGUF (post-roundtrip)
1082 #[arg(long, value_name = "REQ.gguf")]
1083 requant: PathBuf,
1084 },
1085 /// Split a safetensors file into shards + weight-map index (CRUX-B-05)
1086 Shard {
1087 /// Single-file safetensors model to split
1088 #[arg(value_name = "FILE")]
1089 file: PathBuf,
1090 /// Maximum size of each shard (e.g. 5GB, 500MB, 1.5GiB)
1091 #[arg(long, value_name = "SIZE", default_value = "5GB")]
1092 max_shard_size: String,
1093 /// Output directory for shards + model.safetensors.index.json
1094 #[arg(short, long, value_name = "DIR")]
1095 output: PathBuf,
1096 },
1097 /// Reconstruct a single safetensors file from a sharded directory (CRUX-B-05)
1098 Unshard {
1099 /// Sharded directory containing model.safetensors.index.json
1100 #[arg(value_name = "DIR")]
1101 input: PathBuf,
1102 /// Output single-file safetensors path
1103 #[arg(short, long, value_name = "FILE")]
1104 output: PathBuf,
1105 },
1106 /// Publishing, conversion, and analysis tools
1107 #[command(flatten)]
1108 Tools(ToolCommands),
1109 /// Score a query/passage pair (or rank multiple passages) with a BERT
1110 /// cross-encoder loaded from an APR v2 file (GH-326 Phase 3).
1111 ///
1112 /// Wraps `aprender_core::models::bert::CrossEncoder::load_from_reader`
1113 /// + `score()`. The APR must contain the canonical HF BERT tensor
1114 /// names (see `models::bert::expected_bert_tensor_names`).
1115 ///
1116 /// Tokenisation is NOT applied here — caller passes pre-tokenised
1117 /// `input_ids` + `token_type_ids` as comma-delimited u32 lists. A
1118 /// dedicated tokeniser-aware mode is Phase 3b follow-up scope.
1119 Rerank {
1120 /// Path to the APR file containing the cross-encoder weights.
1121 #[arg(value_name = "MODEL")]
1122 model: PathBuf,
1123 /// Pre-tokenised input ids (comma-separated `u32`s). Mutually
1124 /// exclusive with `--query`+`--passage`+`--vocab` (Phase 3b).
1125 /// Example: `--input-ids 101,2024,102,3456,102` for `[CLS] q [SEP] p [SEP]`.
1126 #[arg(long, value_name = "IDS")]
1127 input_ids: Option<String>,
1128 /// Pre-tokenised token-type ids (comma-separated `u32`s).
1129 /// Same length as `--input-ids`. 0 for query side, 1 for passage.
1130 #[arg(long, value_name = "IDS")]
1131 token_type_ids: Option<String>,
1132 /// Phase 3b — query text. Pair with `--passage` + `--vocab` to enable
1133 /// in-process WordPiece tokenisation. The tokeniser builds
1134 /// `[CLS] query [SEP] passage [SEP]` with `token_type_ids = 0` for
1135 /// the query side and `1` for the passage side.
1136 #[arg(long, value_name = "TEXT")]
1137 query: Option<String>,
1138 /// Phase 3b — passage text. Required when `--query` is supplied
1139 /// in single-pair mode (use `--passages` for batch ranking).
1140 #[arg(long, value_name = "TEXT")]
1141 passage: Option<String>,
1142 /// Phase 5 — batch ranking mode (#326). Passage candidates to
1143 /// score against `--query`. May be supplied multiple times:
1144 /// `apr rerank model.apr --query "..." --passages "p1" --passages "p2"`.
1145 /// Mutually exclusive with `--passage`. Output is one
1146 /// `score[i]` line per passage in input order, OR a JSON array
1147 /// of `{passage, logit, score}` objects sorted by descending
1148 /// score when `--sort` is set.
1149 #[arg(long, value_name = "TEXT")]
1150 passages: Vec<String>,
1151 /// Phase 5 — sort batch output by descending score (highest
1152 /// relevance first). Only meaningful with `--passages` and
1153 /// `--json`. Default: preserve input order.
1154 #[arg(long)]
1155 sort: bool,
1156 /// Phase 5 — limit to top-K passages after sorting. Implies
1157 /// `--sort`. Default 0 (no limit).
1158 #[arg(long, default_value_t = 0)]
1159 top_k: usize,
1160 /// Phase 3b — path to a WordPiece `vocab.txt` (one token per line,
1161 /// line index = token id). Required when `--query` is supplied.
1162 /// Must contain entries for `[CLS]`, `[SEP]`, and `[UNK]`.
1163 /// Phase 4 accepts HuggingFace `tokenizer.json` (extension-detected).
1164 #[arg(long, value_name = "FILE")]
1165 vocab: Option<PathBuf>,
1166 /// Override hidden_dim (default: 384 / MiniLM-L-6).
1167 #[arg(long, default_value_t = 384)]
1168 hidden_dim: usize,
1169 /// Override num_layers (default: 6 / MiniLM-L-6).
1170 #[arg(long, default_value_t = 6)]
1171 num_layers: usize,
1172 /// Override num_heads (default: 12 / MiniLM-L-6).
1173 #[arg(long, default_value_t = 12)]
1174 num_heads: usize,
1175 /// Override intermediate_dim (default: 1536 / MiniLM-L-6).
1176 #[arg(long, default_value_t = 1536)]
1177 intermediate_dim: usize,
1178 /// Override vocab_size (default: 30522 / bert-base-uncased).
1179 #[arg(long, default_value_t = 30522)]
1180 vocab_size: usize,
1181 /// Override max_position_embeddings (default: 512).
1182 #[arg(long, default_value_t = 512)]
1183 max_position_embeddings: usize,
1184 /// Override type_vocab_size (default: 2).
1185 #[arg(long, default_value_t = 2)]
1186 type_vocab_size: usize,
1187 /// Number of labels in the classifier head (default: 1 for
1188 /// regression-style relevance scoring).
1189 #[arg(long, default_value_t = 1)]
1190 num_labels: usize,
1191 /// Load the optional BERT pooler dense layer (default: true).
1192 /// Cross-encoders that skip the pooler should pass `--with-pooler false`.
1193 #[arg(long, default_value_t = true)]
1194 with_pooler: bool,
1195 /// Emit the raw logit instead of the sigmoid-mapped relevance score.
1196 #[arg(long)]
1197 raw_logit: bool,
1198 /// Output as JSON.
1199 #[arg(long)]
1200 json: bool,
1201 },
1202}
1203
/// Subcommands for `apr runs` — experiment run management (ALB-050/051)
// `///` lines in this enum are picked up by clap as `--help` text, so their
// wording is user-facing. Maintainer notes use plain `//` comments.
// The whole enum only exists when the `training` feature is enabled.
#[cfg(feature = "training")]
#[derive(Debug, Subcommand)]
pub enum RunsCommands {
    /// List all training experiment runs (with inline loss sparklines)
    Ls {
        /// Directory to scan for experiments (default: current dir)
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,

        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,

        /// Filter by status: running, completed, failed, all
        // Free-form at parse time: no `value_parser` restricts the accepted
        // strings here.
        #[arg(long, default_value = "all")]
        status: String,

        /// Output as JSON
        #[arg(long)]
        json: bool,

        /// Maximum number of runs to show
        // Typed default; still rendered as `[default: 50]` in help.
        #[arg(long, default_value_t = 50)]
        limit: usize,
    },

    /// Show detailed metrics for a specific run (with braille loss curve)
    Show {
        /// Run ID
        // Positional (no `long`/`short`), therefore required.
        #[arg(value_name = "RUN_ID")]
        run_id: String,

        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,

        /// Read from global registry
        #[arg(long)]
        global: bool,

        /// Output as JSON
        #[arg(long)]
        json: bool,
    },

    /// Compare two runs side-by-side (loss curves, config diff, metrics)
    Diff {
        /// First run ID
        // Positionals are filled in declaration order: RUN_A, then RUN_B.
        #[arg(value_name = "RUN_A")]
        run_a: String,

        /// Second run ID
        #[arg(value_name = "RUN_B")]
        run_b: String,

        /// Directory containing experiment DB
        #[arg(long, value_name = "DIR")]
        dir: Option<PathBuf>,

        /// Read from global registry
        #[arg(long)]
        global: bool,

        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
}
1260
/// Subcommands for `apr experiment` — interactive experiment browser (ALB-024)
// `///` lines in this enum are picked up by clap as `--help` text, so their
// wording is user-facing. Maintainer notes use plain `//` comments.
// The whole enum only exists when the `training` feature is enabled.
#[cfg(feature = "training")]
#[derive(Debug, Subcommand)]
pub enum ExperimentCommands {
    /// Browse experiment history with interactive TUI (loss curves, params)
    View {
        /// Path to experiment database file
        // Optional: `None` when `--db` is not passed.
        #[arg(long, value_name = "FILE")]
        db: Option<PathBuf>,

        /// Read from global experiment registry (~/.entrenar/experiments.db)
        #[arg(long)]
        global: bool,

        /// Output as JSON (non-interactive)
        #[arg(long)]
        json: bool,
    },
}
1278
1279/// CRUX-K-11: Subcommands for `apr modelfile`.
1280#[derive(Subcommand, Debug)]
1281pub enum ModelfileSubcommand {
1282 /// Parse an Ollama-style Modelfile and emit the parsed config.
1283 ///
1284 /// Grammar: `FROM`, `PARAMETER`, `TEMPLATE`, `SYSTEM`, `LICENSE`,
1285 /// `MESSAGE`, `ADAPTER` directives. Triple-quoted blocks supported.
1286 /// Directive names are case-insensitive. Unknown directives raise
1287 /// `file:line:col` errors.
1288 Parse {
1289 /// Path to the Modelfile
1290 #[arg(value_name = "FILE")]
1291 file: PathBuf,
1292 /// Output format: `json` or `human`
1293 #[arg(long, default_value = "json")]
1294 format: String,
1295 },
1296}
1297
1298/// GH-876: Subcommands for `apr probar` — consolidates the probador testing
1299/// framework under `apr`. Milestone 1 ships only `tensor` (the migrated
1300/// existing behavior). Subsequent milestones add the remaining 14 probador
1301/// subcommands as separate PRs that delegate to the probador library.
1302#[derive(Subcommand, Debug)]
1303pub enum ProbarSubcommand {
1304 /// Export tensor activations for visual regression testing (PMAT-481).
1305 ///
1306 /// Generates JSON/PNG per-layer test artifacts that can be compared
1307 /// against a golden reference directory to detect regressions in
1308 /// model behavior after weight updates, quantization, or refactors.
1309 Tensor {
1310 /// Path to .apr model file
1311 #[arg(value_name = "FILE")]
1312 file: PathBuf,
1313 /// Output directory for test artifacts
1314 #[arg(short, long, default_value = "./probar-export")]
1315 output: PathBuf,
1316 /// Export format: json, png, or both
1317 #[arg(long, default_value = "both")]
1318 format: String,
1319 /// Golden reference directory for comparison
1320 #[arg(long)]
1321 golden: Option<PathBuf>,
1322 /// Filter layers by name pattern
1323 #[arg(long)]
1324 layer: Option<String>,
1325 /// Exit non-zero on golden divergence (CI mode, PMAT-481)
1326 #[arg(long)]
1327 assert: bool,
1328 /// Cosine similarity threshold for golden comparison (default: 0.98)
1329 #[arg(long, default_value = "0.98")]
1330 tolerance: f32,
1331 },
1332}