1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
/// Extended CLI commands (analysis, profiling, QA, benchmarks, and advanced tools).
///
/// Flattened into `Commands` via `#[command(flatten)]` so all subcommands remain
/// top-level from the user's perspective (e.g., `apr chat`, `apr profile`).
#[derive(Subcommand, Debug)]
pub enum ExtendedCommands {
/// Interactive chat with language model
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * `file` is the only positional argument; every other field is a `--long` option.
// * `--gpu` and `--no-gpu` contradict each other, so each names the other in
//   `conflicts_with` and clap rejects the pair at parse time with a usage error
//   instead of leaving the handler to guess which flag wins.
Chat {
    /// Path to .apr model file
    #[arg(value_name = "FILE")]
    file: PathBuf,
    /// Sampling temperature (0 = greedy, higher = more random)
    #[arg(long, default_value = "0.7")]
    temperature: f32,
    /// Nucleus sampling threshold
    #[arg(long, default_value = "0.9")]
    top_p: f32,
    /// Maximum tokens to generate per response
    #[arg(long, default_value = "512")]
    max_tokens: usize,
    /// System prompt to set model behavior
    #[arg(long)]
    system: Option<String>,
    /// Show inspection info (top-k probs, tokens/sec)
    #[arg(long)]
    inspect: bool,
    /// Disable GPU acceleration (use CPU)
    #[arg(long, conflicts_with = "gpu")]
    no_gpu: bool,
    /// Force GPU acceleration (requires CUDA)
    #[arg(long, conflicts_with = "no_gpu")]
    gpu: bool,
    /// Enable inference tracing (APR-TRACE-001)
    #[arg(long)]
    trace: bool,
    /// Trace specific steps only (comma-separated)
    #[arg(long, value_delimiter = ',')]
    trace_steps: Option<Vec<String>>,
    /// Verbose tracing
    #[arg(long)]
    trace_verbose: bool,
    /// Save trace output to JSON file
    #[arg(long, value_name = "FILE")]
    trace_output: Option<PathBuf>,
    /// Trace detail level (none, basic, layer, payload)
    // Free-form string: the four levels named in the help text are not
    // enforced by this declaration.
    #[arg(long, value_name = "LEVEL", default_value = "basic")]
    trace_level: String,
    /// Enable inline Roofline profiling (PMAT-SHOWCASE-METHODOLOGY-001)
    #[arg(long)]
    profile: bool,
    /// PMAT-488: Compute backend override (cuda, cpu, wgpu)
    #[arg(long, value_name = "BACKEND")]
    backend: Option<String>,
},
/// Benchmark throughput (spec H12: >= 10 tok/s)
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Integer defaults are written as typed `default_value_t` literals; they
//   render the same `[default: N]` text as the string form.
Bench {
    /// Path to model file
    #[arg(value_name = "FILE")]
    file: PathBuf,
    /// Number of warmup iterations
    #[arg(long, default_value_t = 3)]
    warmup: usize,
    /// Number of measurement iterations
    #[arg(long, default_value_t = 5)]
    iterations: usize,
    /// Max tokens to generate per iteration
    #[arg(long, default_value_t = 32)]
    max_tokens: usize,
    /// Test prompt
    #[arg(long)]
    prompt: Option<String>,
    /// Use realizar for fast inference (vs aprender baseline)
    #[arg(long)]
    fast: bool,
    /// Benchmark specific brick
    #[arg(long)]
    brick: Option<String>,
    /// Comma-separated latency percentile points for JSON output
    /// (CRUX-E-07). Default: `50,95,99`. Values must be in (0, 100].
    // The default stays a string so clap splits it on the same `,` delimiter
    // as user input. No range check is declared here for the (0, 100] rule;
    // NOTE(review): presumably the handler validates it — confirm.
    #[arg(long, default_value = "50,95,99", value_delimiter = ',')]
    percentiles: Vec<f64>,
},
/// Evaluate model perplexity (spec H13: PPL <= 20) or classification metrics
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Integer defaults use typed `default_value_t` literals.
// * Float and string defaults stay as string literals so the `[default: …]`
//   text is unchanged (`20.0` would render as `20` through `Display`).
// * `dataset`, `task` and `device` are free-form strings; the values named in
//   their help text are not enforced by this declaration.
Eval {
    /// Path to model file or checkpoint directory
    #[arg(value_name = "FILE")]
    file: PathBuf,
    /// Dataset: wikitext-2, lambada, or custom
    #[arg(long, default_value = "wikitext-2")]
    dataset: String,
    /// Custom text (when dataset=custom)
    #[arg(long)]
    text: Option<String>,
    /// Maximum tokens to evaluate
    #[arg(long, default_value_t = 512)]
    max_tokens: usize,
    /// Perplexity threshold for pass/fail
    #[arg(long, default_value = "20.0")]
    threshold: f32,
    /// Task type: omit for perplexity, "classify" for classification eval
    #[arg(long)]
    task: Option<String>,
    /// Test data file (JSONL) for classification evaluation
    #[arg(long, value_name = "FILE")]
    data: Option<PathBuf>,
    /// Model size hint: "0.5B", "tiny" (for classification eval)
    #[arg(long)]
    model_size: Option<String>,
    /// Number of output classes (default: 5)
    #[arg(long, default_value_t = 5)]
    num_classes: usize,
    /// Generate HuggingFace model card (README.md) in checkpoint dir
    #[arg(long)]
    generate_card: bool,
    /// Device for inference: "cpu" (default) or "cuda" (GPU-accelerated, ALB-089)
    #[arg(long, default_value = "cpu")]
    device: String,
    /// Number of samples per problem for pass@k (ALB-088, default: 1)
    #[arg(long, default_value_t = 1)]
    samples: usize,
    /// Sampling temperature (0.0 = greedy, 0.8 = standard for pass@k>1)
    #[arg(long, default_value = "0.0")]
    temperature: f32,
},
/// Deep profiling with Roofline analysis
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Integer defaults use typed `default_value_t` literals; the float
//   `threshold` default stays a string so its help rendering (`10.0`) is unchanged.
// * `format` is a free-form string; the formats named in its help text are
//   not enforced by this declaration.
// * The `assert_*` options are independent `Option<f64>` values; nothing here
//   ties them to `--ci`. NOTE(review): confirm how the handler combines them.
Profile {
    /// Path to model file
    #[arg(value_name = "FILE")]
    file: PathBuf,
    /// Layer-by-layer granular analysis
    #[arg(long)]
    granular: bool,
    /// Output format (human, json, flamegraph)
    #[arg(long, default_value = "human")]
    format: String,
    /// Focus on specific operation
    #[arg(long)]
    focus: Option<String>,
    /// Detect naive implementations
    #[arg(long)]
    detect_naive: bool,
    /// GFLOPS threshold for naive detection
    #[arg(long, default_value = "10.0")]
    threshold: f64,
    /// Compare against HuggingFace baseline
    #[arg(long)]
    compare_hf: Option<String>,
    /// Measure energy consumption (requires RAPL)
    #[arg(long)]
    energy: bool,
    /// Compute performance grade (vs Ollama baseline)
    #[arg(long)]
    perf_grade: bool,
    /// Show call graph
    #[arg(long)]
    callgraph: bool,
    /// Exit non-zero if naive implementation detected
    #[arg(long)]
    fail_on_naive: bool,
    /// Output file path for flamegraph SVG (GH-174, PMAT-182)
    #[arg(short = 'o', long)]
    output: Option<PathBuf>,
    // PMAT-192: CI Assertion Mode (GH-180)
    /// Enable CI mode with assertion checks (exits 1 on failure)
    #[arg(long)]
    ci: bool,
    /// Minimum throughput in tok/s (CI assertion, exits 1 if below)
    #[arg(long)]
    assert_throughput: Option<f64>,
    /// Maximum p99 latency in ms (CI assertion, exits 1 if above)
    #[arg(long)]
    assert_p99: Option<f64>,
    /// Maximum p50 latency in ms (CI assertion, exits 1 if above)
    #[arg(long)]
    assert_p50: Option<f64>,
    /// Warmup passes before measurement (default: 3)
    #[arg(long, default_value_t = 3)]
    warmup: usize,
    /// Measurement passes (default: 10)
    #[arg(long, default_value_t = 10)]
    measure: usize,
    /// Number of tokens to generate per measurement pass (default: 32)
    #[arg(long, default_value_t = 32)]
    tokens: usize,
    /// Compare against Ollama baseline (runs ollama for comparison)
    #[arg(long)]
    ollama: bool,
    /// Disable GPU (force CPU-only profiling)
    #[arg(long)]
    no_gpu: bool,
    /// Compare against another model format (F-PROFILE-011)
    #[arg(long, value_name = "FILE")]
    compare: Option<PathBuf>,
},
/// Falsifiable QA checklist for model releases
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Integer defaults use typed `default_value_t` literals.
// * Every `skip_*` field is an independent boolean switch, and every
//   `assert_*` threshold is optional; no relationships between them are
//   declared here.
// * `regression_threshold` has no clap default; the 0.10 mentioned in its help
//   text must be supplied by the handler — NOTE(review): confirm.
Qa {
    /// Path to model file
    #[arg(value_name = "FILE")]
    file: PathBuf,
    /// Minimum throughput threshold in tok/s
    #[arg(long, value_name = "TPS")]
    assert_tps: Option<f64>,
    /// Minimum speedup vs Ollama
    #[arg(long, value_name = "SPEEDUP")]
    assert_speedup: Option<f64>,
    /// Minimum GPU vs CPU speedup (F-PERF-042)
    #[arg(long, value_name = "SPEEDUP")]
    assert_gpu_speedup: Option<f64>,
    /// Skip golden output test
    #[arg(long)]
    skip_golden: bool,
    /// Skip throughput benchmark
    #[arg(long)]
    skip_throughput: bool,
    /// Skip Ollama parity comparison
    #[arg(long)]
    skip_ollama: bool,
    /// Skip GPU vs CPU speedup test (F-PERF-042)
    #[arg(long)]
    skip_gpu_speedup: bool,
    /// Skip tensor contract validation (PMAT-235)
    #[arg(long)]
    skip_contract: bool,
    /// Skip cross-format parity test (F-QUAL-032)
    #[arg(long)]
    skip_format_parity: bool,
    /// Skip PTX parity validation (GH-219)
    #[arg(long)]
    skip_ptx_parity: bool,
    /// SafeTensors model path for cross-format parity test (F-QUAL-032)
    #[arg(long, value_name = "PATH")]
    safetensors_path: Option<PathBuf>,
    /// Number of benchmark iterations
    #[arg(long, default_value_t = 10)]
    iterations: usize,
    /// Number of warmup iterations
    #[arg(long, default_value_t = 3)]
    warmup: usize,
    /// Maximum tokens to generate
    #[arg(long, default_value_t = 32)]
    max_tokens: usize,
    /// Output as JSON (for CI integration)
    #[arg(long)]
    json: bool,
    /// Verbose output
    #[arg(long, short)]
    verbose: bool,
    /// Minimum number of gates that must execute (fail if fewer)
    #[arg(long, value_name = "N")]
    min_executed: Option<usize>,
    /// Previous QA report for regression detection
    #[arg(long, value_name = "FILE")]
    previous_report: Option<PathBuf>,
    /// Maximum allowed performance regression ratio (default: 0.10 = 10%)
    #[arg(long, value_name = "RATIO")]
    regression_threshold: Option<f64>,
    /// Skip GPU state isolation test
    #[arg(long)]
    skip_gpu_state: bool,
    /// Skip metadata plausibility validation (Bug 210, GH-222)
    #[arg(long)]
    skip_metadata: bool,
    /// Skip GPU capability match gate (GH-280)
    #[arg(long)]
    skip_capability: bool,
    /// Assert classifier head presence and shape (F-CLASS-004)
    #[arg(long)]
    assert_classifier_head: bool,
},
/// GPU/CPU parity check (PMAT-232: genchi genbutsu — see where GPU diverges)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `file` has neither `long` nor `short`, so it is a positional argument.
// * `--prompt` / `-p` always carries a value because of the default below.
// * `--assert` is a plain boolean switch; what counts as divergence is decided
//   by the handler, which is outside this view.
Parity {
/// Path to GGUF model file
#[arg(value_name = "FILE")]
file: PathBuf,
/// Prompt text (default: "What is 2+2?")
#[arg(short, long, default_value = "What is 2+2?")]
prompt: String,
/// Assert parity (exit non-zero on divergence)
#[arg(long)]
assert: bool,
},
/// Model-to-PTX source mapping (Mieruka: make GPU kernel dispatch visible)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * The explicit `name = "ptx-map"` pins the subcommand spelling users type.
// * `--kernel` and `--reverse` are both optional strings and no relationship
//   between them is declared here.
//   NOTE(review): confirm whether the handler treats them as exclusive.
#[command(name = "ptx-map")]
PtxMap {
/// Path to GGUF model file
#[arg(value_name = "FILE")]
file: PathBuf,
/// Filter to specific kernel (e.g., --kernel Q4KGemv)
#[arg(long)]
kernel: Option<String>,
/// Reverse lookup: kernel name -> which layers/steps use it
#[arg(long)]
reverse: Option<String>,
/// Output as JSON
#[arg(long)]
json: bool,
/// Full PTX snippets and detailed analysis
#[arg(short, long)]
verbose: bool,
/// Show batched prefill kernel variants instead of decode
#[arg(long)]
prefill: bool,
},
/// PTX analysis and bug detection (register pressure, roofline)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * Both input sources are optional: the positional `file` and `--kernel` / `-k`.
//   Nothing here requires at least one of them, so clap accepts an invocation
//   with neither. NOTE(review): confirm the handler reports that case.
#[command(name = "ptx")]
Ptx {
/// Path to a PTX source file
#[arg(value_name = "FILE")]
file: Option<PathBuf>,
/// Analyze a named kernel from trueno-gpu
#[arg(long, short)]
kernel: Option<String>,
/// Strict mode (no performance whitelist)
#[arg(long)]
strict: bool,
/// Show only bug analysis (skip register/memory/roofline)
#[arg(long)]
bugs: bool,
/// Output as JSON
#[arg(long)]
json: bool,
/// Verbose output (include PTX source listing)
#[arg(short, long)]
verbose: bool,
},
/// ML tuning: LoRA/QLoRA configuration, memory planning, and HPO (GH-176, SPEC-TUNE-2026-001)
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Compiled only when the `training` feature is enabled.
// * Integer defaults use typed `default_value_t` literals; the float `vram`
//   default stays a string so its help rendering (`16.0`) is unchanged.
// * `method`, `strategy` and `scheduler` are free-form strings; the values
//   named in their help text are not enforced by this declaration.
// * Two pairs of similar options coexist: `--train-data` / `--data` and
//   `--model` / `--model-size`. NOTE(review): confirm which path reads which.
#[cfg(feature = "training")]
Tune {
    /// Path to model file (optional if using --model)
    #[arg(value_name = "FILE")]
    file: Option<PathBuf>,
    /// Tuning method: auto, full, lora, qlora
    #[arg(short = 'm', long, default_value = "auto")]
    method: String,
    /// LoRA rank (default: auto-selected)
    #[arg(short = 'r', long)]
    rank: Option<u32>,
    /// Available VRAM in GB
    #[arg(long, default_value = "16.0")]
    vram: f64,
    /// Only plan configuration, don't train
    #[arg(long)]
    plan: bool,
    /// Model size for planning (e.g., "7B", "1.5B")
    #[arg(long, value_name = "SIZE")]
    model: Option<String>,
    /// Freeze base model weights
    #[arg(long)]
    freeze_base: bool,
    /// Training data file (JSONL format)
    #[arg(long, value_name = "FILE")]
    train_data: Option<PathBuf>,
    /// Output as JSON (for CI integration)
    #[arg(long)]
    json: bool,
    /// Task type for HPO: classify (SPEC-TUNE-2026-001)
    #[arg(long)]
    task: Option<String>,
    /// Number of HPO trials (default: 10)
    #[arg(long, default_value_t = 10)]
    budget: usize,
    /// HPO search strategy: tpe, grid, random
    #[arg(long, default_value = "tpe")]
    strategy: String,
    /// HPO scheduler: asha, median, none
    #[arg(long, default_value = "asha")]
    scheduler: String,
    /// Scout mode: 1 epoch per trial for fast exploration
    #[arg(long)]
    scout: bool,
    /// Training data file for HPO (JSONL format)
    #[arg(long, value_name = "FILE")]
    data: Option<PathBuf>,
    /// Number of output classes for classification
    #[arg(long, default_value_t = 5)]
    num_classes: usize,
    /// Model size hint for HPO (e.g., "0.5B", "1.5B")
    #[arg(long)]
    model_size: Option<String>,
    /// Warm-start from scout phase results directory
    #[arg(long, value_name = "DIR")]
    from_scout: Option<PathBuf>,
    /// Maximum epochs per trial (full mode, default: 20)
    #[arg(long, default_value_t = 20)]
    max_epochs: usize,
    /// Maximum wall-clock time (e.g., "8h", "30m")
    #[arg(long)]
    time_limit: Option<String>,
},
/// Attach live TUI to a running training session
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Compiled only when the `training` feature is enabled.
// * The refresh interval default is a typed `default_value_t` literal.
// * JSON output can be requested two ways (`--json` and `--format json`); no
//   precedence is declared here. NOTE(review): confirm how the handler
//   resolves `--json` combined with a non-json `--format`.
#[cfg(feature = "training")]
Monitor {
    /// Experiment output directory (same as finetune -o)
    #[arg(value_name = "DIR")]
    dir: Option<PathBuf>,
    /// Refresh interval in milliseconds
    #[arg(long, default_value_t = 500)]
    refresh_ms: u64,
    /// Compact display mode
    #[arg(long)]
    compact: bool,
    /// Output JSON lines instead of TUI (for LLM agents and CI)
    #[arg(long)]
    json: bool,
    /// Output format: tui (default), json, text
    #[arg(long, default_value = "tui")]
    format: String,
},
/// List, show, and compare training experiment runs
// Thin wrapper, compiled only with the `training` feature: the variant carries
// no options of its own and delegates to the nested `RunsCommands` subcommand
// enum, which is defined outside this view.
#[cfg(feature = "training")]
Runs {
#[command(subcommand)]
command: RunsCommands,
},
/// Interactive experiment browser (TUI with loss curves)
// Thin wrapper, compiled only with the `training` feature: the variant carries
// no options of its own and delegates to the nested `ExperimentCommands`
// subcommand enum, which is defined outside this view.
#[cfg(feature = "training")]
Experiment {
#[command(subcommand)]
command: ExperimentCommands,
},
/// ComputeBrick pipeline monitor (cbtop)
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * No positional arguments; every field is a `--long` option.
// * Integer defaults use typed `default_value_t` literals.
// * The help text says `--json` and `--output` require `--headless` and that
//   the thresholds are "for --ci", but no `requires = …` relationship is
//   declared, so clap accepts them on their own.
//   NOTE(review): confirm the handler enforces or ignores these combinations.
Cbtop {
    /// Model name (e.g., qwen2.5-coder-1.5b)
    #[arg(long)]
    model: Option<String>,
    /// Attach to running realizar process
    #[arg(long)]
    attach: Option<String>,
    /// Path to GGUF model file for real profiling
    #[arg(long, value_name = "MODEL")]
    model_path: Option<PathBuf>,
    /// Run in headless mode (no TUI, for CI/automation)
    #[arg(long)]
    headless: bool,
    /// Output JSON format (requires --headless)
    #[arg(long)]
    json: bool,
    /// Output file path (requires --headless)
    #[arg(long, value_name = "FILE")]
    output: Option<PathBuf>,
    /// CI mode: exit with code 1 if thresholds not met
    #[arg(long)]
    ci: bool,
    /// Minimum throughput threshold in tok/s (for --ci)
    #[arg(long, value_name = "TOK_S")]
    throughput: Option<f64>,
    /// Minimum brick score threshold 0-100 (for --ci)
    #[arg(long, value_name = "SCORE")]
    brick_score: Option<u32>,
    /// Number of warmup iterations before measurement
    #[arg(long, default_value_t = 10)]
    warmup: usize,
    /// Number of measurement iterations
    #[arg(long, default_value_t = 100)]
    iterations: usize,
    /// PAR-100: Enable speculative decoding benchmark
    #[arg(long)]
    speculative: bool,
    /// PAR-100: Number of tokens to draft speculatively (default: 4)
    #[arg(long, default_value_t = 4)]
    speculation_k: usize,
    /// PAR-099: Path to draft model for speculative decoding
    #[arg(long, value_name = "DRAFT_MODEL")]
    draft_model: Option<PathBuf>,
    /// PAR-102: Number of concurrent requests
    #[arg(long, default_value_t = 1)]
    concurrent: usize,
    /// Use simulated data (for CI testing only)
    #[arg(long)]
    simulated: bool,
},
/// Probar testing framework (GH-876 — visual regression, replay, more).
///
/// GH-876 Milestone 1: `apr probar tensor` migrates the existing flat
/// `apr probar <FILE>` behavior (PMAT-481 tensor visual regression).
/// The remaining probador subcommands (test, record, coverage, playbook,
/// comply, av-sync, audio, video, animation, stress, llm) land in
/// follow-up PRs that delegate to the probador library.
// Thin wrapper: the variant carries no options of its own and delegates to the
// nested `ProbarSubcommand` enum, which is defined outside this view.
Probar {
#[command(subcommand)]
command: ProbarSubcommand,
},
/// Compare APR model against HuggingFace source
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `--hf` is a non-optional `String` with no default, so clap treats it as required.
// * The threshold default is given in scientific notation as a string and
//   parsed into `f64` by clap.
#[command(name = "compare-hf")]
CompareHf {
/// Path to .apr model file
#[arg(value_name = "FILE")]
file: PathBuf,
/// HuggingFace repo ID (e.g., openai/whisper-tiny)
#[arg(long)]
hf: String,
/// Filter tensors by name pattern
#[arg(long)]
tensor: Option<String>,
/// Comparison threshold (default: 1e-5)
#[arg(long, default_value = "1e-5")]
threshold: f64,
/// Output as JSON
#[arg(long)]
json: bool,
},
/// CRUX-K-11: parse Ollama-style Modelfile DSL into apr config.
// Thin wrapper: the variant carries no options of its own and delegates to the
// nested `ModelfileSubcommand` enum, which is defined outside this view.
Modelfile {
#[command(subcommand)]
command: ModelfileSubcommand,
},
/// Format-aware binary forensics (10X better than xxd)
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Integer defaults use typed `default_value_t` literals.
// * `offset` is deliberately a `String` (its help text promises `0x`-prefixed
//   hex), so its default stays the string "0" and parsing happens outside clap.
// * `slice` is likewise a raw string; its `a:b` syntax is not validated here.
// * The display-mode switches are independent booleans; no exclusivity between
//   them is declared.
Hex {
    /// Path to model file (APR, GGUF, or SafeTensors)
    #[arg(value_name = "FILE")]
    file: PathBuf,
    /// Filter tensors by name pattern
    #[arg(long)]
    tensor: Option<String>,
    /// Limit bytes/values to display
    #[arg(long, default_value_t = 64)]
    limit: usize,
    /// Show tensor statistics
    #[arg(long)]
    stats: bool,
    /// List tensor names only
    #[arg(long)]
    list: bool,
    /// Output as JSON
    #[arg(long)]
    json: bool,
    /// Annotated file header (magic, version, tensor count, metadata)
    #[arg(long)]
    header: bool,
    /// Q4K/Q6K/Q8_0 super-block structure with field annotations
    #[arg(long)]
    blocks: bool,
    /// Value histogram + entropy + kurtosis analysis
    #[arg(long)]
    distribution: bool,
    /// Layout contract verification overlay per tensor
    #[arg(long)]
    contract: bool,
    /// Per-region byte entropy analysis
    #[arg(long)]
    entropy: bool,
    /// Raw bytes (like xxd but format-aware, with ASCII column)
    #[arg(long)]
    raw: bool,
    /// Start at byte offset (supports 0x prefix for hex)
    #[arg(long, default_value = "0")]
    offset: String,
    /// Bytes per row for raw output (default: 16)
    #[arg(long, default_value_t = 16)]
    width: usize,
    /// Slice range for partial tensor reads (e.g., 0:3 for first 3 elements)
    #[arg(long)]
    slice: Option<String>,
},
/// Model architecture tree view
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `format` is a free-form string defaulting to "ascii"; the formats named in
//   its help text are not enforced by this declaration.
// * `depth` is optional; when absent, any depth limit is up to the handler.
Tree {
/// Path to .apr model file
#[arg(value_name = "FILE")]
file: PathBuf,
/// Filter by component pattern
#[arg(long)]
filter: Option<String>,
/// Output format: ascii, dot, mermaid, json
#[arg(long, default_value = "ascii")]
format: String,
/// Show tensor sizes
#[arg(long)]
sizes: bool,
/// Maximum tree depth
#[arg(long)]
depth: Option<usize>,
},
/// Data flow visualization
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `component` is a free-form string defaulting to "full"; the "etc." in its
//   help text means the accepted set is defined by the handler, not here.
// * `--verbose` also has the short form `-v`.
Flow {
/// Path to .apr model file
#[arg(value_name = "FILE")]
file: PathBuf,
/// Filter by layer pattern
#[arg(long)]
layer: Option<String>,
/// Component to visualize: full, encoder, decoder, etc.
#[arg(long, default_value = "full")]
component: String,
/// Verbose output with statistics
#[arg(short, long)]
verbose: bool,
/// Output as JSON
#[arg(long)]
json: bool,
},
/// Cross-subcommand smoke test (does every tool handle this model?)
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * The per-gate timeout default is a typed `default_value_t` literal.
// * `tier` is a free-form string; the three tiers named in its help text are
//   not enforced by this declaration.
// * `--skip a,b` is split on `,` by clap into separate entries.
Qualify {
    /// Path to model file (APR, GGUF, or SafeTensors)
    #[arg(value_name = "FILE")]
    file: PathBuf,
    /// Testing tier: smoke (Phase 1), standard (+contracts), full (+playbook)
    #[arg(long, default_value = "smoke")]
    tier: String,
    /// Timeout per gate in seconds
    #[arg(long, default_value_t = 120)]
    timeout: u64,
    /// Output as JSON
    #[arg(long)]
    json: bool,
    /// Show subcommand output (disable stdout suppression)
    #[arg(long, short)]
    verbose: bool,
    /// Skip specific gates (comma-separated)
    #[arg(long, value_delimiter = ',')]
    skip: Option<Vec<String>>,
},
/// Training pipeline (plan/apply) — forjar-style pre-flight validation
// Thin wrapper, compiled only with the `training` feature: the variant carries
// no options of its own and delegates to the nested `TrainCommands` subcommand
// enum, which is defined outside this view.
#[cfg(feature = "training")]
Train {
#[command(subcommand)]
command: TrainCommands,
},
/// Pretraining loop driver (SHIP-TWO-001 MODEL-2).
///
/// Wires the pretraining loop shape defined by
/// `contracts/training-loop-pretrain-v1.yaml`. Executes a synthetic
/// decreasing-loss drive by default so GATE-TRAIN-005 / -007 / -008
/// divergence-and-NaN guards can be exercised without an actual
/// 370M compute run. Real corpus wiring is a follow-up ticket.
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * Compiled only when the `training` feature is enabled.
// * No positional arguments. `--dataset`, `--tokenizer` and `--run-dir` are
//   non-optional `PathBuf`s without defaults, so clap treats them as required.
// * Integer defaults use typed `default_value_t` literals.
// * `lr`, `warmup_steps` and `target_val_loss` are `Option`s so the handler can
//   tell "omitted" apart from an explicit value, as their help text describes.
// * `device` is a free-form string; the grammar quoted in its help text is not
//   enforced by this declaration.
// * NOTE(review): the summary above says the synthetic drive runs "by default",
//   while the `--synthetic` help says absent = real compute — confirm which
//   statement is current.
#[cfg(feature = "training")]
Pretrain {
    /// Dataset path (tokenized shard index or raw corpus).
    #[arg(long, value_name = "PATH")]
    dataset: PathBuf,
    /// Tokenizer directory (vocab.json + merges.txt).
    #[arg(long, value_name = "DIR")]
    tokenizer: PathBuf,
    /// Run output directory — checkpoints + metadata go to `{run_dir}/ckpt/`.
    #[arg(long, value_name = "DIR")]
    run_dir: PathBuf,
    /// Training regime — finetune (MODEL-1) or from-scratch (MODEL-2 cold start).
    /// Per contract training-loop-pretrain-v1 §hyperparameter_defaults,
    /// this atomically flips (regime, lr_max, warmup_steps, target_val_loss)
    /// unless explicit --lr / --warmup-steps / --target-val-loss override.
    #[arg(long, value_enum, default_value = "finetune")]
    mode: PretrainMode,
    /// Peak learning rate after warmup. Omit to inherit mode default
    /// (finetune: 5e-5, from-scratch: 3e-4).
    #[arg(long)]
    lr: Option<f32>,
    /// Warmup + cosine decay total steps.
    #[arg(long, default_value_t = 1000)]
    num_steps: usize,
    /// Number of warmup steps. Omit to inherit mode default
    /// (finetune: 100, from-scratch: 1000).
    #[arg(long)]
    warmup_steps: Option<usize>,
    /// Micro-batch size.
    #[arg(long, default_value_t = 16)]
    batch_size: usize,
    /// Sequence length per example.
    #[arg(long, default_value_t = 1024)]
    seq_length: usize,
    /// Steps per epoch — controls per-epoch artifact cadence.
    #[arg(long, default_value_t = 100)]
    steps_per_epoch: usize,
    /// GATE-TRAIN-006 fixed RNG seed.
    #[arg(long, default_value_t = 42)]
    seed: u64,
    /// Target val_loss. Omit to inherit mode default
    /// (finetune: 2.2, from-scratch: 3.0).
    #[arg(long)]
    target_val_loss: Option<f32>,
    /// Vocabulary size (required for `--mode from-scratch` INV-TRAIN-005
    /// regime-dependent cap: 2·ln(vocab_size)). MODEL-2 uses 50257.
    #[arg(long, default_value_t = 50257)]
    vocab_size: u32,
    /// Synthetic-drive only — do not attempt real compute, exercise loop gates only.
    /// INV-TRAIN-010: absent = real compute (drive_real), present = synthetic (drive_synthetic).
    #[arg(long, action = clap::ArgAction::SetTrue)]
    synthetic: bool,
    /// Training backend. Grammar (contract gpu-training-backend-v1
    /// INV-GPUTRAIN-001): `^(cpu|cuda(:[0-9]|:1[0-5])?|auto)$`.
    /// Default `auto` uses CUDA if available, else CPU (the only
    /// spelling that may fall back silently — all other values
    /// hard-fail on missing runtime per GATE-GPUTRAIN-002).
    #[arg(long, default_value = "auto")]
    device: String,
    /// Initial weights from a pretrained APR file
    /// (contract `apr-pretrain-from-init-v1`). Per spec §49's
    /// MODEL-2 pretrained-init pivot: when present, load weights
    /// from `<PATH>` instead of random-init. Composes with
    /// `--mode finetune` (canonical) or `--mode from-scratch`
    /// (allowed but non-canonical — emits a warning). Missing,
    /// corrupted, or arch-mismatched APR files exit non-zero
    /// before step 1 (no silent random-init fallback).
    #[arg(long, value_name = "PATH")]
    init: Option<PathBuf>,
    /// SPEC §83 P0-J: bypass the Chinchilla compute-optimal hard
    /// gate (`chinchilla-gate-v1`). Default is fail-fast when
    /// D/N < 10× (severely under-provisioned per Hoffmann et al.
    /// 2022). Pass this flag to acknowledge the under-provisioning
    /// and proceed anyway (e.g. for ablation studies, resumed
    /// runs, or smoke tests).
    #[arg(long, action = clap::ArgAction::SetTrue)]
    force_under_provisioned: bool,
    /// SPEC §84 P2-F: shared held-out validation shard.
    ///
    /// When provided, the val-loss eval reads `HELD_OUT_BATCHES`
    /// batches from this separate `.bin`-shards directory instead
    /// of stealing the first 16 batches of `--dataset`. This makes
    /// `val_loss` comparable across runs whose `--dataset`
    /// composition changes (P2-C's audit-falsified result was
    /// confounded by val sets being drawn from different corpus
    /// distributions — qwen-v2 = codeparrot only, qwen-v3 =
    /// codeparrot + the-stack-dedup).
    ///
    /// Path semantics: directory of `.bin` shards (same format as
    /// `--dataset`). Operator tokenizes the held-out corpus
    /// independently via `apr tokenize encode-corpus --max-docs N`
    /// to a separate output dir, then passes that dir here. The
    /// shard contract is `contracts/dataset-thestack-python-v1.yaml`.
    ///
    /// When omitted, falls back to the historical "first 16
    /// batches of --dataset" behaviour for backwards compatibility.
    #[arg(long, value_name = "DIR")]
    val_shard: Option<PathBuf>,
},
/// Tokenizer training pipeline (plan/apply) — BPE vocabulary learning
// Thin wrapper: the variant carries no options of its own and delegates to the
// nested `TokenizeCommands` subcommand enum, which is defined outside this view.
// Unlike `Train` and `Pretrain`, it is not gated behind the `training` feature.
Tokenize {
#[command(subcommand)]
command: TokenizeCommands,
},
/// Data quality pipeline (audit, split, balance) — powered by alimentar
// Thin wrapper: the variant carries no options of its own and delegates to the
// nested `DataCommands` subcommand enum, which is defined outside this view.
Data {
#[command(subcommand)]
command: DataCommands,
},
/// Pipeline orchestration (plan/apply/status) — wraps forjar DAG engine
// Thin wrapper: the variant carries no options of its own and delegates to the
// nested `PipelineCommands` subcommand enum, which is defined outside this view.
Pipeline {
#[command(subcommand)]
command: PipelineCommands,
},
/// Automated Five Whys diagnosis on a training checkpoint
// Reviewer notes (plain `//` comments: clap builds `--help` from the `///` lines only).
// * The checkpoint directory is the only positional argument.
// * The class-count default is a typed `default_value_t` literal.
Diagnose {
    /// Path to checkpoint directory
    #[arg(value_name = "CHECKPOINT_DIR")]
    checkpoint_dir: PathBuf,
    /// Test data file (JSONL) for evaluation
    #[arg(long, value_name = "FILE")]
    data: Option<PathBuf>,
    /// Model size hint: "0.5B", "tiny"
    #[arg(long)]
    model_size: Option<String>,
    /// Number of output classes (default: 5)
    #[arg(long, default_value_t = 5)]
    num_classes: usize,
},
/// Lint an Ollama /api/chat response for schema + NDJSON invariants (CRUX-C-04)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * No positional arguments: the input is passed as `--response-file`, a
//   non-optional `PathBuf` without a default, so clap treats it as required.
// * `--stream` only switches how the same file is interpreted.
OllamaChatLint {
/// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
#[arg(long, value_name = "FILE")]
response_file: PathBuf,
/// Treat input as NDJSON stream (one frame per line)
#[arg(long)]
stream: bool,
},
/// Lint an Ollama /api/chat function-calling response (CRUX-I-04)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `--response-file` is a non-optional `PathBuf` without a default, so clap
//   treats it as required; `--request-file` is optional.
// * No positional arguments.
OllamaToolsLint {
/// Path to captured /api/chat response (JSON object, or NDJSON if --stream)
#[arg(long, value_name = "FILE")]
response_file: PathBuf,
/// Optional captured request JSON — enables tool-name allowlist gate
/// (every called tool name must appear in request.tools[*].function.name)
#[arg(long, value_name = "FILE")]
request_file: Option<PathBuf>,
/// Treat input as NDJSON stream (one frame per line)
#[arg(long)]
stream: bool,
},
/// Lint a captured DRY-sampling observation (CRUX-C-23)
// Single required option: `--observation-file` is a non-optional `PathBuf`
// without a default. The same field name and flag are used by the AWQ, FP8,
// NF4 and GPTQ lint variants.
DrySamplingLint {
/// Path to observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured AWQ quality/compression/flags observation (CRUX-B-08)
// Single required option: `--observation-file` is a non-optional `PathBuf`
// without a default; there are no positional arguments.
AwqLint {
/// Path to captured AWQ observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured FP8 (E4M3) round-trip + SM-capability observation (CRUX-B-11)
// Single required option: `--observation-file` is a non-optional `PathBuf`
// without a default; there are no positional arguments.
Fp8Lint {
/// Path to captured observation JSON (frobenius, capability blocks)
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured NF4 codebook/roundtrip/storage/parity observation (CRUX-B-10)
// Single required option: `--observation-file` is a non-optional `PathBuf`
// without a default; there are no positional arguments.
Nf4Lint {
/// Path to captured NF4 observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured GPTQ compression/cosine/flags observation (CRUX-B-09)
// Single required option: `--observation-file` is a non-optional `PathBuf`
// without a default; there are no positional arguments.
GptqLint {
/// Path to captured GPTQ observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured CUDA OOM postmortem report (CRUX-F-13)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `--report-file` is a non-optional `PathBuf` without a default, so clap
//   treats it as required; `--stderr-file` is optional.
// * No positional arguments.
OomLint {
/// Path to captured OOM postmortem JSON (e.g. /tmp/apr-oom-<ts>.json)
#[arg(long, value_name = "FILE")]
report_file: PathBuf,
/// Optional captured stderr log to verify the OOM_REPORT breadcrumb
#[arg(long, value_name = "FILE")]
stderr_file: Option<PathBuf>,
},
/// Lint a captured NCCL failure-diagnostics JSON from stderr (CRUX-F-15)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `--diag-file` is a non-optional `PathBuf` without a default, so clap
//   treats it as required.
// * `--exit-code` is an optional signed `i32`; the ">= 128" gate described in
//   its help text is applied by the handler, not by this declaration.
NcclDiagLint {
/// Path to captured stderr JSON diagnostic
#[arg(long, value_name = "FILE")]
diag_file: PathBuf,
/// Optional observed exit code (gate: >= 128 = NCCL class)
#[arg(long, value_name = "I32")]
exit_code: Option<i32>,
/// Require the `suggest` field to cite an nvidia.com / NVIDIA/nccl URL
#[arg(long)]
require_doc_link: bool,
},
/// Lint a captured `apr agent --trace` ReAct loop trace (CRUX-I-06)
// Reviewer notes (plain `//` comments so the generated `--help` text is untouched).
// * `--trace-file` is a non-optional `PathBuf` without a default, so clap
//   treats it as required.
// * `--max-iterations` is an optional signed `i64`, so clap will parse a
//   negative budget. NOTE(review): confirm the handler treats that as a lint
//   failure rather than a valid budget.
ReactTraceLint {
/// Path to captured trace JSON
#[arg(long, value_name = "FILE")]
trace_file: PathBuf,
/// Optional max_iterations budget the trace was produced under
#[arg(long, value_name = "N")]
max_iterations: Option<i64>,
/// Require the scratchpad to parse cleanly as Thought/Action/Observation blocks
#[arg(long)]
require_grammar: bool,
},
/// Lint a captured `$APR_TRACE_DIR` hang stack-dump directory (CRUX-F-14)
HangTraceLint {
/// Path to the captured trace directory
#[arg(long, value_name = "DIR")]
trace_dir: PathBuf,
/// Inspection mode: `timeout` (expects per-rank dumps) or `success` (expects empty dir)
#[arg(long, value_name = "MODE", default_value = "timeout")]
mode: String,
/// Expected world_size when mode=timeout (number of rank{N}.py.txt files)
#[arg(long, value_name = "N", default_value_t = 2)]
world_size: usize,
/// Actual exit code from the run under inspection (for exit-code gate)
#[arg(long, value_name = "I32")]
exit_code: Option<i32>,
/// Expected exit code (typically 124 for timeout, 1 for other error, 0 for success)
#[arg(long, value_name = "I32")]
expected_exit_code: Option<i32>,
},
/// Lint two captured `apr finetune --parallel ddp --json` outputs (N=1, N=k) (CRUX-D-11)
DdpMetricsLint {
/// Path to N=1 metrics JSON
#[arg(long, value_name = "FILE")]
metrics_1gpu_file: PathBuf,
/// Path to N=world_size metrics JSON
#[arg(long, value_name = "FILE")]
metrics_ngpu_file: PathBuf,
/// World size used for --metrics-ngpu-file run (>= 2)
#[arg(long, value_name = "N")]
world_size: i64,
/// Scaling-efficiency floor (default 0.85, PyTorch DDP convention)
#[arg(long, value_name = "F", default_value_t = 0.85)]
scaling_floor: f64,
/// Loss-parity relative tolerance (default 0.01)
#[arg(long, value_name = "F", default_value_t = 0.01)]
loss_tolerance: f64,
},
/// Lint a captured `apr dataset audio-inspect --format json` body (CRUX-H-13)
AudioInspectLint {
/// Path to captured JSON body
#[arg(long, value_name = "FILE")]
json_file: PathBuf,
/// Optional expected sample_rate (typically the `--resample-to` arg)
#[arg(long, value_name = "U32")]
expected_sample_rate: Option<u32>,
/// Optional expected channel count (1 = mono after --mono)
#[arg(long, value_name = "U32")]
expected_channels: Option<u32>,
},
/// Lint captured flash-attn2 parity + provenance JSON outputs (CRUX-L-02)
AttnParityLint {
/// Path to captured `apr kernel parity --impl flash2 --ref naive --json` body
#[arg(long, value_name = "FILE")]
parity_file: Option<PathBuf>,
/// Path to captured `apr run --attn flash2 --json` body for provenance check
#[arg(long, value_name = "FILE")]
provenance_file: Option<PathBuf>,
/// Path to captured head_dim error JSON
#[arg(long, value_name = "FILE")]
head_dim_error_file: Option<PathBuf>,
/// Max absolute diff tolerance (default 5e-3, FlashAttention-2 bound)
#[arg(long, value_name = "F", default_value_t = 5e-3)]
tol_abs: f64,
/// Min cosine similarity floor (default 0.9999)
#[arg(long, value_name = "F", default_value_t = 0.9999)]
tol_cos: f64,
},
/// Lint a captured `apr attn-viz` attention dump (CRUX-F-17)
AttnVizLint {
/// Path to attention dump in JSON form (4-D [layers][heads][rows][cols] floats)
#[arg(long, value_name = "FILE")]
attn_file: Option<PathBuf>,
/// Path to HTML heatmap output
#[arg(long, value_name = "FILE")]
html_file: Option<PathBuf>,
/// Minimum <svg|<canvas open-tag count expected in HTML (|layers|*|heads|)
#[arg(long, value_name = "N", default_value_t = 1)]
expected_heatmaps: usize,
/// Row-softmax normalization tolerance (default 1e-5)
#[arg(long, value_name = "F64", default_value_t = 1e-5)]
tolerance: f64,
/// Causal-mask zero epsilon (default 1e-9)
#[arg(long, value_name = "F64", default_value_t = 1e-9)]
epsilon: f64,
},
/// Lint a captured `apr trace --check-finite` error JSON and/or `--list` coverage JSON (CRUX-F-11)
CheckFiniteLint {
/// Captured stderr JSON from `apr trace --check-finite` on a poisoned model
#[arg(long, value_name = "FILE")]
error_file: Option<PathBuf>,
/// Captured stdout JSON from `apr trace --check-finite --list`
#[arg(long, value_name = "FILE")]
list_file: Option<PathBuf>,
/// Minimum layer-coverage count when `--list-file` is supplied (default 100)
#[arg(long, value_name = "N", default_value_t = 100)]
min_layers: usize,
},
/// Lint a captured `apr debug embed-viz` CSV (CRUX-F-18)
EmbedVizLint {
/// Path to captured embed-viz CSV (token_id,token_str,x,y)
#[arg(long, value_name = "FILE")]
csv_file: PathBuf,
/// Expected row count == vocab_size (optional)
#[arg(long, value_name = "N")]
expected_vocab_size: Option<usize>,
/// Second CSV captured under the same seed for determinism check (optional)
#[arg(long, value_name = "FILE")]
csv_file_b: Option<PathBuf>,
},
/// Lint a captured `apr explain --format jsonl` token-selection trace (CRUX-F-19)
ExplainTokenLint {
/// Path to captured JSONL body (one sampled-token record per line)
#[arg(long, value_name = "FILE")]
jsonl_file: PathBuf,
/// Tolerance for `Σ post_prob ≈ 1.0` (default 1e-5)
#[arg(long, value_name = "F64", default_value_t = 1e-5)]
tolerance: f64,
/// Assert greedy decoding: sampled_id must equal argmax(pre_prob)
#[arg(long)]
require_greedy: bool,
},
/// Lint a captured GPU memory Chrome Trace Event Format JSON (CRUX-F-07)
GpuMemtraceLint {
/// Path to captured Chrome Trace JSON from `apr profile --gpu-memory-trace`
#[arg(long, value_name = "FILE")]
trace_file: PathBuf,
},
/// Lint a captured KV-cache utilization timeline (CRUX-F-06)
KvTimelineLint {
/// Path to captured `apr profile --kv-timeline --json` body
#[arg(long, value_name = "FILE")]
timeline_file: PathBuf,
/// Preemption threshold (default 0.95, vLLM canonical)
#[arg(long, value_name = "FRACTION", default_value_t = 0.95)]
preempt_threshold: f64,
},
/// Lint a captured OTLP/JSON ExportTraceServiceRequest body (CRUX-K-08)
OtlpLint {
/// Path to captured OTLP/JSON export body
#[arg(long, value_name = "FILE")]
otlp_file: PathBuf,
/// Require at least one `apr.inference` span to be present
#[arg(long)]
require_apr_span: bool,
/// Require gen_ai.* and apr.tokens.* attribute keys on some span
#[arg(long)]
require_genai_attrs: bool,
/// Verify W3C trace-context propagation: expect this 32-hex traceId
#[arg(long, value_name = "HEX32")]
expect_trace_id: Option<String>,
},
/// Lint a captured Prometheus /metrics response (CRUX-K-07)
PrometheusLint {
/// Path to captured /metrics response body (text/plain; version=0.0.4)
#[arg(long, value_name = "FILE")]
metrics_file: PathBuf,
/// Optional captured Content-Type header to verify against version=0.0.4
#[arg(long, value_name = "HEADER")]
content_type: Option<String>,
/// Require the K-07 metric set (apr_num_requests_running, ...) to be present
#[arg(long)]
require_k07_metrics: bool,
},
/// Lint a captured OpenAI tool-use response (CRUX-C-11)
ToolUseLint {
/// Path to captured OpenAI tool-use response JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a GBNF grammar-constrained observation (CRUX-C-10)
GbnfLint {
/// Path to captured GBNF observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a typical-p sampling observation (CRUX-C-22)
TypicalPLint {
// Help text added for parity with the sibling `*Lint` variants, whose
// `observation_file` options all carry a one-line description.
/// Path to captured typical-p observation file
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Gradient-norm telemetry analysis (CRUX-F-09)
GradNorm {
/// Path to JSON file of per-step grad-norm records
#[arg(long, value_name = "FILE")]
history_file: PathBuf,
/// Maximum allowed clipped grad-norm (for cap-violation check)
#[arg(long, value_name = "M")]
max_grad_norm: Option<f64>,
/// Rolling-median window size for spike detection (in steps)
#[arg(long, default_value = "16")]
spike_window: usize,
/// Multiplier threshold for spike detection
#[arg(long, default_value = "10.0")]
spike_multiplier: f64,
},
/// Lint a captured registry byte-quota observation (CRUX-A-22)
RegistryQuotaLint {
/// Path to captured quota/atomic/ceiling observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured imatrix calibration observation (CRUX-B-07)
ImatrixLint {
/// Path to captured imatrix observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured /v1/embeddings observation (CRUX-C-13)
EmbeddingsLint {
// Help text added for parity with the sibling `*Lint` variants, whose
// `observation_file` options all carry a one-line description.
/// Path to captured /v1/embeddings observation file
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured Hub+local unified-search merge observation (CRUX-A-23)
UnifiedSearchLint {
/// Path to captured unified-search observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured `apr rm` / `apr gc` blob-GC observation (CRUX-A-25)
RmGcLint {
/// Path to captured rm/gc observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Lint a captured APR_MODELS shared-cache observation (CRUX-A-21)
SharedCacheLint {
/// Path to captured dedup/permission observation JSON
#[arg(long, value_name = "FILE")]
observation_file: PathBuf,
},
/// Perplexity classifier (CRUX-E-02)
Ppl {
/// JSON file containing an array of per-token natural-log
/// probabilities (e.g. `[-1.2, -0.5, -2.1, ...]`). Required.
#[arg(long, value_name = "FILE")]
log_probs_file: PathBuf,
},
/// Validate dequant→requant metadata preservation (CRUX-B-19)
QuantPreservationLint {
/// Reference GGUF (pre-roundtrip)
#[arg(long, value_name = "REF.gguf")]
reference: PathBuf,
/// Requantized GGUF (post-roundtrip)
#[arg(long, value_name = "REQ.gguf")]
requant: PathBuf,
},
/// Split a safetensors file into shards + weight-map index (CRUX-B-05)
Shard {
/// Single-file safetensors model to split
#[arg(value_name = "FILE")]
file: PathBuf,
/// Maximum size of each shard (e.g. 5GB, 500MB, 1.5GiB)
#[arg(long, value_name = "SIZE", default_value = "5GB")]
max_shard_size: String,
/// Output directory for shards + model.safetensors.index.json
#[arg(short, long, value_name = "DIR")]
output: PathBuf,
},
/// Reconstruct a single safetensors file from a sharded directory (CRUX-B-05)
Unshard {
/// Sharded directory containing model.safetensors.index.json
#[arg(value_name = "DIR")]
input: PathBuf,
/// Output single-file safetensors path
#[arg(short, long, value_name = "FILE")]
output: PathBuf,
},
/// Publishing, conversion, and analysis tools
#[command(flatten)]
Tools(ToolCommands),
/// Score a query/passage pair (or rank multiple passages) with a BERT
/// cross-encoder loaded from an APR v2 file (GH-326 Phase 3).
///
/// Wraps `aprender_core::models::bert::CrossEncoder::load_from_reader`
/// + `score()`. The APR must contain the canonical HF BERT tensor
/// names (see `models::bert::expected_bert_tensor_names`).
///
/// Tokenisation is NOT applied here — caller passes pre-tokenised
/// `input_ids` + `token_type_ids` as comma-delimited u32 lists. A
/// dedicated tokeniser-aware mode is Phase 3b follow-up scope.
// NOTE: `///` comments in this variant are rendered by clap as `--help` text;
// maintainer-only notes are kept in `//` comments so they stay out of the CLI.
Rerank {
/// Path to the APR file containing the cross-encoder weights.
#[arg(value_name = "MODEL")]
model: PathBuf,
/// Pre-tokenised input ids (comma-separated `u32`s). Mutually
/// exclusive with `--query`+`--passage`+`--vocab` (Phase 3b).
/// Example: `--input-ids 101,2024,102,3456,102` for `[CLS] q [SEP] p [SEP]`.
// Kept as a raw String: no parser-level `conflicts_with` is declared here,
// so the exclusivity described above is presumably checked by the handler
// (not visible in this file section) — verify.
#[arg(long, value_name = "IDS")]
input_ids: Option<String>,
/// Pre-tokenised token-type ids (comma-separated `u32`s).
/// Same length as `--input-ids`. 0 for query side, 1 for passage.
#[arg(long, value_name = "IDS")]
token_type_ids: Option<String>,
/// Phase 3b — query text. Pair with `--passage` + `--vocab` to enable
/// in-process WordPiece tokenisation. The tokeniser builds
/// `[CLS] query [SEP] passage [SEP]` with `token_type_ids = 0` for
/// the query side and `1` for the passage side.
#[arg(long, value_name = "TEXT")]
query: Option<String>,
/// Phase 3b — passage text. Required when `--query` is supplied
/// in single-pair mode (use `--passages` for batch ranking).
#[arg(long, value_name = "TEXT")]
passage: Option<String>,
/// Phase 5 — batch ranking mode (#326). Passage candidates to
/// score against `--query`. May be supplied multiple times:
/// `apr rerank model.apr --query "..." --passages "p1" --passages "p2"`.
/// Mutually exclusive with `--passage`. Output is one
/// `score[i]` line per passage in input order, OR a JSON array
/// of `{passage, logit, score}` objects sorted by descending
/// score when `--sort` is set.
// `Vec<String>` makes the option repeatable; an absent flag yields an empty Vec.
#[arg(long, value_name = "TEXT")]
passages: Vec<String>,
/// Phase 5 — sort batch output by descending score (highest
/// relevance first). Only meaningful with `--passages` and
/// `--json`. Default: preserve input order.
#[arg(long)]
sort: bool,
/// Phase 5 — limit to top-K passages after sorting. Implies
/// `--sort`. Default 0 (no limit).
#[arg(long, default_value_t = 0)]
top_k: usize,
/// Phase 3b — path to a WordPiece `vocab.txt` (one token per line,
/// line index = token id). Required when `--query` is supplied.
/// Must contain entries for `[CLS]`, `[SEP]`, and `[UNK]`.
/// Phase 4 accepts HuggingFace `tokenizer.json` (extension-detected).
#[arg(long, value_name = "FILE")]
vocab: Option<PathBuf>,
/// Override hidden_dim (default: 384 / MiniLM-L-6).
#[arg(long, default_value_t = 384)]
hidden_dim: usize,
/// Override num_layers (default: 6 / MiniLM-L-6).
#[arg(long, default_value_t = 6)]
num_layers: usize,
/// Override num_heads (default: 12 / MiniLM-L-6).
#[arg(long, default_value_t = 12)]
num_heads: usize,
/// Override intermediate_dim (default: 1536 / MiniLM-L-6).
#[arg(long, default_value_t = 1536)]
intermediate_dim: usize,
/// Override vocab_size (default: 30522 / bert-base-uncased).
#[arg(long, default_value_t = 30522)]
vocab_size: usize,
/// Override max_position_embeddings (default: 512).
#[arg(long, default_value_t = 512)]
max_position_embeddings: usize,
/// Override type_vocab_size (default: 2).
#[arg(long, default_value_t = 2)]
type_vocab_size: usize,
/// Number of labels in the classifier head (default: 1 for
/// regression-style relevance scoring).
#[arg(long, default_value_t = 1)]
num_labels: usize,
/// Load the optional BERT pooler dense layer (default: true).
/// Cross-encoders that skip the pooler should pass `--with-pooler false`.
// clap gives `bool` fields `ArgAction::SetTrue` by default, which takes no
// value; combined with a `true` default that made it impossible to switch
// the pooler off (`--with-pooler false` was rejected as an unexpected
// argument). `ArgAction::Set` accepts an explicit `true`/`false`, and
// `num_args = 0..=1` + `default_missing_value` keep a bare `--with-pooler`
// parsing as `true`, as it did before.
#[arg(
long,
default_value_t = true,
action = clap::ArgAction::Set,
num_args = 0..=1,
default_missing_value = "true"
)]
with_pooler: bool,
/// Emit the raw logit instead of the sigmoid-mapped relevance score.
#[arg(long)]
raw_logit: bool,
/// Output as JSON.
#[arg(long)]
json: bool,
},
/// Produce sentence embeddings from a BERT bi-encoder (GH-326 Phase 6).
///
/// First-stage dense retrieval companion to `apr rerank`. Loads an
/// encoder-only BertModel (e.g. `sentence-transformers/all-MiniLM-L6-v2`),
/// tokenises the input text with WordPiece, runs the full encoder
/// forward, then pools the hidden states with one of:
/// `--pool cls` — take the [CLS] hidden state
/// `--pool mean` — mean over non-padding token positions (default;
/// sentence-transformers convention)
/// Optionally L2-normalises the result (`--normalize`, default true,
/// matches sentence-transformers).
// NOTE: `///` comments in this variant are rendered by clap as `--help` text;
// maintainer-only notes are kept in `//` comments so they stay out of the CLI.
Embed {
/// Path to the APR file containing the encoder weights (BertModel).
#[arg(value_name = "MODEL")]
model: PathBuf,
/// Text to encode. Repeatable: `apr embed model.apr --text "a" --text "b" --vocab tok.json`.
// `Vec<String>` makes the option repeatable; an absent flag yields an empty Vec.
#[arg(long, value_name = "TEXT")]
text: Vec<String>,
/// Phase 7 (GH-326) — read texts from a file, one per line.
/// Concatenated with `--text` inputs in order: `--text` first,
/// then `--text-file` rows. Blank lines and lines starting
/// with `#` are skipped. Useful for RAG-style first-stage
/// retrieval where the second-stage rerank candidate set
/// (50-100 documents) is the embed input.
#[arg(long, value_name = "FILE")]
text_file: Option<PathBuf>,
/// Path to a WordPiece `vocab.txt` or HF `tokenizer.json`.
// Non-`Option` with no default, so clap treats `--vocab` as required.
#[arg(long, value_name = "FILE")]
vocab: PathBuf,
/// Pooling strategy (`cls` or `mean`). Default: `mean`
/// (matches sentence-transformers convention).
// Free-form String with no `value_parser` restriction; the accepted values
// are presumably validated by the handler (not visible here) — verify.
#[arg(long, default_value = "mean")]
pool: String,
/// L2-normalise the output embedding. Default: true (matches
/// sentence-transformers convention). Pass `--normalize false`
/// to keep raw magnitudes.
// clap gives `bool` fields `ArgAction::SetTrue` by default, which takes no
// value; combined with a `true` default that made it impossible to turn
// normalisation off (`--normalize false` was rejected as an unexpected
// argument). `ArgAction::Set` accepts an explicit `true`/`false`, and
// `num_args = 0..=1` + `default_missing_value` keep a bare `--normalize`
// parsing as `true`, as it did before.
#[arg(
long,
default_value_t = true,
action = clap::ArgAction::Set,
num_args = 0..=1,
default_missing_value = "true"
)]
normalize: bool,
/// Override hidden_dim (default: 384 / MiniLM).
#[arg(long, default_value_t = 384)]
hidden_dim: usize,
/// Override num_layers (default: 6 / MiniLM-L-6).
#[arg(long, default_value_t = 6)]
num_layers: usize,
/// Override num_heads.
#[arg(long, default_value_t = 12)]
num_heads: usize,
/// Override intermediate_dim.
#[arg(long, default_value_t = 1536)]
intermediate_dim: usize,
/// Override vocab_size.
#[arg(long, default_value_t = 30522)]
vocab_size: usize,
/// Override max_position_embeddings.
#[arg(long, default_value_t = 512)]
max_position_embeddings: usize,
/// Override type_vocab_size.
#[arg(long, default_value_t = 2)]
type_vocab_size: usize,
/// Output as JSON.
#[arg(long)]
json: bool,
},
}
// Compiled only when the `training` cargo feature is enabled.
// NOTE: the `///` comments on the variants and fields below are rendered by
// clap's derive as `--help` text, so maintainer-only notes are kept in `//`
// comments to stay out of the CLI output.
#[cfg(feature = "training")]
/// Subcommands for `apr runs` — experiment run management (ALB-050/051)
#[derive(Subcommand, Debug)]
pub enum RunsCommands {
/// List all training experiment runs (with inline loss sparklines)
Ls {
/// Directory to scan for experiments (default: current dir)
// `None` when the flag is omitted; the "current dir" fallback mentioned in
// the help text is not applied here, presumably by the handler — verify.
#[arg(long, value_name = "DIR")]
dir: Option<PathBuf>,
/// Read from global experiment registry (~/.entrenar/experiments.db)
// No `conflicts_with` is declared between `--dir` and `--global`; how the
// two combine is decided outside this definition.
#[arg(long)]
global: bool,
/// Filter by status: running, completed, failed, all
// Plain String with no `value_parser` restriction: the values listed in the
// help text are not enforced at parse time by this definition.
#[arg(long, default_value = "all")]
status: String,
/// Output as JSON
#[arg(long)]
json: bool,
/// Maximum number of runs to show
// The default is given as the string "50" and parsed into `usize` by clap.
#[arg(long, default_value = "50")]
limit: usize,
},
/// Show detailed metrics for a specific run (with braille loss curve)
Show {
/// Run ID
// No `long`/`short`, so this is a required positional argument.
#[arg(value_name = "RUN_ID")]
run_id: String,
/// Directory containing experiment DB
#[arg(long, value_name = "DIR")]
dir: Option<PathBuf>,
/// Read from global registry
#[arg(long)]
global: bool,
/// Output as JSON
#[arg(long)]
json: bool,
},
/// Compare two runs side-by-side (loss curves, config diff, metrics)
Diff {
/// First run ID
// `run_a` and `run_b` are both required positionals, taken in declaration order.
#[arg(value_name = "RUN_A")]
run_a: String,
/// Second run ID
#[arg(value_name = "RUN_B")]
run_b: String,
/// Directory containing experiment DB
#[arg(long, value_name = "DIR")]
dir: Option<PathBuf>,
/// Read from global registry
#[arg(long)]
global: bool,
/// Output as JSON
#[arg(long)]
json: bool,
},
}
// Compiled only when the `training` cargo feature is enabled.
// NOTE: the `///` comments on the variant and its fields are rendered by
// clap's derive as `--help` text; maintainer-only notes use `//` instead.
#[cfg(feature = "training")]
/// Subcommands for `apr experiment` — interactive experiment browser (ALB-024)
#[derive(Subcommand, Debug)]
pub enum ExperimentCommands {
/// Browse experiment history with interactive TUI (loss curves, params)
View {
/// Path to experiment database file
// Optional; no `conflicts_with` is declared against `--global`, so the
// precedence between the two is decided outside this definition.
#[arg(long, value_name = "FILE")]
db: Option<PathBuf>,
/// Read from global experiment registry (~/.entrenar/experiments.db)
#[arg(long)]
global: bool,
/// Output as JSON (non-interactive)
#[arg(long)]
json: bool,
},
}
// NOTE: the `///` comments on the variant and its fields are rendered by
// clap's derive as `--help` text; maintainer-only notes use `//` instead.
/// CRUX-K-11: Subcommands for `apr modelfile`.
#[derive(Subcommand, Debug)]
pub enum ModelfileSubcommand {
/// Parse an Ollama-style Modelfile and emit the parsed config.
///
/// Grammar: `FROM`, `PARAMETER`, `TEMPLATE`, `SYSTEM`, `LICENSE`,
/// `MESSAGE`, `ADAPTER` directives. Triple-quoted blocks supported.
/// Directive names are case-insensitive. Unknown directives raise
/// `file:line:col` errors.
Parse {
/// Path to the Modelfile
// No `long`/`short`, so this is a required positional argument.
#[arg(value_name = "FILE")]
file: PathBuf,
/// Output format: `json` or `human`
// Plain String with no `value_parser` restriction: the two documented
// values are not enforced at parse time by this definition.
#[arg(long, default_value = "json")]
format: String,
},
}
// NOTE: the `///` comments on the variant and its fields are rendered by
// clap's derive as `--help` text; maintainer-only notes use `//` instead.
/// GH-876: Subcommands for `apr probar` — consolidates the probador testing
/// framework under `apr`. Milestone 1 ships only `tensor` (the migrated
/// existing behavior). Subsequent milestones add the remaining 14 probador
/// subcommands as separate PRs that delegate to the probador library.
#[derive(Subcommand, Debug)]
pub enum ProbarSubcommand {
/// Export tensor activations for visual regression testing (PMAT-481).
///
/// Generates JSON/PNG per-layer test artifacts that can be compared
/// against a golden reference directory to detect regressions in
/// model behavior after weight updates, quantization, or refactors.
Tensor {
/// Path to .apr model file
// No `long`/`short`, so this is a required positional argument.
#[arg(value_name = "FILE")]
file: PathBuf,
/// Output directory for test artifacts
// Available as both `-o` and `--output`; the default is a relative path.
#[arg(short, long, default_value = "./probar-export")]
output: PathBuf,
/// Export format: json, png, or both
// Plain String with no `value_parser` restriction: the documented values
// are not enforced at parse time by this definition.
#[arg(long, default_value = "both")]
format: String,
/// Golden reference directory for comparison
#[arg(long)]
golden: Option<PathBuf>,
/// Filter layers by name pattern
// The pattern syntax (substring, glob, regex) is not specified here — TODO confirm.
#[arg(long)]
layer: Option<String>,
/// Exit non-zero on golden divergence (CI mode, PMAT-481)
// Plain on/off flag: false unless `--assert` is passed.
#[arg(long)]
assert: bool,
/// Cosine similarity threshold for golden comparison (default: 0.98)
// Despite the field name, the help text describes this as a similarity
// threshold. The default is the string "0.98", parsed into `f32` by clap.
#[arg(long, default_value = "0.98")]
tolerance: f32,
},
}