#![allow(
clippy::disallowed_methods,
clippy::unwrap_used,
clippy::uninlined_format_args
)]
use assert_cmd::Command;
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Writes a synthetic `vocab.json` into `dir`, creating the directory first.
///
/// The file contains a single JSON object with `n` entries, mapping the key
/// `"t{i}"` to the integer `i` for every `i` in `0..n`.
///
/// # Panics
///
/// Panics if the directory cannot be created, the map cannot be serialized,
/// or the file cannot be written.
fn stage_vocab_json(dir: &std::path::Path, n: usize) {
    fs::create_dir_all(dir).expect("mkdir tokenizer dir");

    // Build the whole token -> id table in one pass.
    let vocab: serde_json::Map<String, serde_json::Value> = (0..n)
        .map(|id| (format!("t{id}"), serde_json::Value::from(id as u64)))
        .collect();

    let payload = serde_json::to_string(&vocab).expect("serialize");
    let target = dir.join("vocab.json");
    fs::write(target, payload).expect("write vocab.json");
}
/// Builds (but does not run) an `apr pretrain` invocation that passes `--init`.
///
/// The caller supplies the dataset, tokenizer, run-directory and init paths
/// plus the step count. The remaining flags are fixed: `--batch-size 16`,
/// `--seq-length 512`, `--mode finetune`, `--device cpu` and `--synthetic`.
///
/// # Panics
///
/// Panics if `Command::cargo_bin("apr")` returns an error.
#[allow(dead_code)]
fn pretrain_cmd(
    dataset: &std::path::Path,
    tokenizer: &std::path::Path,
    run_dir: &std::path::Path,
    init: &std::path::Path,
    num_steps: usize,
) -> Command {
    let steps = num_steps.to_string();
    let mut invocation = Command::cargo_bin("apr").expect("apr binary built");

    invocation.arg("pretrain");

    // Caller-supplied paths, one flag/value pair per line.
    invocation.arg("--dataset").arg(dataset);
    invocation.arg("--tokenizer").arg(tokenizer);
    invocation.arg("--run-dir").arg(run_dir);
    invocation.arg("--init").arg(init);

    // Fixed settings, appended in the same order as the flags above expect.
    invocation.args(["--num-steps", steps.as_str()]);
    invocation.args(["--batch-size", "16"]);
    invocation.args(["--seq-length", "512"]);
    invocation.args(["--mode", "finetune"]);
    invocation.args(["--device", "cpu"]);
    invocation.arg("--synthetic");

    invocation
}
/// Runs `apr pretrain` in `from-scratch` mode without `--init` and checks
/// that the Chinchilla gate banner does not show up on stderr.
///
/// Only stderr is inspected; the process exit status is not asserted on.
#[test]
fn falsify_chinchilla_003_no_init_skips_gate() {
    let tmp = TempDir::new().expect("tempdir");
    let root = tmp.path();

    // Stage the on-disk inputs inside the temporary directory.
    let dataset = root.join("dataset");
    fs::create_dir_all(&dataset).expect("mkdir dataset");
    let tokenizer = root.join("tok");
    stage_vocab_json(&tokenizer, 50257);
    let run_dir = root.join("run");

    let mut invocation = Command::cargo_bin("apr").expect("apr binary built");
    invocation.arg("pretrain");
    invocation.arg("--dataset").arg(&dataset);
    invocation.arg("--tokenizer").arg(&tokenizer);
    invocation.arg("--run-dir").arg(&run_dir);
    invocation.args(["--num-steps", "1"]);
    invocation.args(["--batch-size", "1"]);
    invocation.args(["--seq-length", "64"]);
    invocation.args(["--mode", "from-scratch"]);
    invocation.args(["--device", "cpu"]);
    invocation.arg("--synthetic");

    let output = invocation.output().expect("run apr pretrain");
    let stderr = String::from_utf8_lossy(&output.stderr);

    let gate_fired = stderr.contains("[P0-J] Chinchilla hard gate");
    assert!(
        !gate_fired,
        "no-init path must NOT trigger the Chinchilla gate; stderr was: {stderr}"
    );
}
/// Placeholder fixture builder: currently a no-op.
///
/// The body is empty and `_path` is ignored, so calling this touches nothing
/// on disk.
///
/// NOTE(review): presumably meant to write an init `.apr` fixture at `_path`
/// once implemented; confirm before relying on it.
#[allow(dead_code)]
fn build_init_apr_fixture(_path: &PathBuf) {}
/// Checks the stdout of `apr pretrain --help` for two things: the
/// `--force-under-provisioned` flag, and at least one of the markers
/// `P0-J`, `Chinchilla` or `chinchilla-gate-v1`.
#[test]
fn force_under_provisioned_flag_documented_in_help() {
    let output = Command::cargo_bin("apr")
        .expect("apr binary built")
        .args(["pretrain", "--help"])
        .output()
        .expect("run apr pretrain --help");
    let help_text = String::from_utf8_lossy(&output.stdout);

    let lists_flag = help_text.contains("--force-under-provisioned");
    assert!(
        lists_flag,
        "apr pretrain --help must list --force-under-provisioned per contract C-CHINCHILLA-GATE INV-CHINCHILLA-002"
    );

    // Any one of these markers counts as a reference to the gate.
    let mentions_gate = ["P0-J", "Chinchilla", "chinchilla-gate-v1"]
        .iter()
        .any(|marker| help_text.contains(*marker));
    assert!(
        mentions_gate,
        "apr pretrain --help should reference the Chinchilla gate (P0-J / chinchilla-gate-v1)"
    );
}