use serde_json::Value;
use std::fs;
use std::path::Path;
use std::process::Command;
mod common;
use common::{ChildInputExt, TempDir};
/// Checks `biors diff` on JSON files: a key-reordered copy must be reported as
/// matching with no content diff, while a file with different content must be
/// reported as a mismatch with differing hashes and a located first difference.
#[test]
fn diff_reports_canonical_json_matches_and_mismatches() {
    let workspace = TempDir::new("biors-diff");
    let expected = workspace.write("expected.json", r#"{"tokens":[1,2],"id":"seq1"}"#);
    let reordered = workspace.write("reordered.json", r#"{"id":"seq1","tokens":[1,2]}"#);
    let mismatch = workspace.write("mismatch.json", r#"{"id":"seq1","tokens":[1,3]}"#);

    // Same document, keys in a different order.
    let same = run_biors(&["diff"], &[&expected, &reordered]);
    let same_data = &same["data"];
    assert_eq!(same_data["matches"], true);
    let expected_hash = same_data["expected_sha256"]
        .as_str()
        .expect("expected hash");
    assert!(expected_hash.starts_with("sha256:"));
    assert_eq!(same_data["content_diff"], Value::Null);

    // One token value differs.
    let differing = run_biors(&["diff"], &[&expected, &mismatch]);
    let differing_data = &differing["data"];
    assert_eq!(differing_data["matches"], false);
    assert_ne!(
        differing_data["expected_sha256"],
        differing_data["observed_sha256"]
    );
    let content_diff = &differing_data["content_diff"];
    assert_eq!(
        content_diff["expected_path"],
        expected.display().to_string()
    );
    let first_offset = content_diff["first_difference"]["byte_offset"].as_u64();
    assert!(first_offset.is_some());
}
/// Runs `biors pipeline --max-length 6 -` with one FASTA record supplied via
/// `tap_stdin` and checks the reported step chain and the padded `input_ids`.
#[test]
fn pipeline_outputs_validate_tokenize_export_chain_without_config() {
    let cli_args = ["pipeline", "--max-length", "6", "-"];
    let output = common::spawn_biors(&cli_args).tap_stdin(">seq1\nACDE\n");
    assert!(
        output.status.success(),
        "stderr: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    assert!(output.stderr.is_empty());

    let report: Value = serde_json::from_slice(&output.stdout).expect("valid JSON output");
    let data = &report["data"];
    assert_eq!(data["pipeline"], "validate_tokenize_export.v0");
    assert_eq!(data["ready"], true);

    // Steps must appear in this exact order.
    let steps = &data["steps"];
    for (index, step_name) in ["validate", "tokenize", "export"].iter().enumerate() {
        assert_eq!(steps[index]["name"], *step_name);
    }
    assert_eq!(steps[2]["status"], "passed");

    let input_ids = &data["workflow"]["model_input"]["records"][0]["input_ids"];
    assert_eq!(*input_ids, serde_json::json!([0, 1, 2, 3, 0, 0]));
}
/// Runs the example TOML pipeline config with `--explain-plan` and checks the
/// echoed config, the ordered step names, the first plan stage, and the
/// resulting `input_ids`.
#[test]
fn pipeline_runs_toml_config_with_explain_plan() {
    let config_path = repo_root().join("examples/pipeline/protein.toml");
    let config_arg = config_path.to_string_lossy();
    let cli_args: [&str; 4] = ["pipeline", "--config", &config_arg, "--explain-plan"];
    let report = run_biors(&cli_args, &[]);
    let data = &report["data"];

    assert_eq!(data["pipeline"], "config_pipeline.v0");
    assert_eq!(data["config"]["name"], "protein-fixture-pipeline");
    assert_eq!(data["dry_run"], false);
    assert_eq!(data["explain_plan"], true);

    // Gather the step names in reported order before comparing them as a whole.
    let steps = data["steps"].as_array().expect("steps");
    let mut step_names = Vec::with_capacity(steps.len());
    for step in steps {
        step_names.push(step["name"].as_str().expect("step name"));
    }
    assert_eq!(
        step_names,
        vec!["parse", "normalize", "validate", "tokenize", "export"]
    );

    let first_stage = &data["plan"]["stages"][0];
    assert_eq!(first_stage["operation"], "parse FASTA input");

    let input_ids = &data["workflow"]["model_input"]["records"][0]["input_ids"];
    assert_eq!(*input_ids, serde_json::json!([0, 1, 2, 3, 0, 0, 0, 0]));
}
/// Passes a `.yaml` pipeline config to `biors --json pipeline --dry-run` and
/// checks that it exits with code 2, writes nothing to stderr, and reports a
/// `pipeline.invalid_config` error naming the unsupported extension.
#[test]
fn pipeline_config_rejects_yaml_by_default() {
    let workspace = TempDir::new("biors-pipeline-yaml");
    let yaml_text = r#"schema_version: biors.pipeline.v0
name: dry-run-only
input:
format: fasta
path: missing/input.fasta
normalize:
policy: strip_ascii_whitespace_uppercase
validate:
kind: protein
tokenize:
profile: protein-20
export:
format: model-input-json
max_length: 8
"#;
    let config_path = workspace.write("pipeline.yaml", yaml_text);
    let config_arg = config_path.to_string_lossy();

    let output = Command::new(env!("CARGO_BIN_EXE_biors"))
        .args([
            "--json",
            "pipeline",
            "--config",
            config_arg.as_ref(),
            "--dry-run",
        ])
        .output()
        .expect("run biors pipeline");

    assert_eq!(output.status.code(), Some(2));
    assert!(output.stderr.is_empty());

    let report: Value = serde_json::from_slice(&output.stdout).expect("valid JSON error");
    let error = &report["error"];
    assert_eq!(error["code"], "pipeline.invalid_config");
    let message = error["message"].as_str().expect("error message");
    assert!(message.contains("unsupported pipeline config extension: yaml"));
}
/// Runs the example JSON pipeline config without `--explain-plan` and checks
/// that the run is ready and that `plan` is null in the output.
#[test]
fn pipeline_runs_json_config() {
    let config_path = repo_root().join("examples/pipeline/protein.json");
    let config_arg = config_path.to_string_lossy();
    let cli_args: [&str; 3] = ["pipeline", "--config", &config_arg];
    let report = run_biors(&cli_args, &[]);
    let data = &report["data"];

    assert_eq!(data["pipeline"], "config_pipeline.v0");
    assert_eq!(data["config"]["schema_version"], "biors.pipeline.v0");
    assert_eq!(data["ready"], true);
    assert!(data["plan"].is_null());
}
/// Writes a DNA FASTA file and a TOML config that refers to it by file name,
/// runs the pipeline, and checks the reported provenance and padded `input_ids`.
#[test]
fn pipeline_config_accepts_nucleotide_profiles() {
    let workspace = TempDir::new("biors-pipeline-nucleotide");
    let fasta = workspace.write("dna.fasta", ">dna\nACGT\n");

    // The config references the FASTA by bare file name, not by full path.
    let fasta_name = fasta.file_name().expect("fasta filename").to_string_lossy();
    let config_text = format!(
        r#"schema_version = "biors.pipeline.v0"
name = "dna-pipeline"
[input]
format = "fasta"
path = "{}"
[normalize]
policy = "strip_ascii_whitespace_uppercase"
[validate]
kind = "dna"
[tokenize]
profile = "dna-iupac"
[export]
format = "model-input-json"
max_length = 6
pad_token_id = 0
padding = "fixed_length"
"#,
        fasta_name
    );
    let config_path = workspace.write("dna.toml", &config_text);
    let config_arg = config_path.to_string_lossy();

    let cli_args: [&str; 3] = ["pipeline", "--config", &config_arg];
    let report = run_biors(&cli_args, &[]);
    let data = &report["data"];
    assert_eq!(data["ready"], true);
    assert_eq!(data["config"]["name"], "dna-pipeline");

    let provenance = &data["workflow"]["provenance"];
    assert_eq!(provenance["validation_alphabet"], "dna-iupac");
    assert_eq!(provenance["tokenizer"]["name"], "dna-iupac");

    let input_ids = &data["workflow"]["model_input"]["records"][0]["input_ids"];
    assert_eq!(*input_ids, serde_json::json!([0, 1, 2, 3, 0, 0]));
}
/// Combines `validate.kind = "protein"` with the `dna-iupac` tokenizer profile
/// and checks that the pipeline exits with code 2 and reports an invalid-config
/// error located at `validate.kind`.
#[test]
fn pipeline_config_rejects_kind_profile_mismatch() {
    let workspace = TempDir::new("biors-pipeline-profile-mismatch");
    workspace.write("dna.fasta", ">dna\nACGT\n");
    let config_text = r#"schema_version = "biors.pipeline.v0"
name = "mismatch"
[input]
format = "fasta"
path = "dna.fasta"
[normalize]
policy = "strip_ascii_whitespace_uppercase"
[validate]
kind = "protein"
[tokenize]
profile = "dna-iupac"
[export]
format = "model-input-json"
max_length = 6
"#;
    let config_path = workspace.write("mismatch.toml", config_text);

    let output = Command::new(env!("CARGO_BIN_EXE_biors"))
        .args(["--json", "pipeline", "--config"])
        .arg(config_path)
        .output()
        .expect("run biors pipeline");
    assert_eq!(output.status.code(), Some(2));

    let report: Value = serde_json::from_slice(&output.stdout).expect("valid JSON error");
    let error = &report["error"];
    assert_eq!(error["code"], "pipeline.invalid_config");
    assert_eq!(error["location"], "validate.kind");
    let message = error["message"].as_str().expect("error message");
    assert!(message.contains("validate.kind must be 'dna'"));
}
/// Runs the package pipeline with `--write-lock` and checks that the lockfile
/// is written and contains the expected schema, package, and hash fields.
#[test]
fn pipeline_writes_lockfile_with_package_provenance() {
    let workspace = TempDir::new("biors-pipeline-lock");
    let lockfile = workspace.path().join("pipeline.lock");
    let repo = repo_root();
    let config_path = repo.join("examples/protein-package/pipelines/protein.toml");
    let package_path = repo.join("examples/protein-package/manifest.json");

    let config_arg = config_path.to_string_lossy();
    let package_arg = package_path.to_string_lossy();
    let lock_arg = lockfile.to_string_lossy();
    let cli_args: [&str; 7] = [
        "pipeline",
        "--config",
        &config_arg,
        "--package",
        &package_arg,
        "--write-lock",
        &lock_arg,
    ];
    let report = run_biors(&cli_args, &[]);
    assert_eq!(report["data"]["pipeline"], "config_pipeline.v0");
    assert!(lockfile.exists(), "pipeline.lock was not written");

    let lock_text = fs::read_to_string(&lockfile).expect("read lockfile");
    let lock: Value = serde_json::from_str(&lock_text).expect("valid lockfile JSON");
    assert_eq!(lock["schema_version"], "biors.pipeline.lock.v0");
    assert_eq!(
        lock["pipeline_config"]["schema_version"],
        "biors.pipeline.v0"
    );

    let package = &lock["package"];
    assert_eq!(package["name"], "protein-seed");
    assert_eq!(
        package["model_sha256"],
        "sha256:2c1da72b15fab35bd6f1bb62f5037b936e26e6413a220fa9afe5a64bce0df68d"
    );
    assert_eq!(package["runtime_backend"], "onnx-webgpu");
    assert_eq!(package["backend_version"], "onnx-webgpu.v0");

    let vocab_hash = lock["hashes"]["vocabulary_sha256"]
        .as_str()
        .expect("vocab hash");
    assert!(vocab_hash.starts_with("sha256:"));
    let input_hash = lock["execution"]["input_hash"]
        .as_str()
        .expect("input hash");
    assert!(input_hash.starts_with("fnv1a64:"));

    // The lockfile must not contain a top-level `python_baseline` key.
    assert!(lock.get("python_baseline").is_none());
}
/// Requests a lockfile for a package manifest together with a config from a
/// different example directory and checks that the run exits with code 2,
/// writes no lockfile, and reports `pipeline.lock_config_not_in_package`.
#[test]
fn pipeline_lock_rejects_package_with_unrelated_config() {
    let workspace = TempDir::new("biors-pipeline-lock-unrelated");
    let lockfile = workspace.path().join("pipeline.lock");
    let repo = repo_root();
    let config_path = repo.join("examples/pipeline/protein.toml");
    let package_path = repo.join("examples/protein-package/manifest.json");

    let mut command = Command::new(env!("CARGO_BIN_EXE_biors"));
    command.arg("--json").arg("pipeline");
    command.arg("--config").arg(&config_path);
    command.arg("--package").arg(&package_path);
    command.arg("--write-lock").arg(&lockfile);
    let output = command
        .output()
        .expect("run biors pipeline lock generation");

    assert_eq!(output.status.code(), Some(2));
    assert!(!lockfile.exists(), "pipeline.lock should not be written");

    let report: Value = serde_json::from_slice(&output.stdout).expect("valid JSON error");
    let error = &report["error"];
    assert_eq!(error["code"], "pipeline.lock_config_not_in_package");
    assert_eq!(error["location"], "pipeline.config");
}
/// Copies the package's `protein.toml` into a temp directory under the same
/// file name, then checks that lock generation for that copy exits with code 2,
/// writes no lockfile, and reports `pipeline.lock_config_not_in_package`.
#[test]
fn pipeline_lock_rejects_same_basename_config_outside_package() {
    let workspace = TempDir::new("biors-pipeline-lock-same-basename");
    let lockfile = workspace.path().join("pipeline.lock");
    let other_dir = workspace.path().join("other");
    fs::create_dir_all(&other_dir).expect("create other config dir");

    // Same basename as the packaged config, but stored in the temp directory.
    let repo = repo_root();
    let packaged_config = repo.join("examples/protein-package/pipelines/protein.toml");
    let copied_config = other_dir.join("protein.toml");
    fs::copy(packaged_config, &copied_config).expect("copy config");
    let package_path = repo.join("examples/protein-package/manifest.json");

    let mut command = Command::new(env!("CARGO_BIN_EXE_biors"));
    command.arg("--json").arg("pipeline");
    command.arg("--config").arg(&copied_config);
    command.arg("--package").arg(&package_path);
    command.arg("--write-lock").arg(&lockfile);
    let output = command
        .output()
        .expect("run biors pipeline lock generation");

    assert_eq!(output.status.code(), Some(2));
    assert!(!lockfile.exists(), "pipeline.lock should not be written");

    let report: Value = serde_json::from_slice(&output.stdout).expect("valid JSON error");
    assert_eq!(
        report["error"]["code"],
        "pipeline.lock_config_not_in_package"
    );
}
/// Regenerates the package lockfile from the repository root, using
/// repo-relative paths, and checks that it equals the lockfile stored at
/// `examples/pipeline/pipeline.lock`.
#[test]
fn checked_in_pipeline_lock_matches_current_generator() {
    let workspace = TempDir::new("biors-pipeline-lock-current");
    let generated_lock = workspace.path().join("pipeline.lock");
    let repo = repo_root();
    let lock_arg = generated_lock.to_string_lossy();

    // Config and manifest paths are relative, so run from the repo root.
    let mut command = Command::new(env!("CARGO_BIN_EXE_biors"));
    command
        .current_dir(&repo)
        .arg("pipeline")
        .args([
            "--config",
            "examples/protein-package/pipelines/protein.toml",
        ])
        .args(["--package", "examples/protein-package/manifest.json"])
        .args(["--write-lock", lock_arg.as_ref()]);
    let output = command
        .output()
        .expect("run biors pipeline lock generation");
    assert!(
        output.status.success(),
        "stderr: {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let checked_in_text = fs::read_to_string(repo.join("examples/pipeline/pipeline.lock"))
        .expect("read checked-in lockfile");
    let checked_in: Value =
        serde_json::from_str(&checked_in_text).expect("checked-in lockfile JSON");

    let generated_text = fs::read_to_string(generated_lock).expect("read generated lockfile");
    let generated: Value =
        serde_json::from_str(&generated_text).expect("generated lockfile JSON");

    assert_eq!(generated, checked_in);
}
/// Runs `biors debug --max-length 6 -` on the sequence `AX*` and checks the
/// per-token statuses, the null `model_input`, and that the error
/// visualization markers contain an `E`.
#[test]
fn debug_outputs_step_by_step_tokens_model_input_and_error_visualization() {
    let cli_args = ["debug", "--max-length", "6", "-"];
    let output = common::spawn_biors(&cli_args).tap_stdin(">bad\nAX*\n");
    assert!(
        output.status.success(),
        "stderr: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    assert!(output.stderr.is_empty());

    let report: Value = serde_json::from_slice(&output.stdout).expect("valid JSON output");
    let data = &report["data"];
    let record = &data["records"][0];
    assert_eq!(data["view"], "sequence_debug.v0");
    assert_eq!(record["id"], "bad");
    assert_eq!(record["normalized_sequence"], "AX*");

    // One expected status per residue of `AX*`, in sequence order.
    let token_map = &record["token_map"];
    for (index, status) in ["standard", "warning", "error"].iter().enumerate() {
        assert_eq!(token_map[index]["status"], *status);
    }

    assert_eq!(record["model_input"], Value::Null);
    let markers = record["error_visualization"]["markers"]
        .as_str()
        .expect("markers");
    assert!(markers.contains('E'));
}
/// Calls `common::run_biors_paths` with `args` and `paths` and parses the
/// captured stdout as JSON.
///
/// # Panics
///
/// Panics when stdout is not valid JSON. The message includes the arguments,
/// the parse error, and the raw stdout so a failing test shows what the binary
/// actually printed.
fn run_biors(args: &[&str], paths: &[&Path]) -> Value {
    let output = common::run_biors_paths(args, paths);
    serde_json::from_slice(&output.stdout).unwrap_or_else(|err| {
        panic!(
            "biors {:?} did not print valid JSON: {}\nstdout: {}",
            args,
            err,
            String::from_utf8_lossy(&output.stdout)
        )
    })
}
fn repo_root() -> std::path::PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR")).join("../../..")
}