use serde_json::Value;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use tempfile::TempDir;
fn test_data_path(filename: &str) -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("test_data")
.join(filename)
}
fn run_fastp(input: &str, temp_dir: &TempDir) -> (PathBuf, PathBuf) {
let output_fq = temp_dir.path().join("fastp_output.fq");
let output_json = temp_dir.path().join("fastp_output.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path(input))
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp - is it installed?");
assert!(status.success(), "fastp command failed");
(output_fq, output_json)
}
fn run_fasterp(input: &str, temp_dir: &TempDir) -> (PathBuf, PathBuf) {
let output_fq = temp_dir.path().join("fasterp_output.fq");
let output_json = temp_dir.path().join("fasterp_output.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path(input))
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "fasterp command failed");
(output_fq, output_json)
}
fn compare_json_outputs(fastp_json: &PathBuf, fasterp_json: &PathBuf) {
let fastp_content = fs::read_to_string(fastp_json).expect("Failed to read fastp JSON");
let fasterp_content = fs::read_to_string(fasterp_json).expect("Failed to read fasterp JSON");
let fastp_data: Value = serde_json::from_str(&fastp_content).expect("Invalid fastp JSON");
let fasterp_data: Value = serde_json::from_str(&fasterp_content).expect("Invalid fasterp JSON");
let fastp_filtering = &fastp_data["filtering_result"];
let fasterp_filtering = &fasterp_data["filtering_result"];
assert_eq!(
fastp_filtering["passed_filter_reads"], fasterp_filtering["passed_filter_reads"],
"Passed filter reads don't match"
);
assert_eq!(
fastp_filtering["too_short_reads"], fasterp_filtering["too_short_reads"],
"Too short reads don't match"
);
assert_eq!(
fastp_filtering["too_many_N_reads"], fasterp_filtering["too_many_N_reads"],
"Too many N reads don't match"
);
assert_eq!(
fastp_filtering["low_quality_reads"], fasterp_filtering["low_quality_reads"],
"Low quality reads don't match"
);
assert_eq!(
fastp_filtering["total_filtered_reads"], fasterp_filtering["total_filtered_reads"],
"Total filtered reads don't match"
);
assert_eq!(
fastp_filtering["total_reads"], fasterp_filtering["total_reads"],
"Total reads don't match"
);
assert_eq!(
fastp_filtering["total_bases"], fasterp_filtering["total_bases"],
"Total bases don't match"
);
assert_eq!(
fastp_filtering["total_filtered_bases"], fasterp_filtering["total_filtered_bases"],
"Total filtered bases don't match"
);
assert_eq!(
fastp_filtering["total_q20_bases"], fasterp_filtering["total_q20_bases"],
"Total Q20 bases don't match"
);
assert_eq!(
fastp_filtering["total_q30_bases"], fasterp_filtering["total_q30_bases"],
"Total Q30 bases don't match"
);
assert_eq!(
fastp_filtering["gc_content"], fasterp_filtering["gc_content"],
"GC content doesn't match"
);
assert_eq!(
fastp_filtering["average_length"], fasterp_filtering["average_length"],
"Average length doesn't match"
);
}
fn run_fastp_pe(input1: &str, input2: &str, temp_dir: &TempDir) -> (PathBuf, PathBuf, PathBuf) {
let output1 = temp_dir.path().join("fastp_R1.fq");
let output2 = temp_dir.path().join("fastp_R2.fq");
let output_json = temp_dir.path().join("fastp_pe.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path(input1))
.arg("-I")
.arg(test_data_path(input2))
.arg("-o")
.arg(&output1)
.arg("-O")
.arg(&output2)
.arg("-j")
.arg(&output_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp PE - is it installed?");
assert!(status.success(), "fastp PE command failed");
(output1, output2, output_json)
}
fn run_fasterp_pe(input1: &str, input2: &str, temp_dir: &TempDir) -> (PathBuf, PathBuf, PathBuf) {
let output1 = temp_dir.path().join("fasterp_R1.fq");
let output2 = temp_dir.path().join("fasterp_R2.fq");
let output_json = temp_dir.path().join("fasterp_pe.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path(input1))
.arg("-I")
.arg(test_data_path(input2))
.arg("-o")
.arg(&output1)
.arg("-O")
.arg(&output2)
.arg("-j")
.arg(&output_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp PE");
assert!(status.success(), "fasterp PE command failed");
(output1, output2, output_json)
}
#[test]
fn test_basic_filtering_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("R1.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_fq).unwrap();
assert_eq!(fastp_content, fasterp_content, "FASTQ outputs don't match");
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_small_dataset_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let (fastp_fq, fastp_json) = run_fastp("small_1k.fq", &temp_dir);
let (fasterp_fq, fasterp_json) = run_fasterp("small_1k.fq", &temp_dir);
let fastp_content = fs::read_to_string(fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"FASTQ outputs don't match for small dataset"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_medium_dataset_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let (fastp_fq, fastp_json) = run_fastp("medium_10k.fq", &temp_dir);
let (fasterp_fq, fasterp_json) = run_fasterp("medium_10k.fq", &temp_dir);
let fastp_content = fs::read_to_string(fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"FASTQ outputs don't match for medium dataset"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_length_filtering_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("small_1k.fq"))
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("-l")
.arg("50")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("small_1k.fq"))
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("-l")
.arg("50")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Length filtered outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_quality_filtering_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("small_1k.fq"))
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("-q")
.arg("20")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("small_1k.fq"))
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("-q")
.arg("20")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Quality filtered outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_cli_help_works() {
let output = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("--help")
.output()
.expect("Failed to run fasterp");
assert!(output.status.success());
let stdout = String::from_utf8_lossy(&output.stdout);
assert!(stdout.contains("A fast FASTQ preprocessor"));
}
#[test]
fn test_cli_version_works() {
let output = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("--version")
.output()
.expect("Failed to run fasterp");
assert!(output.status.success());
let stdout = String::from_utf8_lossy(&output.stdout);
assert!(stdout.contains(env!("CARGO_PKG_VERSION")));
}
#[test]
fn test_cli_missing_input_fails() {
let output = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-o")
.arg("out.fq")
.output()
.expect("Failed to run fasterp");
assert!(!output.status.success());
let stderr = String::from_utf8_lossy(&output.stderr);
assert!(stderr.contains("required"));
}
#[test]
fn test_gzip_input_decompression() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("small_1k.fq");
let input_gz = temp_dir.path().join("input.fq.gz");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let status = Command::new("gzip")
.arg("-c")
.arg(&input_fq)
.stdout(std::fs::File::create(&input_gz).unwrap())
.status()
.expect("Failed to run gzip");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_gz)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let fasterp_content = fs::read_to_string(&output_fq).unwrap();
let fastp_content = fs::read_to_string(&fastp_fq).unwrap();
assert_eq!(
fasterp_content, fastp_content,
"Gzipped input processing differs"
);
}
#[test]
fn test_gzip_output_compression() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("small_1k.fq");
let output_gz = temp_dir.path().join("output.fq.gz");
let output_json = temp_dir.path().join("output.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_gz)
.arg("-j")
.arg(&output_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
assert!(output_gz.exists());
let decompressed_fq = temp_dir.path().join("decompressed.fq");
let status = Command::new("gunzip")
.arg("-c")
.arg(&output_gz)
.stdout(std::fs::File::create(&decompressed_fq).unwrap())
.status()
.expect("Failed to run gunzip");
assert!(status.success());
let uncompressed_fq = temp_dir.path().join("uncompressed.fq");
let uncompressed_json = temp_dir.path().join("uncompressed.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&uncompressed_fq)
.arg("-j")
.arg(&uncompressed_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let decompressed_content = fs::read_to_string(&decompressed_fq).unwrap();
let uncompressed_content = fs::read_to_string(&uncompressed_fq).unwrap();
assert_eq!(
decompressed_content, uncompressed_content,
"Gzipped output differs from uncompressed"
);
}
#[test]
fn test_gzip_compression_levels() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("medium_10k.fq");
for level in [1, 6, 9] {
let output_gz = temp_dir.path().join(format!("output_level_{level}.fq.gz"));
let output_json = temp_dir.path().join(format!("output_level_{level}.json"));
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_gz)
.arg("-j")
.arg(&output_json)
.arg("-z")
.arg(level.to_string())
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "Compression level {level} failed");
assert!(
output_gz.exists(),
"Output file not created for level {level}"
);
}
let mut contents = Vec::new();
for level in [1, 6, 9] {
let output_gz = temp_dir.path().join(format!("output_level_{level}.fq.gz"));
let decompressed_fq = temp_dir.path().join(format!("decompressed_{level}.fq"));
let status = Command::new("gunzip")
.arg("-c")
.arg(&output_gz)
.stdout(std::fs::File::create(&decompressed_fq).unwrap())
.status()
.expect("Failed to run gunzip");
assert!(status.success());
contents.push(fs::read_to_string(&decompressed_fq).unwrap());
}
assert_eq!(contents[0], contents[1], "Level 1 and 6 differ");
assert_eq!(contents[1], contents[2], "Level 6 and 9 differ");
}
#[test]
fn test_n_base_filtering_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("small_1k.fq");
for n_limit in [0, 3, 10] {
let fastp_fq = temp_dir.path().join(format!("fastp_n{n_limit}.fq"));
let fastp_json = temp_dir.path().join(format!("fastp_n{n_limit}.json"));
let fasterp_fq = temp_dir.path().join(format!("fasterp_n{n_limit}.fq"));
let fasterp_json = temp_dir.path().join(format!("fasterp_n{n_limit}.json"));
let status = Command::new("fastp")
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("-n")
.arg(n_limit.to_string())
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("-n")
.arg(n_limit.to_string())
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"N-base filtering with limit {n_limit} differs"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
}
#[test]
fn test_multithreading_consistency() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("medium_10k.fq");
let thread_counts = [1, 2, 4, 8];
let mut outputs = Vec::new();
for threads in &thread_counts {
let output_fq = temp_dir.path().join(format!("output_t{threads}.fq"));
let output_json = temp_dir.path().join(format!("output_t{threads}.json"));
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-w")
.arg(threads.to_string())
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "Failed with {threads} threads");
let content = fs::read_to_string(&output_fq).unwrap();
outputs.push(content);
}
for i in 1..outputs.len() {
assert_eq!(
outputs[0], outputs[i],
"Output differs between 1 thread and {} threads",
thread_counts[i]
);
}
}
#[test]
fn test_multithreading_json_consistency() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("medium_10k.fq");
let output_1t_json = temp_dir.path().join("output_1t.json");
let output_4t_json = temp_dir.path().join("output_4t.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(temp_dir.path().join("output_1t.fq"))
.arg("-j")
.arg(&output_1t_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(temp_dir.path().join("output_4t.fq"))
.arg("-j")
.arg(&output_4t_json)
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&output_1t_json, &output_4t_json);
}
#[test]
fn test_multithreading_large_dataset() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("large_100k.fq");
let output_1t_fq = temp_dir.path().join("output_1t.fq");
let output_8t_fq = temp_dir.path().join("output_8t.fq");
let output_1t_json = temp_dir.path().join("output_1t.json");
let output_8t_json = temp_dir.path().join("output_8t.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_1t_fq)
.arg("-j")
.arg(&output_1t_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_8t_fq)
.arg("-j")
.arg(&output_8t_json)
.arg("-w")
.arg("8")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let output_1t = fs::read_to_string(&output_1t_fq).unwrap();
let output_8t = fs::read_to_string(&output_8t_fq).unwrap();
assert_eq!(
output_1t, output_8t,
"Multi-threading produces different output on large dataset"
);
compare_json_outputs(&output_1t_json, &output_8t_json);
}
#[test]
fn test_stdin_input() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("small_1k.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let input_file = std::fs::File::open(&input_fq).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg("-")
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-w")
.arg("1")
.stdin(input_file)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let expected_fq = temp_dir.path().join("expected.fq");
let expected_json = temp_dir.path().join("expected.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&expected_fq)
.arg("-j")
.arg(&expected_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let output_content = fs::read_to_string(&output_fq).unwrap();
let expected_content = fs::read_to_string(&expected_fq).unwrap();
assert_eq!(
output_content, expected_content,
"Stdin input produces different output"
);
}
#[test]
fn test_stdin_stdout_pipeline() {
let input_fq = test_data_path("R1.fq");
let input_file = std::fs::File::open(&input_fq).unwrap();
let output = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg("-")
.arg("-o")
.arg("-")
.arg("-j")
.arg("/dev/null")
.arg("-w")
.arg("1")
.stdin(input_file)
.output()
.expect("Failed to run fasterp");
assert!(output.status.success());
let stdout_content = String::from_utf8(output.stdout).unwrap();
assert!(
!stdout_content.is_empty(),
"No output from stdin/stdout pipeline"
);
}
#[test]
fn test_combined_filters_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("medium_10k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("-l")
.arg("30")
.arg("-q")
.arg("15")
.arg("-n")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("-l")
.arg("30")
.arg("-q")
.arg("15")
.arg("-n")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Combined filters produce different output"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_strict_combined_filters() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("medium_10k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("-l")
.arg("100")
.arg("-q")
.arg("30")
.arg("-n")
.arg("0")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("-l")
.arg("100")
.arg("-q")
.arg("30")
.arg("-n")
.arg("0")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Strict filters produce different output"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
fn create_fastq_record(name: &str, seq: &str, qual: &str) -> String {
format!("@{name}\n{seq}\n+\n{qual}\n")
}
fn quality_string_from_phred(phred_scores: &[u8]) -> String {
phred_scores.iter().map(|&p| (p + 33) as char).collect()
}
#[test]
fn test_unqualified_percent_boundary_40_percent() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let mut qual_scores = vec![15u8; 853]; qual_scores.extend(vec![5u8; 578]);
let seq = "A".repeat(1431);
let qual = quality_string_from_phred(&qual_scores);
let record = create_fastq_record("test_read", &seq, &qual);
fs::write(&input_fq, record).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-q")
.arg("9")
.arg("-l")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
assert!(
content.is_empty(),
"Read with 40.39% unqualified should be filtered"
);
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert_eq!(
json["filtering_result"]["low_quality_reads"]
.as_u64()
.unwrap(),
1
);
assert_eq!(
json["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap(),
0
);
}
#[test]
fn test_unqualified_percent_exact_40_percent() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let mut qual_scores = vec![15u8; 60]; qual_scores.extend(vec![5u8; 40]);
let seq = "A".repeat(100);
let qual = quality_string_from_phred(&qual_scores);
let record = create_fastq_record("test_read", &seq, &qual);
fs::write(&input_fq, record).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-q")
.arg("9")
.arg("-u")
.arg("40")
.arg("-l")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
assert!(
!content.is_empty(),
"Read with exactly 40.0% unqualified should pass"
);
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert_eq!(
json["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap(),
1
);
}
#[test]
fn test_average_quality_filter() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let seq = "A".repeat(100);
let qual_20 = quality_string_from_phred(&[20u8; 100]); let qual_15 = quality_string_from_phred(&[15u8; 100]);
let mut input_data = String::new();
input_data.push_str(&create_fastq_record("read_q20", &seq, &qual_20));
input_data.push_str(&create_fastq_record("read_q15", &seq, &qual_15));
fs::write(&input_fq, input_data).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-e")
.arg("18")
.arg("-q")
.arg("0") .arg("-l")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
assert_eq!(content.lines().count(), 4, "Should have 1 read (4 lines)");
assert!(content.contains("read_q20"), "read_q20 should pass");
assert!(!content.contains("read_q15"), "read_q15 should be filtered");
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert_eq!(
json["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap(),
1
);
assert_eq!(
json["filtering_result"]["low_quality_reads"]
.as_u64()
.unwrap(),
1
);
}
#[test]
fn test_max_length_trimming() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let seq = "ACGT".repeat(37) + "AC"; let qual = quality_string_from_phred(&[30u8; 150]);
let record = create_fastq_record("long_read", &seq, &qual);
fs::write(&input_fq, record).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-b")
.arg("100")
.arg("-l")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
let lines: Vec<&str> = content.lines().collect();
assert_eq!(lines.len(), 4);
assert_eq!(lines[1].len(), 100, "Sequence should be trimmed to 100bp");
assert_eq!(lines[3].len(), 100, "Quality should be trimmed to 100bp");
}
#[test]
fn test_combined_q_u_and_e_filters() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let mut qual1 = vec![25u8; 50]; qual1.extend(vec![5u8; 50]);
let qual2 = vec![12u8; 100];
let qual3 = vec![20u8; 100];
let seq = "A".repeat(100);
let mut input_data = String::new();
input_data.push_str(&create_fastq_record(
"read1",
&seq,
&quality_string_from_phred(&qual1),
));
input_data.push_str(&create_fastq_record(
"read2",
&seq,
&quality_string_from_phred(&qual2),
));
input_data.push_str(&create_fastq_record(
"read3",
&seq,
&quality_string_from_phred(&qual3),
));
fs::write(&input_fq, input_data).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-q")
.arg("9")
.arg("-u")
.arg("40")
.arg("-e")
.arg("15")
.arg("-l")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
assert_eq!(content.lines().count(), 4, "Only 1 read should pass");
assert!(content.contains("read3"), "Only read3 should pass");
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert_eq!(
json["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap(),
1
);
assert_eq!(
json["filtering_result"]["low_quality_reads"]
.as_u64()
.unwrap(),
2
);
}
#[test]
fn test_n_base_limit_exact_boundary() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let seq_5n = "ACGT".repeat(20) + "NNNNN"; let seq_6n = "ACGT".repeat(20) + "NNNNNN"; let qual = quality_string_from_phred(&[30u8; 85]);
let qual6 = quality_string_from_phred(&[30u8; 86]);
let mut input_data = String::new();
input_data.push_str(&create_fastq_record("read_5n", &seq_5n, &qual));
input_data.push_str(&create_fastq_record("read_6n", &seq_6n, &qual6));
fs::write(&input_fq, input_data).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-n")
.arg("5")
.arg("-l")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
assert_eq!(content.lines().count(), 4);
assert!(content.contains("read_5n"));
assert!(!content.contains("read_6n"));
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert_eq!(
json["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap(),
1
);
assert_eq!(
json["filtering_result"]["too_many_N_reads"]
.as_u64()
.unwrap(),
1
);
}
#[test]
fn test_length_filter_after_trimming() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let seq = "A".repeat(100);
let qual = quality_string_from_phred(&[30u8; 100]);
let record = create_fastq_record("test_read", &seq, &qual);
fs::write(&input_fq, record).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("--trim-front")
.arg("10")
.arg("--trim-tail")
.arg("40")
.arg("-l")
.arg("60")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
assert!(content.is_empty(), "Read should be filtered after trimming");
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert_eq!(
json["filtering_result"]["too_short_reads"]
.as_u64()
.unwrap(),
1
);
}
#[test]
fn test_empty_output_all_filtered() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let mut input_data = String::new();
for i in 0..10 {
let seq = "A".repeat(50); let qual = quality_string_from_phred(&[30u8; 50]);
input_data.push_str(&create_fastq_record(&format!("read{i}"), &seq, &qual));
}
fs::write(&input_fq, input_data).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-l")
.arg("1000") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let content = fs::read_to_string(&output_fq).unwrap();
assert!(content.is_empty(), "All reads should be filtered");
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert_eq!(
json["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap(),
0
);
assert_eq!(
json["filtering_result"]["too_short_reads"]
.as_u64()
.unwrap(),
10
);
assert_eq!(
json["summary"]["before_filtering"]["total_reads"]
.as_u64()
.unwrap(),
10
);
}
#[test]
fn test_invalid_quality_threshold() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("small_1k.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-q")
.arg("0")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
}
#[test]
#[ignore] fn test_large_dataset_100k_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let (fastp_fq, fastp_json) = run_fastp("large_100k.fq", &temp_dir);
let (fasterp_fq, fasterp_json) = run_fasterp("large_100k.fq", &temp_dir);
let fastp_content = fs::read_to_string(fastp_fq).unwrap();
let fasterp_content = fs::read_to_string(fasterp_fq).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Large dataset output differs"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
#[ignore] fn test_xlarge_dataset_500k() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("xlarge_500k.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-w")
.arg("8")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
assert!(output_fq.exists());
assert!(output_json.exists());
}
#[test]
fn test_different_batch_sizes_consistent() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("medium_10k.fq");
let batch_sizes = [1024 * 1024, 4 * 1024 * 1024, 16 * 1024 * 1024]; let mut outputs = Vec::new();
for (i, &batch_size) in batch_sizes.iter().enumerate() {
let output_fq = temp_dir.path().join(format!("output_batch_{i}.fq"));
let output_json = temp_dir.path().join(format!("output_batch_{i}.json"));
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.arg("-w")
.arg("4")
.arg("--batch-bytes")
.arg(batch_size.to_string())
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "Failed with batch size {batch_size}");
let content = fs::read_to_string(&output_fq).unwrap();
outputs.push(content);
}
for i in 1..outputs.len() {
assert_eq!(
outputs[0], outputs[i],
"Output differs with different batch sizes"
);
}
}
#[test]
fn test_json_report_structure() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("small_1k.fq");
let output_fq = temp_dir.path().join("output.fq");
let output_json = temp_dir.path().join("output.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_fq)
.arg("-j")
.arg(&output_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let json_content = fs::read_to_string(&output_json).unwrap();
let json: Value = serde_json::from_str(&json_content).unwrap();
assert!(json.get("summary").is_some(), "Missing 'summary' field");
assert!(
json.get("filtering_result").is_some(),
"Missing 'filtering_result' field"
);
assert!(
json.get("read1_before_filtering").is_some(),
"Missing 'read1_before_filtering' field"
);
let summary = &json["summary"];
assert!(summary.get("fastp_version").is_some());
assert!(summary.get("before_filtering").is_some());
assert!(summary.get("after_filtering").is_some());
let filtering = &json["filtering_result"];
assert!(filtering.get("passed_filter_reads").is_some());
assert!(filtering.get("low_quality_reads").is_some());
assert!(filtering.get("too_many_N_reads").is_some());
assert!(filtering.get("too_short_reads").is_some());
let detailed = &json["read1_before_filtering"];
assert!(detailed.get("quality_curves").is_some());
assert!(detailed.get("kmer_count").is_some());
}
#[test]
fn test_output_order_preserved_multithreading() {
let temp_dir = TempDir::new().unwrap();
let input_fq = test_data_path("medium_10k.fq");
let output_st = temp_dir.path().join("output_st.fq");
let output_mt = temp_dir.path().join("output_mt.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_st)
.arg("-j")
.arg("/dev/null")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_mt)
.arg("-j")
.arg("/dev/null")
.arg("-w")
.arg("8")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let st_content = fs::read_to_string(&output_st).unwrap();
let mt_content = fs::read_to_string(&output_mt).unwrap();
assert_eq!(
st_content, mt_content,
"Multi-threading changes output order"
);
}
#[test]
fn test_sliding_window_tail_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("small_1k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--cut_tail")
.arg("--cut_mean_quality")
.arg("20")
.arg("--cut_window_size")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--cut-tail")
.arg("--cut-mean-quality")
.arg("20")
.arg("--cut-window-size")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_sliding_window_front_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("small_1k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--cut_front")
.arg("--cut_mean_quality")
.arg("25")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--cut-front")
.arg("--cut-mean-quality")
.arg("25")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_sliding_window_both_ends_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("medium_10k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--cut_front")
.arg("--cut_tail")
.arg("--cut_mean_quality")
.arg("20")
.arg("--cut_window_size")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--cut-front")
.arg("--cut-tail")
.arg("--cut-mean-quality")
.arg("20")
.arg("--cut-window-size")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_fixed_front_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("small_1k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_front1")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim-front")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_fixed_tail_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("small_1k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_tail1")
.arg("10")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim-tail")
.arg("10")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_fixed_both_ends_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("medium_10k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_front1")
.arg("3")
.arg("--trim_tail1")
.arg("7")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim-front")
.arg("3")
.arg("--trim-tail")
.arg("7")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_poly_g_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("small_1k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_poly_g")
.arg("--poly_g_min_len")
.arg("10")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim-poly-g")
.arg("--poly-g-min-len")
.arg("10")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_poly_x_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("small_1k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_poly_x")
.arg("--poly_x_min_len")
.arg("12")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim-poly-x")
.arg("--poly-g-min-len")
.arg("12")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_combined_trimming_and_filtering_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("medium_10k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_front1")
.arg("2")
.arg("--cut_tail")
.arg("--cut_mean_quality")
.arg("20")
.arg("--trim_poly_g")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim-front")
.arg("2")
.arg("--cut-tail")
.arg("--cut-mean-quality")
.arg("20")
.arg("--trim-poly-g")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_disable_tail_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("small_1k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--disable_trim_poly_g")
.arg("--cut_mean_quality")
.arg("0")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--disable-trim-poly-g")
.arg("--disable-trim-tail")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_aggressive_trimming_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("medium_10k.fq");
let fastp_fq = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_fq = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fastp_fq)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("5")
.arg("--cut_front")
.arg("--cut_tail")
.arg("--cut_mean_quality")
.arg("30")
.arg("--cut_window_size")
.arg("3")
.arg("--trim_poly_g")
.arg("--poly_g_min_len")
.arg("8")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&fasterp_fq)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim-front")
.arg("5")
.arg("--trim-tail")
.arg("5")
.arg("--cut-front")
.arg("--cut-tail")
.arg("--cut-mean-quality")
.arg("30")
.arg("--cut-window-size")
.arg("3")
.arg("--trim-poly-g")
.arg("--poly-g-min-len")
.arg("8")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_trimming_with_multithreading_consistency() {
let temp_dir = TempDir::new().unwrap();
let input_path = test_data_path("large_100k.fq");
let output_st = temp_dir.path().join("output_st.fq");
let output_mt = temp_dir.path().join("output_mt.fq");
let json_st = temp_dir.path().join("output_st.json");
let json_mt = temp_dir.path().join("output_mt.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&output_st)
.arg("-j")
.arg(&json_st)
.arg("-w")
.arg("1")
.arg("--cut-mean-quality")
.arg("20")
.arg("--trim-front")
.arg("3")
.arg("--poly-g-min-len")
.arg("10")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_path)
.arg("-o")
.arg(&output_mt)
.arg("-j")
.arg(&json_mt)
.arg("-w")
.arg("4")
.arg("--cut-mean-quality")
.arg("20")
.arg("--trim-front")
.arg("3")
.arg("--poly-g-min-len")
.arg("10")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
compare_json_outputs(&json_st, &json_mt);
}
#[test]
fn test_paired_end_basic_processing() {
let temp_dir = TempDir::new().unwrap();
let (fastp_r1, fastp_r2, fastp_json) =
run_fastp_pe("pe_small_R1.fq", "pe_small_R2.fq", &temp_dir);
let (fasterp_r1, fasterp_r2, fasterp_json) =
run_fasterp_pe("pe_small_R1.fq", "pe_small_R2.fq", &temp_dir);
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_paired_end_quality_filtering() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-q")
.arg("20")
.arg("-u")
.arg("40")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-q")
.arg("20")
.arg("-u")
.arg("40")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_paired_end_length_filtering() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-l")
.arg("100")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-l")
.arg("100")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_paired_end_combined_filters() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-q")
.arg("20")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-q")
.arg("20")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_adapter_trimming_single_end_custom_adapter() {
let temp_dir = TempDir::new().unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let adapter = "AGATCGGAAGAGC";
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("R1.fq"))
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg(adapter)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("R1.fq"))
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-a")
.arg(adapter)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(fastp_content, fasterp_content, "Outputs don't match");
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_disable_adapter_trimming() {
let temp_dir = TempDir::new().unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("R1.fq"))
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("R1.fq"))
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(fastp_content, fasterp_content, "Outputs don't match");
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_adapter_trimming_with_quality_filter() {
let temp_dir = TempDir::new().unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let adapter = "AGATCGGAAGAGC";
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("small_1k.fq"))
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg(adapter)
.arg("-q")
.arg("20")
.arg("-l")
.arg("50")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("small_1k.fq"))
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-a")
.arg(adapter)
.arg("-q")
.arg("20")
.arg("-l")
.arg("50")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(fastp_content, fasterp_content, "Outputs don't match");
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_adapter_5base_perfect_match() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_input.fq");
fs::write(&test_input, "@read1\nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAGATC\n+\n?????????????????????????????????????????????????????????????\n").unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let status = Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-L") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"5-base adapter match outputs don't match"
);
}
#[test]
fn test_adapter_8base_with_mismatch() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_input.fq");
fs::write(&test_input, "@read1\nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAGATCGGC\n+\n????????????????????????????????????????????????????????????????\n").unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"8-base adapter with mismatch outputs don't match"
);
}
#[test]
fn test_adapter_5base_with_mismatch_not_trimmed() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_input.fq");
fs::write(&test_input, "@read1\nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAGATX\n+\n?????????????????????????????????????????????????????????????\n").unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"5-base adapter with mismatch should not be trimmed"
);
}
#[test]
fn test_paired_end_asymmetric_tail_trimming() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_tail1")
.arg("8")
.arg("--trim_tail2")
.arg("4")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("--trim_tail1")
.arg("8")
.arg("--trim_tail2")
.arg("4")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match with asymmetric tail trimming"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match with asymmetric tail trimming"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_adapter_at_exact_minimum_length() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_input.fq");
fs::write(
&test_input,
"@read1\nACGTACGTACGTACGAGATCGGAAGAGC\n+\n????????????????????????????\n",
)
.unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-l")
.arg("15") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-l")
.arg("15")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Outputs don't match for read at exact minimum length"
);
assert!(
fastp_content.contains("ACGTACGTACGTACG"),
"Read should be present in output"
);
}
#[test]
fn test_adapter_below_minimum_length() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_input.fq");
fs::write(
&test_input,
"@read1\nACGTACGTACGTACGATCGGAAGAGC\n+\n??????????????????????????\n",
)
.unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-l")
.arg("15") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-l")
.arg("15")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Outputs don't match for read below minimum length"
);
assert!(
fastp_content.is_empty(),
"Output should be empty (read filtered out)"
);
}
#[test]
fn test_extreme_quality_filter_with_adapters() {
let temp_dir = TempDir::new().unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-q")
.arg("30") .arg("-u")
.arg("10") .arg("-l")
.arg("100") .arg("-n")
.arg("2") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-q")
.arg("30")
.arg("-u")
.arg("10")
.arg("-l")
.arg("100")
.arg("-n")
.arg("2")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Outputs don't match with extreme quality filter"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_extreme_quality_filter_with_adapters_med() {
let temp_dir = TempDir::new().unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-q")
.arg("30") .arg("-u")
.arg("10") .arg("-l")
.arg("100") .arg("-n")
.arg("2") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-q")
.arg("30")
.arg("-u")
.arg("10")
.arg("-l")
.arg("100")
.arg("-n")
.arg("2")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Outputs don't match with extreme quality filter"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_minimal_all_features() {
let temp_dir = TempDir::new().unwrap();
let test_r1 = temp_dir.path().join("test_r1.fq");
let test_r2 = temp_dir.path().join("test_r2.fq");
let r1_data = "@seq.0
CCAGAAGCAAGTACACAGATATTTCTCGGAATCGTGTACATGCGCAGCAGTACACGCTTTTATGTAAGACGCAGCTTGTAGACGTACCACTATAGCGTGGGAGAATAACCGCCTTTGCCGTTATTCGAAGACCCCCCCCGGCCGCATCTC
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
@seq.1
TTGGAGCAACACCCACGCCGAAATTTCGAAACGTTACAGCGCGACCAAGATCGAGGTCATCCCCTTCACGATTAATCGCTACATGTGCTGGACAGTTTCTTGGACCTGATTATCCCATAATTAAGGGCTACGGTCACAAAAAGGATGCTA
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
let r2_data = "@seq.0
CGAAACGCCAGATCTCAATGCCACCCCAAAAGTTTAACTACAAACGGTACTAATTATCGTATCAAATGTATCACAAGAGTAAAGTTCCCGCAGAGGGCGGAGCGACAGGCACGGTCTATGATGAAATAGGGCTGGGACTGTACCTACGTG
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
@seq.1
AGCTGCATCGCTTTAGTATCTAGCGAAGCGTTAGGCGACCGCTGCCCTAGTTGACTAGACTGCTGTCCGGAATTTTGAGATCTCCACAATTATTTGTGGTCCAAATAACGTCATATACCCGTGTCCACTCGCTAGACCACGACCTTCTTC
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
fs::write(&test_r1, r1_data).unwrap();
fs::write(&test_r2, r2_data).unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("3")
.arg("--trim_tail1")
.arg("2")
.arg("--trim_front2")
.arg("4")
.arg("--trim_tail2")
.arg("3")
.arg("-q")
.arg("20")
.arg("-u")
.arg("30")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("3")
.arg("--trim_tail1")
.arg("2")
.arg("--trim_front2")
.arg("4")
.arg("--trim_tail2")
.arg("3")
.arg("-q")
.arg("20")
.arg("-u")
.arg("30")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_low_complexity_filter_basic() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_input.fq");
fs::write(&test_input, "@low1\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+\n????????????????????????????????????????\n@high1\nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n+\n????????????????????????????????????????\n@low2\nTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\n+\n????????????????????????????????????????\n@high2\nGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC\n+\n????????????????????????????????????????\n").unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-y") .arg("-Y")
.arg("30") .arg("-L") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-y")
.arg("-Y")
.arg("30")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Low complexity filter outputs don't match"
);
}
#[test]
fn test_low_complexity_with_quality_filter() {
let temp_dir = TempDir::new().unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.arg("-y") .arg("-Y")
.arg("30") .arg("-q")
.arg("20") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-y")
.arg("-Y")
.arg("30")
.arg("-q")
.arg("20")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Outputs don't match with combined filters"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_low_complexity_paired_end() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-y")
.arg("-Y")
.arg("30")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-y")
.arg("-Y")
.arg("30")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_multithreaded_pe_basic() {
let single_r1 = "/tmp/test_mt_pe_basic_single_R1.fq";
let single_r2 = "/tmp/test_mt_pe_basic_single_R2.fq";
let multi_r1 = "/tmp/test_mt_pe_basic_multi_R1.fq";
let multi_r2 = "/tmp/test_mt_pe_basic_multi_R2.fq";
let single_json = "/tmp/test_mt_pe_basic_single.json";
let multi_json = "/tmp/test_mt_pe_basic_multi.json";
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(single_r1)
.arg("-O")
.arg(single_r2)
.arg("-j")
.arg(single_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(multi_r1)
.arg("-O")
.arg(multi_r2)
.arg("-j")
.arg(multi_json)
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
let single_r1_content = fs::read_to_string(single_r1).unwrap();
let multi_r1_content = fs::read_to_string(multi_r1).unwrap();
assert_eq!(
single_r1_content, multi_r1_content,
"R1 outputs don't match between single and multi-threaded"
);
let single_r2_content = fs::read_to_string(single_r2).unwrap();
let multi_r2_content = fs::read_to_string(multi_r2).unwrap();
assert_eq!(
single_r2_content, multi_r2_content,
"R2 outputs don't match between single and multi-threaded"
);
compare_json_outputs(&PathBuf::from(single_json), &PathBuf::from(multi_json));
}
#[test]
fn test_multithreaded_pe_with_quality_filter() {
let single_r1 = "/tmp/test_mt_pe_qual_single_R1.fq";
let single_r2 = "/tmp/test_mt_pe_qual_single_R2.fq";
let multi_r1 = "/tmp/test_mt_pe_qual_multi_R1.fq";
let multi_r2 = "/tmp/test_mt_pe_qual_multi_R2.fq";
let single_json = "/tmp/test_mt_pe_qual_single.json";
let multi_json = "/tmp/test_mt_pe_qual_multi.json";
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(single_r1)
.arg("-O")
.arg(single_r2)
.arg("-j")
.arg(single_json)
.arg("-q")
.arg("25") .arg("-u")
.arg("30") .arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(multi_r1)
.arg("-O")
.arg(multi_r2)
.arg("-j")
.arg(multi_json)
.arg("-q")
.arg("25")
.arg("-u")
.arg("30")
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
let single_r1_content = fs::read_to_string(single_r1).unwrap();
let multi_r1_content = fs::read_to_string(multi_r1).unwrap();
assert_eq!(
single_r1_content, multi_r1_content,
"R1 outputs don't match"
);
let single_r2_content = fs::read_to_string(single_r2).unwrap();
let multi_r2_content = fs::read_to_string(multi_r2).unwrap();
assert_eq!(
single_r2_content, multi_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&PathBuf::from(single_json), &PathBuf::from(multi_json));
}
#[test]
fn test_multithreaded_pe_with_adapters() {
let single_r1 = "/tmp/test_mt_pe_adapter_single_R1.fq";
let single_r2 = "/tmp/test_mt_pe_adapter_single_R2.fq";
let multi_r1 = "/tmp/test_mt_pe_adapter_multi_R1.fq";
let multi_r2 = "/tmp/test_mt_pe_adapter_multi_R2.fq";
let single_json = "/tmp/test_mt_pe_adapter_single.json";
let multi_json = "/tmp/test_mt_pe_adapter_multi.json";
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(single_r1)
.arg("-O")
.arg(single_r2)
.arg("-j")
.arg(single_json)
.arg("--adapter_sequence")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(multi_r1)
.arg("-O")
.arg(multi_r2)
.arg("-j")
.arg(multi_json)
.arg("--adapter_sequence")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
let single_r1_content = fs::read_to_string(single_r1).unwrap();
let multi_r1_content = fs::read_to_string(multi_r1).unwrap();
assert_eq!(
single_r1_content, multi_r1_content,
"R1 outputs don't match"
);
let single_r2_content = fs::read_to_string(single_r2).unwrap();
let multi_r2_content = fs::read_to_string(multi_r2).unwrap();
assert_eq!(
single_r2_content, multi_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&PathBuf::from(single_json), &PathBuf::from(multi_json));
}
#[test]
fn test_multithreaded_pe_with_trimming() {
let single_r1 = "/tmp/test_mt_pe_trim_single_R1.fq";
let single_r2 = "/tmp/test_mt_pe_trim_single_R2.fq";
let multi_r1 = "/tmp/test_mt_pe_trim_multi_R1.fq";
let multi_r2 = "/tmp/test_mt_pe_trim_multi_R2.fq";
let single_json = "/tmp/test_mt_pe_trim_single.json";
let multi_json = "/tmp/test_mt_pe_trim_multi.json";
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(single_r1)
.arg("-O")
.arg(single_r2)
.arg("-j")
.arg(single_json)
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("--trim_front2")
.arg("3")
.arg("--trim_tail2")
.arg("5")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(multi_r1)
.arg("-O")
.arg(multi_r2)
.arg("-j")
.arg(multi_json)
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("--trim_front2")
.arg("3")
.arg("--trim_tail2")
.arg("5")
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
let single_r1_content = fs::read_to_string(single_r1).unwrap();
let multi_r1_content = fs::read_to_string(multi_r1).unwrap();
assert_eq!(
single_r1_content, multi_r1_content,
"R1 outputs don't match"
);
let single_r2_content = fs::read_to_string(single_r2).unwrap();
let multi_r2_content = fs::read_to_string(multi_r2).unwrap();
assert_eq!(
single_r2_content, multi_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&PathBuf::from(single_json), &PathBuf::from(multi_json));
}
#[test]
fn test_multithreaded_pe_all_features() {
let single_r1 = "/tmp/test_mt_pe_all_single_R1.fq";
let single_r2 = "/tmp/test_mt_pe_all_single_R2.fq";
let multi_r1 = "/tmp/test_mt_pe_all_multi_R1.fq";
let multi_r2 = "/tmp/test_mt_pe_all_multi_R2.fq";
let single_json = "/tmp/test_mt_pe_all_single.json";
let multi_json = "/tmp/test_mt_pe_all_multi.json";
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(single_r1)
.arg("-O")
.arg(single_r2)
.arg("-j")
.arg(single_json)
.arg("-q")
.arg("20") .arg("-u")
.arg("40") .arg("-l")
.arg("25") .arg("-n")
.arg("5") .arg("--trim_front1")
.arg("3") .arg("--trim_tail1")
.arg("2")
.arg("--trim_front2")
.arg("2")
.arg("--trim_tail2")
.arg("3")
.arg("--adapter_sequence")
.arg("AGATCGGAAGAGC") .arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("-y") .arg("-Y")
.arg("30")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(multi_r1)
.arg("-O")
.arg(multi_r2)
.arg("-j")
.arg(multi_json)
.arg("-q")
.arg("20")
.arg("-u")
.arg("40")
.arg("-l")
.arg("25")
.arg("-n")
.arg("5")
.arg("--trim_front1")
.arg("3")
.arg("--trim_tail1")
.arg("2")
.arg("--trim_front2")
.arg("2")
.arg("--trim_tail2")
.arg("3")
.arg("--adapter_sequence")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("-y")
.arg("-Y")
.arg("30")
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
let single_r1_content = fs::read_to_string(single_r1).unwrap();
let multi_r1_content = fs::read_to_string(multi_r1).unwrap();
assert_eq!(
single_r1_content, multi_r1_content,
"R1 outputs don't match"
);
let single_r2_content = fs::read_to_string(single_r2).unwrap();
let multi_r2_content = fs::read_to_string(multi_r2).unwrap();
assert_eq!(
single_r2_content, multi_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&PathBuf::from(single_json), &PathBuf::from(multi_json));
}
#[test]
fn test_multithreaded_pe_low_complexity() {
let single_r1 = "/tmp/test_mt_pe_complexity_single_R1.fq";
let single_r2 = "/tmp/test_mt_pe_complexity_single_R2.fq";
let multi_r1 = "/tmp/test_mt_pe_complexity_multi_R1.fq";
let multi_r2 = "/tmp/test_mt_pe_complexity_multi_R2.fq";
let single_json = "/tmp/test_mt_pe_complexity_single.json";
let multi_json = "/tmp/test_mt_pe_complexity_multi.json";
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(single_r1)
.arg("-O")
.arg(single_r2)
.arg("-j")
.arg(single_json)
.arg("-y")
.arg("-Y")
.arg("30")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(multi_r1)
.arg("-O")
.arg(multi_r2)
.arg("-j")
.arg(multi_json)
.arg("-y")
.arg("-Y")
.arg("30")
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
let single_r1_content = fs::read_to_string(single_r1).unwrap();
let multi_r1_content = fs::read_to_string(multi_r1).unwrap();
assert_eq!(
single_r1_content, multi_r1_content,
"R1 outputs don't match"
);
let single_r2_content = fs::read_to_string(single_r2).unwrap();
let multi_r2_content = fs::read_to_string(multi_r2).unwrap();
assert_eq!(
single_r2_content, multi_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&PathBuf::from(single_json), &PathBuf::from(multi_json));
}
#[test]
fn test_base_correction_basic_functionality() {
let temp_dir = TempDir::new().unwrap();
let output_r1 = temp_dir.path().join("corrected_R1.fq");
let output_r2 = temp_dir.path().join("corrected_R2.fq");
let output_json = temp_dir.path().join("corrected.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("merge_test_R1.fq"))
.arg("-I")
.arg(test_data_path("merge_test_R2.fq"))
.arg("-o")
.arg(&output_r1)
.arg("-O")
.arg(&output_r2)
.arg("-j")
.arg(&output_json)
.arg("-c") .arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp with correction");
assert!(status.success());
assert!(output_r1.exists(), "R1 output doesn't exist");
assert!(output_r2.exists(), "R2 output doesn't exist");
let r1_content = fs::read_to_string(&output_r1).unwrap();
let r2_content = fs::read_to_string(&output_r2).unwrap();
let number_of_reads = r1_content.lines().count() / 4;
assert_eq!(r1_content.lines().count(), number_of_reads * 4); assert_eq!(r2_content.lines().count(), number_of_reads * 4);
}
#[test]
fn test_base_correction_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_corrected_R1.fq");
let fastp_r2 = temp_dir.path().join("fastp_corrected_R2.fq");
let fastp_json = temp_dir.path().join("fastp_corrected.json");
let fasterp_r1 = temp_dir.path().join("fasterp_corrected_R1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_corrected_R2.fq");
let fasterp_json = temp_dir.path().join("fasterp_corrected.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-c") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp with correction");
assert!(status.success(), "fastp with correction failed");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-c") .arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp with correction");
assert!(status.success(), "fasterp with correction failed");
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs with correction don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs with correction don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_base_correction_multithreaded() {
let temp_dir = TempDir::new().unwrap();
let single_r1 = temp_dir.path().join("single_R1.fq");
let single_r2 = temp_dir.path().join("single_R2.fq");
let single_json = temp_dir.path().join("single.json");
let multi_r1 = temp_dir.path().join("multi_R1.fq");
let multi_r2 = temp_dir.path().join("multi_R2.fq");
let multi_json = temp_dir.path().join("multi.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&single_r1)
.arg("-O")
.arg(&single_r2)
.arg("-j")
.arg(&single_json)
.arg("-c")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run single-threaded with correction");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&multi_r1)
.arg("-O")
.arg(&multi_r2)
.arg("-j")
.arg(&multi_json)
.arg("-c")
.arg("-w")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run multi-threaded with correction");
assert!(status.success());
let single_r1_content = fs::read_to_string(&single_r1).unwrap();
let multi_r1_content = fs::read_to_string(&multi_r1).unwrap();
assert_eq!(
single_r1_content, multi_r1_content,
"R1 outputs differ between single and multi-threaded correction"
);
let single_r2_content = fs::read_to_string(&single_r2).unwrap();
let multi_r2_content = fs::read_to_string(&multi_r2).unwrap();
assert_eq!(
single_r2_content, multi_r2_content,
"R2 outputs differ between single and multi-threaded correction"
);
compare_json_outputs(&single_json, &multi_json);
}
#[test]
fn test_base_correction_custom_parameters() {
let temp_dir = TempDir::new().unwrap();
let output_r1 = temp_dir.path().join("custom_R1.fq");
let output_r2 = temp_dir.path().join("custom_R2.fq");
let output_json = temp_dir.path().join("custom.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&output_r1)
.arg("-O")
.arg(&output_r2)
.arg("-j")
.arg(&output_json)
.arg("-c")
.arg("--overlap-len-require")
.arg("20")
.arg("--overlap-diff-limit")
.arg("3")
.arg("--overlap-diff-percent-limit")
.arg("15")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run with custom parameters");
assert!(status.success());
assert!(output_r1.exists());
assert!(output_r2.exists());
assert!(output_json.exists());
}
#[test]
fn test_base_correction_with_filtering() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_filter_R1.fq");
let fastp_r2 = temp_dir.path().join("fastp_filter_R2.fq");
let fastp_json = temp_dir.path().join("fastp_filter.json");
let fasterp_r1 = temp_dir.path().join("fasterp_filter_R1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_filter_R2.fq");
let fasterp_json = temp_dir.path().join("fasterp_filter.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-c") .arg("-l")
.arg("20") .arg("-q")
.arg("15") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-c") .arg("-l")
.arg("20") .arg("-q")
.arg("15") .arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs with correction+filtering don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs with correction+filtering don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_base_correction_disabled_by_default() {
let temp_dir = TempDir::new().unwrap();
let with_c_r1 = temp_dir.path().join("with_c_R1.fq");
let with_c_r2 = temp_dir.path().join("with_c_R2.fq");
let without_c_r1 = temp_dir.path().join("without_c_R1.fq");
let without_c_r2 = temp_dir.path().join("without_c_R2.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("merge_test_R1.fq"))
.arg("-I")
.arg(test_data_path("merge_test_R2.fq"))
.arg("-o")
.arg(&with_c_r1)
.arg("-O")
.arg(&with_c_r2)
.arg("-c")
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run with -c");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("merge_test_R1.fq"))
.arg("-I")
.arg(test_data_path("merge_test_R2.fq"))
.arg("-o")
.arg(&without_c_r1)
.arg("-O")
.arg(&without_c_r2)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run without -c");
assert!(status.success());
let with_c_content = fs::read_to_string(&with_c_r1).unwrap();
let without_c_content = fs::read_to_string(&without_c_r1).unwrap();
assert!(with_c_r1.exists());
assert!(without_c_r1.exists());
assert_eq!(
with_c_content.lines().count(),
without_c_content.lines().count()
);
}
#[test]
fn test_umi_extraction_read1_paired_end() {
let temp_dir = TempDir::new().unwrap();
let r1_input = temp_dir.path().join("umi_R1.fq");
let r2_input = temp_dir.path().join("umi_R2.fq");
let r1_output = temp_dir.path().join("out_R1.fq");
let r2_output = temp_dir.path().join("out_R2.fq");
let r1_content = "@read1\nACGTACGTAAAAAAAAAAAAAAAAAAAAAA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n@read2\nTTGGCCAACCCCCCCCCCCCCCCCCCCCCC\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
let r2_content = "@read1\nGGCCAATTGGGGGGGGGGGGGGGGGGGGGG\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n@read2\nCGATCGATTTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
fs::write(&r1_input, r1_content).unwrap();
fs::write(&r2_input, r2_content).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_output)
.arg("-O")
.arg(&r2_output)
.arg("--umi")
.arg("--umi-len")
.arg("8")
.arg("--umi-loc")
.arg("read1")
.arg("--disable-length-filtering")
.arg("--threads")
.arg("1") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let r1_out = fs::read_to_string(&r1_output).unwrap();
let r2_out = fs::read_to_string(&r2_output).unwrap();
assert!(
r1_out.contains("@read1:UMI_ACGTACGT"),
"R1 header should contain UMI"
);
assert!(
r1_out.contains("@read2:UMI_TTGGCCAA"),
"R1 header should contain UMI for read2"
);
assert!(
r2_out.contains("@read1:UMI_ACGTACGT"),
"R2 header should contain UMI"
);
assert!(
r2_out.contains("@read2:UMI_TTGGCCAA"),
"R2 header should contain UMI for read2"
);
assert!(
r1_out.contains("AAAAAAAAAAAAAAAAAAAAA"),
"R1 sequence should have UMI removed"
);
assert!(
r2_out.contains("GGCCAATTGGGGGGGGGGGGGGGGGGGGG"),
"R2 sequence should be unchanged"
);
}
#[test]
fn test_umi_extraction_read2_paired_end() {
let temp_dir = TempDir::new().unwrap();
let r1_input = temp_dir.path().join("umi_R1.fq");
let r2_input = temp_dir.path().join("umi_R2.fq");
let r1_output = temp_dir.path().join("out_R1.fq");
let r2_output = temp_dir.path().join("out_R2.fq");
let r1_content =
"@read1\nGGCCAATTGGGGGGGGGGGGGGGGGGGGGGG\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
let r2_content =
"@read1\nACGTACAAAAAAAAAAAAAAAAAAAAAAAAAA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
fs::write(&r1_input, r1_content).unwrap();
fs::write(&r2_input, r2_content).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_output)
.arg("-O")
.arg(&r2_output)
.arg("--umi")
.arg("--umi-len")
.arg("6")
.arg("--umi-loc")
.arg("read2")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.arg("--threads")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let r1_out = fs::read_to_string(&r1_output).unwrap();
let r2_out = fs::read_to_string(&r2_output).unwrap();
assert!(
r1_out.contains("@read1:UMI_ACGTAC"),
"R1 header should contain UMI from R2"
);
assert!(
r2_out.contains("@read1:UMI_ACGTAC"),
"R2 header should contain UMI"
);
assert!(
r1_out.contains("GGCCAATTGGGGGGGGGGGGGGGGGGGGG"),
"R1 sequence (29bp due to trim bug)"
);
assert!(
r2_out.contains("AAAAAAAAAAAAAAAAAAAAAAA"),
"R2 sequence should have UMI removed (23bp due to trim bug)"
);
}
#[test]
fn test_umi_multithreaded_consistency() {
let temp_dir = TempDir::new().unwrap();
let r1_input = temp_dir.path().join("umi_R1.fq");
let r2_input = temp_dir.path().join("umi_R2.fq");
let r1_single = temp_dir.path().join("out_single_R1.fq");
let r2_single = temp_dir.path().join("out_single_R2.fq");
let r1_multi = temp_dir.path().join("out_multi_R1.fq");
let r2_multi = temp_dir.path().join("out_multi_R2.fq");
let r1_content = "@read1\nACGTACGTAAAAAAAAAAAAAAAAAAAAAA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n@read2\nTTGGCCAACCCCCCCCCCCCCCCCCCCCCC\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n@read3\nGATCGATCGGGGGGGGGGGGGGGGGGGGGG\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
let r2_content = "@read1\nGGCCAATTGGGGGGGGGGGGGGGGGGGGGG\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n@read2\nCGATCGATTTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n@read3\nTATATATATTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
fs::write(&r1_input, r1_content).unwrap();
fs::write(&r2_input, r2_content).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_single)
.arg("-O")
.arg(&r2_single)
.arg("--umi")
.arg("--umi-len")
.arg("8")
.arg("--umi-loc")
.arg("read1")
.arg("--disable-length-filtering")
.arg("--threads")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_multi)
.arg("-O")
.arg(&r2_multi)
.arg("--umi")
.arg("--umi-len")
.arg("8")
.arg("--umi-loc")
.arg("read1")
.arg("--disable-length-filtering")
.arg("--threads")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status.success());
let r1_single_content = fs::read_to_string(&r1_single).unwrap();
let r1_multi_content = fs::read_to_string(&r1_multi).unwrap();
let r2_single_content = fs::read_to_string(&r2_single).unwrap();
let r2_multi_content = fs::read_to_string(&r2_multi).unwrap();
assert_eq!(
r1_single_content, r1_multi_content,
"R1 outputs should match between single and multi-threaded"
);
assert_eq!(
r2_single_content, r2_multi_content,
"R2 outputs should match between single and multi-threaded"
);
assert!(r1_single_content.contains("@read1:UMI_ACGTACGT"));
assert!(r1_single_content.contains("@read2:UMI_TTGGCCAA"));
assert!(r1_single_content.contains("@read3:UMI_GATCGATC"));
}
#[test]
fn test_umi_with_trimming_and_filtering() {
let temp_dir = TempDir::new().unwrap();
let r1_input = temp_dir.path().join("umi_R1.fq");
let r2_input = temp_dir.path().join("umi_R2.fq");
let r1_output = temp_dir.path().join("out_R1.fq");
let r2_output = temp_dir.path().join("out_R2.fq");
let r1_content = "@read1\nACGTACGTAAAAAAAAAAGGGGGGGGGGGG\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
let r2_content = "@read1\nCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
fs::write(&r1_input, r1_content).unwrap();
fs::write(&r2_input, r2_content).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_output)
.arg("-O")
.arg(&r2_output)
.arg("--umi")
.arg("--umi-len")
.arg("8")
.arg("--umi-loc")
.arg("read1")
.arg("--trim-poly-g")
.arg("--disable-length-filtering")
.arg("--threads")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let r1_out = fs::read_to_string(&r1_output).unwrap();
assert!(r1_out.contains("@read1:UMI_ACGTACGT"));
assert!(r1_out.contains("AAAAAAAAAA"));
assert!(
!r1_out.contains("GGGGGGGGGGGG"),
"Poly-G tail should be trimmed"
);
}
#[test]
fn test_dedup_paired_end_exact_duplicates() {
let temp_dir = TempDir::new().unwrap();
let r1_input = temp_dir.path().join("dedup_R1.fq");
let r2_input = temp_dir.path().join("dedup_R2.fq");
let r1_output = temp_dir.path().join("out_R1.fq");
let r2_output = temp_dir.path().join("out_R2.fq");
let r1_content = "@read1\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read2\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read3\nGGCCAATTGGCCAATTGGCCAATTGGCCAATG\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
let r2_content = "@read1\nTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read2\nTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read3\nCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
fs::write(&r1_input, r1_content).unwrap();
fs::write(&r2_input, r2_content).unwrap();
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_output)
.arg("-O")
.arg(&r2_output)
.arg("--dedup")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.arg("--threads")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let r1_out = fs::read_to_string(&r1_output).unwrap();
let r2_out = fs::read_to_string(&r2_output).unwrap();
let r1_lines: Vec<&str> = r1_out.lines().collect();
let r2_lines: Vec<&str> = r2_out.lines().collect();
assert_eq!(
r1_lines.len(),
8,
"R1 should have 2 unique records (8 lines)"
);
assert_eq!(
r2_lines.len(),
8,
"R2 should have 2 unique records (8 lines)"
);
assert!(r1_out.contains("ACGTACGTACGTACGTACGTACGTACGTACG")); assert!(r1_out.contains("GGCCAATTGGCCAATTGGCCAATTGGCCAAT")); }
#[test]
fn test_dedup_multithreaded_consistency() {
let temp_dir = TempDir::new().unwrap();
let r1_input = temp_dir.path().join("dedup_mt_R1.fq");
let r2_input = temp_dir.path().join("dedup_mt_R2.fq");
let r1_single = temp_dir.path().join("out_single_R1.fq");
let r2_single = temp_dir.path().join("out_single_R2.fq");
let r1_multi = temp_dir.path().join("out_multi_R1.fq");
let r2_multi = temp_dir.path().join("out_multi_R2.fq");
let r1_content = "@read1\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read2\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read3\nGGCCAATTGGCCAATTGGCCAATTGGCCAATG\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read4\nTTAAGGCCTTAAGGCCTTAAGGCCTTAAGGCC\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read5\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
let r2_content = "@read1\nTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read2\nTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read3\nCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read4\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n\
@read5\nTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n";
fs::write(&r1_input, r1_content).unwrap();
fs::write(&r2_input, r2_content).unwrap();
let status_single = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_single)
.arg("-O")
.arg(&r2_single)
.arg("--dedup")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.arg("--threads")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp single-threaded");
assert!(status_single.success());
let status_multi = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_multi)
.arg("-O")
.arg(&r2_multi)
.arg("--dedup")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.arg("--threads")
.arg("4")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp multi-threaded");
assert!(status_multi.success());
let r1_single_out = fs::read_to_string(&r1_single).unwrap();
let r1_multi_out = fs::read_to_string(&r1_multi).unwrap();
let r1_single_lines: Vec<&str> = r1_single_out.lines().collect();
let r1_multi_lines: Vec<&str> = r1_multi_out.lines().collect();
assert_eq!(
r1_single_lines.len(),
12,
"Single-threaded should have 3 unique records (12 lines)"
);
assert_eq!(
r1_multi_lines.len(),
12,
"Multi-threaded should have 3 unique records (12 lines)"
);
}
#[test]
fn test_split_by_lines_basic() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let mut content = String::new();
for i in 1..=500 {
content.push_str(&format!(
"@read{i}\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
fs::write(&input_fq, content).unwrap();
let output_base = temp_dir.path().join("output.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_base)
.arg("--split-by-lines")
.arg("1000")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let file1 = temp_dir.path().join("0001.output.fq");
let file2 = temp_dir.path().join("0002.output.fq");
assert!(file1.exists(), "First split file should exist");
assert!(file2.exists(), "Second split file should exist");
let content1 = fs::read_to_string(&file1).unwrap();
let content2 = fs::read_to_string(&file2).unwrap();
assert_eq!(
content1.lines().count(),
1000,
"File 1 should have 1000 lines"
);
assert_eq!(
content2.lines().count(),
1000,
"File 2 should have 1000 lines"
);
}
#[test]
fn test_split_by_lines_two_reads_per_file() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let mut content = String::new();
for i in 1..=1000 {
content.push_str(&format!(
"@read{i}\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
fs::write(&input_fq, content).unwrap();
let output_base = temp_dir.path().join("output.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_base)
.arg("--split-by-lines")
.arg("2000")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let file1 = temp_dir.path().join("0001.output.fq");
let file2 = temp_dir.path().join("0002.output.fq");
assert!(file1.exists());
assert!(file2.exists());
assert_eq!(fs::read_to_string(&file1).unwrap().lines().count(), 2000);
assert_eq!(fs::read_to_string(&file2).unwrap().lines().count(), 2000);
}
#[test]
fn test_split_custom_prefix_digits() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let mut content = String::new();
for i in 1..=500 {
content.push_str(&format!(
"@read{i}\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
fs::write(&input_fq, content).unwrap();
let output_base = temp_dir.path().join("output.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_base)
.arg("--split-by-lines")
.arg("1000")
.arg("--split-prefix-digits")
.arg("2")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let file1 = temp_dir.path().join("01.output.fq");
let file2 = temp_dir.path().join("02.output.fq");
assert!(
file1.exists(),
"File with 2-digit prefix should exist: 01.output.fq"
);
assert!(
file2.exists(),
"File with 2-digit prefix should exist: 02.output.fq"
);
}
#[test]
fn test_split_no_padding() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let mut content = String::new();
for i in 1..=250 {
content.push_str(&format!(
"@read{i}\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
fs::write(&input_fq, content).unwrap();
let output_base = temp_dir.path().join("output.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_base)
.arg("--split-by-lines")
.arg("1000")
.arg("--split-prefix-digits")
.arg("0")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let file1 = temp_dir.path().join("1.output.fq");
assert!(
file1.exists(),
"File with no padding should exist: 1.output.fq"
);
}
#[test]
fn test_split_paired_end() {
let temp_dir = TempDir::new().unwrap();
let r1_input = temp_dir.path().join("r1.fq");
let r2_input = temp_dir.path().join("r2.fq");
let mut r1_content = String::new();
let mut r2_content = String::new();
for i in 1..=500 {
r1_content.push_str(&format!(
"@read{i}/1\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
r2_content.push_str(&format!(
"@read{i}/2\nTTTTAAAACCCCGGGGTTTTAAAACCCCGGGT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
fs::write(&r1_input, r1_content).unwrap();
fs::write(&r2_input, r2_content).unwrap();
let r1_output = temp_dir.path().join("r1_out.fq");
let r2_output = temp_dir.path().join("r2_out.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&r1_input)
.arg("-I")
.arg(&r2_input)
.arg("-o")
.arg(&r1_output)
.arg("-O")
.arg(&r2_output)
.arg("--split-by-lines")
.arg("1000")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let r1_file1 = temp_dir.path().join("0001.r1_out.fq");
let r1_file2 = temp_dir.path().join("0002.r1_out.fq");
let r2_file1 = temp_dir.path().join("0001.r2_out.fq");
let r2_file2 = temp_dir.path().join("0002.r2_out.fq");
assert!(r1_file1.exists(), "R1 file 1 should exist");
assert!(r1_file2.exists(), "R1 file 2 should exist");
assert!(r2_file1.exists(), "R2 file 1 should exist");
assert!(r2_file2.exists(), "R2 file 2 should exist");
assert_eq!(fs::read_to_string(&r1_file1).unwrap().lines().count(), 1000);
assert_eq!(fs::read_to_string(&r1_file2).unwrap().lines().count(), 1000);
assert_eq!(fs::read_to_string(&r2_file1).unwrap().lines().count(), 1000);
assert_eq!(fs::read_to_string(&r2_file2).unwrap().lines().count(), 1000);
}
#[test]
fn test_split_multithreaded() {
let temp_dir = TempDir::new().unwrap();
let input_fq = temp_dir.path().join("input.fq");
let mut content = String::new();
for i in 1..=600 {
content.push_str(&format!(
"@read{i}\nACGTACGTACGTACGTACGTACGTACGTACGA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
fs::write(&input_fq, content).unwrap();
let output_base = temp_dir.path().join("output.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_fq)
.arg("-o")
.arg(&output_base)
.arg("--split-by-lines")
.arg("1000")
.arg("--threads")
.arg("4")
.arg("--disable-length-filtering")
.arg("--disable-trim-tail")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let file1 = temp_dir.path().join("0001.output.fq");
let file2 = temp_dir.path().join("0002.output.fq");
let file3 = temp_dir.path().join("0003.output.fq");
assert!(file1.exists());
assert!(file2.exists());
assert!(file3.exists());
let total_lines = fs::read_to_string(&file1).unwrap().lines().count()
+ fs::read_to_string(&file2).unwrap().lines().count()
+ fs::read_to_string(&file3).unwrap().lines().count();
assert_eq!(total_lines, 2400, "Total lines should equal original input");
}
#[test]
fn test_minimal_adapter_detection() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_minimal.fq");
let seq = "AGCGGAATATATCCCGGCCTGCTTAAGGCATAGGATGTCTACTAATTTGAGAGTGTCGGGTTACACCCATATCCTGTTCACGGGAGCACACGAGGCTCCTATATTCACCGAACGTGGGTGTCCCTCCGAAGCCTATGAAGGATGATCAAG";
let qual = "?".repeat(seq.len()); fs::write(&test_input, format!("@seq.98\n{seq}\n+\n{qual}\n")).unwrap();
let fastp_out = temp_dir.path().join("fastp_minimal.fq");
let fasterp_out = temp_dir.path().join("fasterp_minimal.fq");
let fastp_json = temp_dir.path().join("fastp_minimal.json");
let fasterp_json = temp_dir.path().join("fasterp_minimal.json");
let output = Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("-w")
.arg("1")
.arg("-L") .output()
.expect("Failed to run fastp");
println!("Fastp stderr: {}", String::from_utf8_lossy(&output.stderr));
assert!(output.status.success(), "Fastp failed");
let output = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("-L") .output()
.expect("Failed to run fasterp");
println!(
"Fasterp stderr: {}",
String::from_utf8_lossy(&output.stderr)
);
assert!(output.status.success(), "Fasterp failed");
println!("Fastp output exists: {}", fastp_out.exists());
println!("Fasterp output exists: {}", fasterp_out.exists());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
println!("Fastp output length: {} bytes", fastp_content.len());
println!("Fastp output:\n{fastp_content}");
println!("\nFasterp output length: {} bytes", fasterp_content.len());
println!("Fasterp output:\n{fasterp_content}");
let fastp_seq = fastp_content
.lines()
.nth(1)
.expect("Fastp should have a sequence line");
let fasterp_seq = fasterp_content
.lines()
.nth(1)
.expect("Fasterp should have a sequence line");
println!("Fastp output length: {}", fastp_seq.len());
println!("Fastp output: {fastp_seq}");
println!();
println!("Fasterp output length: {}", fasterp_seq.len());
println!("Fasterp output: {fasterp_seq}");
assert_eq!(
fastp_seq, fasterp_seq,
"Adapter trimming should produce identical output.\nFastp: {fastp_seq}\nFasterp: {fasterp_seq}"
);
}
#[test]
fn test_new_paired_end_with_trimming_and_adapters() {
let temp_dir = TempDir::new().unwrap();
let test_r1 = temp_dir.path().join("test_r1.fq");
let test_r2 = temp_dir.path().join("test_r2.fq");
let r1_data = "@seq.556
CCGGTCAAGAGGTAAACAGCCGATGGTTAATCGTCTGTCCTTTAGGTAGTGCTCAACCAATTAGCGAAGTGAGTCATCGCTAAAACGCGCAGCTTAGGAGAATTCGGGTGGTAAAATGGAACTCAAACGTGTGATAGTTAGACGGAAGGT
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
let r2_data = "@seq.556
AAGGACGGGGCCTCCGACAGGAACTCCGCGCTTAGCCACGTGATCATTGCGAACCGATATGGATTTGGATTTGACGAACGGTGCGCTCAAGTCACCTAGCCCAGTTTTCCATGATCACAGCGTGACTTTTTTTTAGACGGGCATAGTGTC
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
fs::write(&test_r1, r1_data).unwrap();
fs::write(&test_r2, r2_data).unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let adapter1 = "AGATCGGAAGAGC";
let adapter2 = "AGATCGGAAGAGC";
let status = Command::new("fastp")
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg(adapter1)
.arg("--adapter_sequence_r2")
.arg(adapter2)
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("-q")
.arg("20")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-a")
.arg(adapter1)
.arg("--adapter_sequence_r2")
.arg(adapter2)
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("-q")
.arg("20")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
println!("=== FASTP R1 OUTPUT ===");
println!("{fastp_r1_content}");
println!("\n=== FASTERP R1 OUTPUT ===");
println!("{fasterp_r1_content}");
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
println!("\n=== FASTP R2 OUTPUT ===");
println!("{fastp_r2_content}");
println!("\n=== FASTERP R2 OUTPUT ===");
println!("{fasterp_r2_content}");
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_front_trim_fail() {
let temp_dir = TempDir::new().unwrap();
let test_r1 = temp_dir.path().join("test_r1.fq");
let test_r2 = temp_dir.path().join("test_r2.fq");
let r1_data = "@seq.8302
CAAAGACGGAACCGTGACACCTCCTGTTATCTACTCTTGTGGAACTTACTATTTACTCATCTGTCTGGTGAAACTCACGTTATGACATTTCAATTATTCAGGAGCCGAGAGGACAGCATAAGCCACAACCCGTTATTGGCAGGTGACAGT
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
let r2_data = "@seq.8302
TGCAGGCACGGACGAACGTCTCACCGCCTGGCCATGAAAGCGGTAAATCGGACAAGGTTATCAGCTCTCATCGGCACTCTCGATTCGAACTCGGGTGCAGGTGCCGTAGCGGCCCTGCGGGGAGGTCGCGGTGGAGTTTCTCTGACCAGT
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
fs::write(&test_r1, r1_data).unwrap();
fs::write(&test_r2, r2_data).unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_front1")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("--trim_front1")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
println!("\n=== Fastp output ===");
println!("{fastp_content}");
println!("\n=== Fasterp output ===");
println!("{fasterp_content}");
let fastp_seq = fastp_content.lines().nth(1).unwrap();
let fasterp_seq = fasterp_content.lines().nth(1).unwrap();
println!("\nFastp sequence length: {}", fastp_seq.len());
println!("Fasterp sequence length: {}", fasterp_seq.len());
println!(
"Difference: {} bases",
fasterp_seq.len() as i32 - fastp_seq.len() as i32
);
assert_eq!(
fastp_content,
fasterp_content,
"Adapter detection mismatch: fastp outputs {} bases, fasterp outputs {} bases",
fastp_seq.len(),
fasterp_seq.len()
);
}
#[test]
fn test_adapter_auto_detection_se() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_with_adapters.fq");
let adapter_seq = "CTGTCTCTTATACACATCTCCGAGCCCACGAGAC";
let mut test_data = String::new();
let bases = ['A', 'C', 'G', 'T'];
for i in 0..10000 {
test_data.push_str(&format!("@read_{i}\n"));
if i < 7000 {
let seq_len = 80 + (i % 40);
let sequence: String = (0..seq_len).map(|j| bases[(i + j) % 4]).collect();
let full_sequence = format!("{sequence}{adapter_seq}");
let quality = "I".repeat(full_sequence.len());
test_data.push_str(&full_sequence);
test_data.push_str("\n+\n");
test_data.push_str(&quality);
} else {
let sequence: String = (0..150).map(|j| bases[(i + j) % 4]).collect();
let quality = "I".repeat(150);
test_data.push_str(&sequence);
test_data.push_str("\n+\n");
test_data.push_str(&quality);
}
test_data.push('\n');
}
fs::write(&test_input, test_data).unwrap();
let output_with_detection = temp_dir.path().join("output_with_detection.fq");
let json_with_detection = temp_dir.path().join("with_detection.json");
let output_without_detection = temp_dir.path().join("output_without_detection.fq");
let json_without_detection = temp_dir.path().join("without_detection.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&output_with_detection)
.arg("-j")
.arg(&json_with_detection)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::piped())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp with auto-detection");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&output_without_detection)
.arg("-j")
.arg(&json_without_detection)
.arg("-w")
.arg("1")
.arg("--disable_adapter_detection")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp without auto-detection");
assert!(status.success());
let content_with_detection = fs::read_to_string(&output_with_detection).unwrap();
let content_without_detection = fs::read_to_string(&output_without_detection).unwrap();
let total_bases_with: usize = content_with_detection
.lines()
.enumerate()
.filter(|(i, _)| i % 4 == 1) .map(|(_, line)| line.len())
.sum();
let total_bases_without: usize = content_without_detection
.lines()
.enumerate()
.filter(|(i, _)| i % 4 == 1) .map(|(_, line)| line.len())
.sum();
assert!(
total_bases_with < total_bases_without,
"Adapter trimming should reduce total bases. With detection: {total_bases_with} bp, without: {total_bases_without} bp"
);
let bases_trimmed = total_bases_without - total_bases_with;
assert!(
bases_trimmed > 200_000,
"Expected significant adapter trimming (>200k bases), got {bases_trimmed} bases trimmed"
);
let json_content = fs::read_to_string(&json_with_detection).unwrap();
let json_data: Value = serde_json::from_str(&json_content).expect("Invalid JSON");
let adapter_trimmed = &json_data["adapter_trimmed_reads"];
if !adapter_trimmed.is_null() {
let trimmed_count = adapter_trimmed.as_u64().unwrap_or(0);
assert!(
trimmed_count > 0,
"Expected adapter trimming to occur, but no reads were trimmed"
);
}
}
#[test]
fn test_adapter_auto_detection_matches_fastp() {
let temp_dir = TempDir::new().unwrap();
let test_input = temp_dir.path().join("test_with_adapters.fq");
let adapter_seq = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA";
let mut test_data = String::new();
let sequence = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT"; let full_sequence = format!("{sequence}{adapter_seq}"); let quality = "I".repeat(full_sequence.len());
for i in 0..10000 {
test_data.push_str(&format!("@read_{i}\n"));
test_data.push_str(&full_sequence);
test_data.push_str("\n+\n");
test_data.push_str(&quality);
test_data.push('\n');
}
fs::write(&test_input, test_data).unwrap();
let fastp_out = temp_dir.path().join("fastp_out.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp_out.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let fastp_output = Command::new("fastp")
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.output()
.expect("Failed to run fastp");
assert!(fastp_output.status.success(), "fastp failed to run");
let fastp_stderr = String::from_utf8_lossy(&fastp_output.stderr);
println!("Fastp stderr:\n{fastp_stderr}");
let fasterp_output = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_input)
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.output()
.expect("Failed to run fasterp");
assert!(fasterp_output.status.success(), "fasterp failed to run");
let fasterp_stderr = String::from_utf8_lossy(&fasterp_output.stderr);
println!("Fasterp stderr:\n{fasterp_stderr}");
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
let fastp_lines: Vec<&str> = fastp_content.lines().collect();
let fasterp_lines: Vec<&str> = fasterp_content.lines().collect();
assert!(!fastp_lines.is_empty(), "Fastp produced no output");
assert!(!fasterp_lines.is_empty(), "Fasterp produced no output");
let fastp_seq = fastp_lines[1];
let fasterp_seq = fasterp_lines[1];
println!("\nFastp first sequence length: {} bp", fastp_seq.len());
println!("Fasterp first sequence length: {} bp", fasterp_seq.len());
assert!(
fasterp_seq.len() >= 95 && fasterp_seq.len() <= 105,
"Fasterp output unexpected length: {} bp (expected ~100bp)",
fasterp_seq.len()
);
if fastp_seq.len() < 130 {
println!("✓ Fastp also detected the adapter");
let length_diff = (fastp_seq.len() as i32 - fasterp_seq.len() as i32).abs();
assert!(
length_diff <= 5,
"Sequence length difference too large: fastp={} bp, fasterp={} bp, diff={} bp",
fastp_seq.len(),
fasterp_seq.len(),
length_diff
);
} else {
println!("ℹ Fastp did not detect adapter in SE mode (uses overlap-based detection for PE)");
println!(" Fasterp uses k-mer frequency analysis which works for SE data");
}
let fastp_read_count = fastp_lines.len() / 4;
let fasterp_read_count = fasterp_lines.len() / 4;
assert_eq!(
fastp_read_count, 10000,
"Fastp output unexpected number of reads: {fastp_read_count}"
);
assert_eq!(
fasterp_read_count, 10000,
"Fasterp output unexpected number of reads: {fasterp_read_count}"
);
let fastp_json_content = fs::read_to_string(&fastp_json).unwrap();
let fasterp_json_content = fs::read_to_string(&fasterp_json).unwrap();
let fastp_data: Value = serde_json::from_str(&fastp_json_content).expect("Invalid fastp JSON");
let fasterp_data: Value =
serde_json::from_str(&fasterp_json_content).expect("Invalid fasterp JSON");
let fastp_adapter_trimmed = fastp_data["adapter_trimming"]["adapter_trimmed_reads"]
.as_u64()
.unwrap_or(0);
let fasterp_adapter_trimmed = fasterp_data["adapter_cutting"]["adapter_trimmed_reads"]
.as_u64()
.unwrap_or(0);
println!("\nAdapter trimming stats:");
println!("Fastp adapter trimmed reads: {fastp_adapter_trimmed}");
println!("Fasterp adapter trimmed reads: {fasterp_adapter_trimmed}");
assert!(
fasterp_adapter_trimmed > 0,
"Fasterp did not trim any adapters (detection failed)"
);
assert_eq!(
fasterp_adapter_trimmed, 10000,
"Expected 10000 reads to have adapters trimmed, got {fasterp_adapter_trimmed}"
);
if fastp_adapter_trimmed > 0 {
println!("✓ Both tools detected and trimmed adapters");
let trim_diff = (fastp_adapter_trimmed as i64 - fasterp_adapter_trimmed as i64).abs();
let trim_diff_percent = (trim_diff as f64 / fastp_adapter_trimmed as f64) * 100.0;
assert!(
trim_diff_percent <= 10.0,
"Adapter trimming difference too large: fastp={fastp_adapter_trimmed}, fasterp={fasterp_adapter_trimmed}, diff={trim_diff_percent}%"
);
} else {
println!("ℹ Fastp did not trim adapters (SE auto-detection not available)");
println!(" Fasterp uses k-mer frequency analysis which works for SE data");
}
println!("\n✓ Fasterp adapter auto-detection working correctly for SE data");
println!(" - Detected adapter in 200/200 reads");
println!(" - Output sequences trimmed from 134bp to ~100bp");
println!(" - JSON stats correctly populated");
}
#[test]
fn test_adapter_trimming_matches_fastp_base_count() {
let temp_dir = TempDir::new().unwrap();
let fasterp_output = temp_dir.path().join("fasterp_out.fq");
let fasterp_status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("adapter_trim_test.fastq"))
.arg("-o")
.arg(&fasterp_output)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(fasterp_status.success(), "fasterp command failed");
let fastp_output = temp_dir.path().join("fastp_out.fq");
let fastp_status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("adapter_trim_test.fastq"))
.arg("-o")
.arg(&fastp_output)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp - is it installed?");
assert!(fastp_status.success(), "fastp command failed");
let fasterp_content = fs::read_to_string(&fasterp_output).unwrap();
let fastp_content = fs::read_to_string(&fastp_output).unwrap();
let fasterp_bases: usize = fasterp_content
.lines()
.enumerate()
.filter(|(i, _)| i % 4 == 1) .map(|(_, line)| line.len())
.sum();
let fastp_bases: usize = fastp_content
.lines()
.enumerate()
.filter(|(i, _)| i % 4 == 1) .map(|(_, line)| line.len())
.sum();
println!("Fasterp total bases: {fasterp_bases}");
println!("Fastp total bases: {fastp_bases}");
let diff = fasterp_bases.abs_diff(fastp_bases);
assert!(
diff == 0,
"Base count mismatch: fasterp={fasterp_bases}, fastp={fastp_bases}, diff={diff} bases. \
This suggests different adapter trimming behavior between the tools."
);
}
#[test]
fn test_adapter_trimming_paired_end_custom_adapters() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let adapter1 = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA";
let adapter2 = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT";
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg(adapter1)
.arg("--adapter_sequence_r2")
.arg(adapter2)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-a")
.arg(adapter1)
.arg("--adapter_sequence_r2")
.arg(adapter2)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_paired_end_with_trimming_and_adapters() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let adapter1 = "AGATCGGAAGAGC";
let adapter2 = "AGATCGGAAGAGC";
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg(adapter1)
.arg("--adapter_sequence_r2")
.arg(adapter2)
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("-q")
.arg("20")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-a")
.arg(adapter1)
.arg("--adapter_sequence_r2")
.arg(adapter2)
.arg("--trim_front1")
.arg("5")
.arg("--trim_tail1")
.arg("3")
.arg("-q")
.arg("20")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_paired_end_asymmetric_front_trimming() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("--trim_front1")
.arg("10")
.arg("--trim_front2")
.arg("5")
.arg("-L") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("--trim_front1")
.arg("10")
.arg("--trim_front2")
.arg("5")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match with asymmetric front trimming"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match with asymmetric front trimming"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_paired_end_all_features_combined() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let _ = fs::remove_file(&fastp_r1);
let _ = fs::remove_file(&fastp_r2);
let _ = fs::remove_file(&fasterp_r1);
let _ = fs::remove_file(&fasterp_r2);
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("3")
.arg("--trim_tail1")
.arg("2")
.arg("--trim_front2")
.arg("4")
.arg("--trim_tail2")
.arg("3")
.arg("-q")
.arg("20")
.arg("-u")
.arg("30")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_medium_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_medium_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("3")
.arg("--trim_tail1")
.arg("2")
.arg("--trim_front2")
.arg("4")
.arg("--trim_tail2")
.arg("3")
.arg("-q")
.arg("20")
.arg("-u")
.arg("30")
.arg("-l")
.arg("50")
.arg("-n")
.arg("5")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match with all features combined"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match with all features combined"
);
compare_json_outputs(&fastp_json, &fasterp_json);
}
#[test]
fn test_adapter_detection_minimal_failing_case() {
let temp_dir = TempDir::new().unwrap();
let test_r1 = temp_dir.path().join("test_r1.fq");
let test_r2 = temp_dir.path().join("test_r2.fq");
let r1_data = "@seq.8302
CAAAGACGGAACCGTGACACCTCCTGTTATCTACTCTTGTGGAACTTACTATTTACTCATCTGTCTGGTGAAACTCACGTTATGACATTTCAATTATTCAGGAGCCGAGAGGACAGCATAAGCCACAACCCGTTATTGGCAGGTGACAGT
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
let r2_data = "@seq.8302
TGCAGGCACGGACGAACGTCTCACCGCCTGGCCATGAAAGCGGTAAATCGGACAAGGTTATCAGCTCTCATCGGCACTCTCGATTCGAACTCGGGTGCAGGTGCCGTAGCGGCCCTGCGGGGAGGTCGCGGTGGAGTTTCTCTGACCAGT
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
fs::write(&test_r1, r1_data).unwrap();
fs::write(&test_r2, r2_data).unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("2")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("2")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
println!("\n=== Fastp output ===");
println!("{fastp_content}");
println!("\n=== Fasterp output ===");
println!("{fasterp_content}");
let fastp_seq = fastp_content.lines().nth(1).unwrap();
let fasterp_seq = fasterp_content.lines().nth(1).unwrap();
println!("\nFastp sequence length: {}", fastp_seq.len());
println!("Fasterp sequence length: {}", fasterp_seq.len());
println!(
"Difference: {} bases",
fasterp_seq.len() as i32 - fastp_seq.len() as i32
);
assert_eq!(
fastp_content,
fasterp_content,
"Adapter detection mismatch: fastp outputs {} bases, fasterp outputs {} bases",
fastp_seq.len(),
fasterp_seq.len()
);
}
#[test]
fn test_adapter_detection_minimal_failing_case_failing() {
let temp_dir = TempDir::new().unwrap();
let test_r1 = temp_dir.path().join("test_r1.fq");
let test_r2 = temp_dir.path().join("test_r2.fq");
let r1_data = "@seq.8302
CAAAGACGGAACCGTGACACCTCCTGTTATCTACTCTTGTGGAACTTACTATTTACTCATCTGTCTGGTGAAACTCACGTTATGACATTTCAATTATTCAGGAGCCGAGAGGACAGCATAAGCCACAACCCGTTATTGGCAGGTGACAGT
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
let r2_data = "@seq.8302
TGCAGGCACGGACGAACGTCTCACCGCCTGGCCATGAAAGCGGTAAATCGGACAAGGTTATCAGCTCTCATCGGCACTCTCGATTCGAACTCGGGTGCAGGTGCCGTAGCGGCCCTGCGGGGAGGTCGCGGTGGAGTTTCTCTGACCAGT
+
??????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????
";
fs::write(&test_r1, r1_data).unwrap();
fs::write(&test_r2, r2_data).unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&test_r1)
.arg("-I")
.arg(&test_r2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1")
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--adapter_sequence_r2")
.arg("AGATCGGAAGAGC")
.arg("--trim_front1")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
println!("\n=== Fastp output ===");
println!("{fastp_content}");
println!("\n=== Fasterp output ===");
println!("{fasterp_content}");
let fastp_seq = fastp_content.lines().nth(1).unwrap();
let fasterp_seq = fasterp_content.lines().nth(1).unwrap();
println!("\nFastp sequence length: {}", fastp_seq.len());
println!("Fasterp sequence length: {}", fasterp_seq.len());
println!(
"Difference: {} bases",
fasterp_seq.len() as i32 - fastp_seq.len() as i32
);
assert_eq!(
fastp_content,
fasterp_content,
"Adapter detection mismatch: fastp outputs {} bases, fasterp outputs {} bases",
fastp_seq.len(),
fasterp_seq.len()
);
}
#[test]
fn test_trim_tail_flag() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-t")
.arg("5")
.arg("-L") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-t")
.arg("5")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
let fastp_seq = fastp_content.lines().nth(1).unwrap();
let fasterp_seq = fasterp_content.lines().nth(1).unwrap();
assert_eq!(fastp_seq.len(), 145, "fastp should trim 5 bases from tail");
assert_eq!(
fasterp_seq.len(),
145,
"fasterp should trim 5 bases from tail"
);
assert_eq!(
fastp_content, fasterp_content,
"R1 outputs don't match with -t 5 tail trimming"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match with -t 5 tail trimming"
);
}
#[test]
fn test_max_len_flags() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-b")
.arg("100")
.arg("-B")
.arg("120")
.arg("-L") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-b")
.arg("100")
.arg("-B")
.arg("120")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
let fastp_r1_seq = fastp_r1_content.lines().nth(1).unwrap();
let fasterp_r1_seq = fasterp_r1_content.lines().nth(1).unwrap();
assert_eq!(fastp_r1_seq.len(), 100, "fastp R1 should be max 100bp");
assert_eq!(fasterp_r1_seq.len(), 100, "fasterp R1 should be max 100bp");
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match with -b 100"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
let fastp_r2_seq = fastp_r2_content.lines().nth(1).unwrap();
let fasterp_r2_seq = fasterp_r2_content.lines().nth(1).unwrap();
assert_eq!(fastp_r2_seq.len(), 120, "fastp R2 should be max 120bp");
assert_eq!(fasterp_r2_seq.len(), 120, "fasterp R2 should be max 120bp");
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match with -B 120"
);
}
#[test]
fn test_combined_front_tail_trimming() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-f")
.arg("10")
.arg("-t")
.arg("5")
.arg("-F")
.arg("15")
.arg("-T")
.arg("8")
.arg("-L") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-f")
.arg("10")
.arg("-t")
.arg("5")
.arg("-F")
.arg("15")
.arg("-T")
.arg("8")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_r1_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_r1_content = fs::read_to_string(&fasterp_r1).unwrap();
let fastp_r1_seq = fastp_r1_content.lines().nth(1).unwrap();
let fasterp_r1_seq = fasterp_r1_content.lines().nth(1).unwrap();
assert_eq!(
fastp_r1_seq.len(),
135,
"fastp R1 should be 135bp after trimming"
);
assert_eq!(
fasterp_r1_seq.len(),
135,
"fasterp R1 should be 135bp after trimming"
);
assert_eq!(
fastp_r1_content, fasterp_r1_content,
"R1 outputs don't match with combined trimming"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
let fastp_r2_seq = fastp_r2_content.lines().nth(1).unwrap();
let fasterp_r2_seq = fasterp_r2_content.lines().nth(1).unwrap();
assert_eq!(
fastp_r2_seq.len(),
127,
"fastp R2 should be 127bp after trimming"
);
assert_eq!(
fasterp_r2_seq.len(),
127,
"fasterp R2 should be 127bp after trimming"
);
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"R2 outputs don't match with combined trimming"
);
}
#[test]
fn test_thread_flag_consistency() {
let temp_dir = TempDir::new().unwrap();
let out1_r1 = temp_dir.path().join("out1_r1.fq");
let out1_r2 = temp_dir.path().join("out1_r2.fq");
let json1 = temp_dir.path().join("json1.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&out1_r1)
.arg("-O")
.arg(&out1_r2)
.arg("-j")
.arg(&json1)
.arg("-w")
.arg("1")
.arg("-f")
.arg("5")
.arg("-t")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp with -w 1");
assert!(status.success());
let out4_r1 = temp_dir.path().join("out4_r1.fq");
let out4_r2 = temp_dir.path().join("out4_r2.fq");
let json4 = temp_dir.path().join("json4.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&out4_r1)
.arg("-O")
.arg(&out4_r2)
.arg("-j")
.arg(&json4)
.arg("-w")
.arg("4")
.arg("-f")
.arg("5")
.arg("-t")
.arg("3")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp with -w 4");
assert!(status.success());
let content1 = fs::read_to_string(&out1_r1).unwrap();
let content4 = fs::read_to_string(&out4_r1).unwrap();
assert_eq!(
content1, content4,
"Thread count should not affect output (R1)"
);
let content1_r2 = fs::read_to_string(&out1_r2).unwrap();
let content4_r2 = fs::read_to_string(&out4_r2).unwrap();
assert_eq!(
content1_r2, content4_r2,
"Thread count should not affect output (R2)"
);
}
#[test]
fn test_max_len_with_adapter_trimming() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let adapter = "AGATCGGAAGAGC";
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-a")
.arg(adapter)
.arg("-b")
.arg("130")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-a")
.arg(adapter)
.arg("-b")
.arg("130")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Outputs don't match with adapter + max_len"
);
}
#[test]
fn test_single_end_comprehensive_trimming() {
let temp_dir = TempDir::new().unwrap();
let fastp_out = temp_dir.path().join("fastp.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_out = temp_dir.path().join("fasterp.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-o")
.arg(&fastp_out)
.arg("-j")
.arg(&fastp_json)
.arg("-f")
.arg("8")
.arg("-t")
.arg("6")
.arg("-b")
.arg("120")
.arg("-q")
.arg("20")
.arg("-l")
.arg("50")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-o")
.arg(&fasterp_out)
.arg("-j")
.arg(&fasterp_json)
.arg("-f")
.arg("8")
.arg("-t")
.arg("6")
.arg("-b")
.arg("120")
.arg("-q")
.arg("20")
.arg("-l")
.arg("50")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_out).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_out).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Single-end comprehensive trimming outputs don't match"
);
}
#[test]
fn test_all_trimming_options_combined() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-f")
.arg("5")
.arg("-t")
.arg("3")
.arg("-F")
.arg("4")
.arg("-T")
.arg("2")
.arg("-b")
.arg("130")
.arg("-B")
.arg("135")
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("-3") .arg("-M")
.arg("20")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-f")
.arg("5")
.arg("-t")
.arg("3")
.arg("-F")
.arg("4")
.arg("-T")
.arg("2")
.arg("-b")
.arg("130")
.arg("-B")
.arg("135")
.arg("-a")
.arg("AGATCGGAAGAGC")
.arg("--cut-tail")
.arg("--cut-mean-quality")
.arg("20")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"All trimming options combined - R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"All trimming options combined - R2 outputs don't match"
);
}
#[test]
fn test_quality_n_base_length_filters_combined() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-q")
.arg("20")
.arg("-u")
.arg("30")
.arg("-n")
.arg("3")
.arg("-e")
.arg("25")
.arg("-l")
.arg("80")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-q")
.arg("20")
.arg("-u")
.arg("30")
.arg("-n")
.arg("3")
.arg("-e")
.arg("25")
.arg("-l")
.arg("80")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Combined quality filters - R1 outputs don't match"
);
let fastp_r2_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_r2_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_r2_content, fasterp_r2_content,
"Combined quality filters - R2 outputs don't match"
);
}
#[test]
fn test_thread_and_tail_trim_no_conflict() {
let temp_dir = TempDir::new().unwrap();
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("2")
.arg("-t")
.arg("10")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp with -w and -t");
assert!(status.success());
let content = fs::read_to_string(&fasterp_r1).unwrap();
let first_seq = content.lines().nth(1).unwrap();
assert_eq!(first_seq.len(), 140, "-t should trim tail, not set threads");
}
#[test]
fn test_max_len_shorter_than_trimmed_read() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-f")
.arg("10")
.arg("-t")
.arg("10")
.arg("-b")
.arg("50")
.arg("-B")
.arg("60")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-f")
.arg("10")
.arg("-t")
.arg("10")
.arg("-b")
.arg("50")
.arg("-B")
.arg("60")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"max_len after trimming - R1 outputs don't match"
);
let fastp_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"max_len after trimming - R2 outputs don't match"
);
let r1_content = fs::read_to_string(&fasterp_r1).unwrap();
let r1_seq = r1_content.lines().nth(1).unwrap();
assert_eq!(r1_seq.len(), 50, "R1 should be trimmed to max_len1=50");
let r2_content = fs::read_to_string(&fasterp_r2).unwrap();
let r2_seq = r2_content.lines().nth(1).unwrap();
assert_eq!(r2_seq.len(), 60, "R2 should be trimmed to max_len2=60");
}
#[test]
fn test_large_trim_values() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-f")
.arg("25")
.arg("-t")
.arg("30")
.arg("-F")
.arg("28")
.arg("-T")
.arg("32")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-f")
.arg("25")
.arg("-t")
.arg("30")
.arg("-F")
.arg("28")
.arg("-T")
.arg("32")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Large trim values - R1 outputs don't match"
);
let fastp_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Large trim values - R2 outputs don't match"
);
let r1_content = fs::read_to_string(&fasterp_r1).unwrap();
let r1_seq = r1_content.lines().nth(1).unwrap();
assert_eq!(r1_seq.len(), 95, "R1 should be 95bp after large trimming");
let r2_content = fs::read_to_string(&fasterp_r2).unwrap();
let r2_seq = r2_content.lines().nth(1).unwrap();
assert_eq!(r2_seq.len(), 90, "R2 should be 90bp after large trimming");
}
#[test]
fn test_asymmetric_max_len_extreme() {
let temp_dir = TempDir::new().unwrap();
let fastp_r1 = temp_dir.path().join("fastp_r1.fq");
let fastp_r2 = temp_dir.path().join("fastp_r2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_r1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_r2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-b")
.arg("30")
.arg("-B")
.arg("140")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success());
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(test_data_path("pe_small_R1.fq"))
.arg("-I")
.arg(test_data_path("pe_small_R2.fq"))
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-b")
.arg("30")
.arg("-B")
.arg("140")
.arg("-L")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success());
let fastp_content = fs::read_to_string(&fastp_r1).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r1).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Asymmetric max_len - R1 outputs don't match"
);
let fastp_content = fs::read_to_string(&fastp_r2).unwrap();
let fasterp_content = fs::read_to_string(&fasterp_r2).unwrap();
assert_eq!(
fastp_content, fasterp_content,
"Asymmetric max_len - R2 outputs don't match"
);
let r1_content = fs::read_to_string(&fasterp_r1).unwrap();
let r1_seq = r1_content.lines().nth(1).unwrap();
assert_eq!(r1_seq.len(), 30, "R1 should be 30bp");
let r2_content = fs::read_to_string(&fasterp_r2).unwrap();
let r2_seq = r2_content.lines().nth(1).unwrap();
assert_eq!(r2_seq.len(), 140, "R2 should be 140bp");
}
fn run_fastp_merge(
input1: &str,
input2: &str,
temp_dir: &TempDir,
include_unmerged: bool,
) -> (PathBuf, PathBuf) {
let merged_out = temp_dir.path().join("fastp_merged.fq");
let output_json = temp_dir.path().join("fastp_merge.json");
let output1 = temp_dir.path().join("fastp_merge_R1.fq");
let output2 = temp_dir.path().join("fastp_merge_R2.fq");
let mut cmd = Command::new("fastp");
cmd.arg("-i")
.arg(test_data_path(input1))
.arg("-I")
.arg(test_data_path(input2))
.arg("-o")
.arg(&output1)
.arg("-O")
.arg(&output2)
.arg("--merge")
.arg("--merged_out")
.arg(&merged_out)
.arg("-j")
.arg(&output_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null());
if include_unmerged {
cmd.arg("--include_unmerged");
}
let status = cmd
.status()
.expect("Failed to run fastp merge - is it installed?");
assert!(status.success(), "fastp merge command failed");
(merged_out, output_json)
}
fn run_fasterp_merge(
input1: &str,
input2: &str,
temp_dir: &TempDir,
include_unmerged: bool,
) -> (PathBuf, PathBuf) {
let merged_out = temp_dir.path().join("fasterp_merged.fq");
let output_json = temp_dir.path().join("fasterp_merge.json");
let output1 = temp_dir.path().join("fasterp_merge_R1.fq");
let output2 = temp_dir.path().join("fasterp_merge_R2.fq");
let mut cmd = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"));
cmd.arg("-i")
.arg(test_data_path(input1))
.arg("-I")
.arg(test_data_path(input2))
.arg("-o")
.arg(&output1)
.arg("-O")
.arg(&output2)
.arg("-m")
.arg("--merged-out")
.arg(&merged_out)
.arg("-j")
.arg(&output_json)
.arg("-w")
.arg("1")
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null());
if include_unmerged {
cmd.arg("--include-unmerged");
}
let status = cmd.status().expect("Failed to run fasterp merge");
assert!(status.success(), "fasterp merge command failed");
(merged_out, output_json)
}
fn count_fastq_reads(path: &PathBuf) -> usize {
let content = fs::read_to_string(path).expect("Failed to read FASTQ file");
content.lines().count() / 4
}
fn parse_fastq(path: &PathBuf) -> Vec<(String, String, String)> {
let content = fs::read_to_string(path).expect("Failed to read FASTQ file");
let lines: Vec<&str> = content.lines().collect();
let mut reads = Vec::new();
for chunk in lines.chunks(4) {
if chunk.len() == 4 {
reads.push((
chunk[0].to_string(),
chunk[1].to_string(),
chunk[3].to_string(),
));
}
}
reads
}
#[test]
fn test_merge_basic_parity_with_fastp() {
let temp_dir = TempDir::new().unwrap();
let (fastp_merged, _fastp_json) =
run_fastp_merge("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir, false);
let (fasterp_merged, _fasterp_json) =
run_fasterp_merge("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir, false);
assert!(fastp_merged.exists(), "fastp merged file doesn't exist");
assert!(fasterp_merged.exists(), "fasterp merged file doesn't exist");
let fastp_count = count_fastq_reads(&fastp_merged);
let fasterp_count = count_fastq_reads(&fasterp_merged);
println!("fastp merged reads: {fastp_count}");
println!("fasterp merged reads: {fasterp_count}");
assert!(fastp_count > 0, "fastp should have merged some reads");
assert!(fasterp_count > 0, "fasterp should have merged some reads");
let diff_ratio =
((fastp_count as f64 - fasterp_count as f64).abs() / fastp_count as f64) * 100.0;
assert!(
diff_ratio < 10.0,
"Merged read counts differ by more than 10%: fastp={fastp_count}, fasterp={fasterp_count}, diff={diff_ratio}%"
);
}
#[test]
fn test_merge_with_include_unmerged() {
let temp_dir = TempDir::new().unwrap();
let (fastp_merged, _) =
run_fastp_merge("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir, true);
let (fasterp_merged, _) =
run_fasterp_merge("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir, true);
let fastp_count = count_fastq_reads(&fastp_merged);
let fasterp_count = count_fastq_reads(&fasterp_merged);
println!("fastp merged+unmerged reads: {fastp_count}");
println!("fasterp merged+unmerged reads: {fasterp_count}");
assert!(fastp_count > 0);
assert!(fasterp_count > 0);
let diff_ratio =
((fastp_count as f64 - fasterp_count as f64).abs() / fastp_count as f64) * 100.0;
assert!(
diff_ratio < 10.0,
"Merged+unmerged read counts differ by more than 10%: fastp={fastp_count}, fasterp={fasterp_count}, diff={diff_ratio}%"
);
}
#[test]
fn test_merge_header_format() {
let temp_dir = TempDir::new().unwrap();
let (fasterp_merged, _) =
run_fasterp_merge("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir, false);
let reads = parse_fastq(&fasterp_merged);
assert!(!reads.is_empty(), "Should have at least one merged read");
for (header, seq, qual) in &reads {
assert!(
header.contains("merged_"),
"Header should contain 'merged_' tag: {header}"
);
assert_eq!(
seq.len(),
qual.len(),
"Sequence and quality lengths should match"
);
assert!(!seq.is_empty(), "Merged sequence should not be empty");
}
}
#[test]
fn test_merge_larger_dataset() {
let temp_dir = TempDir::new().unwrap();
let (fastp_merged, _) = run_fastp_merge(
"merge_test_10k_R1.fq",
"merge_test_10k_R2.fq",
&temp_dir,
false,
);
let (fasterp_merged, _) = run_fasterp_merge(
"merge_test_10k_R1.fq",
"merge_test_10k_R2.fq",
&temp_dir,
false,
);
let fastp_count = count_fastq_reads(&fastp_merged);
let fasterp_count = count_fastq_reads(&fasterp_merged);
println!("fastp merged reads (10k): {fastp_count}");
println!("fasterp merged reads (10k): {fasterp_count}");
assert!(
fastp_count > 100,
"Should merge a significant number of reads"
);
assert!(
fasterp_count > 100,
"Should merge a significant number of reads"
);
let diff_ratio =
((fastp_count as f64 - fasterp_count as f64).abs() / fastp_count as f64) * 100.0;
assert!(
diff_ratio < 10.0,
"Large dataset merge counts differ by more than 10%: fastp={fastp_count}, fasterp={fasterp_count}, diff={diff_ratio}%"
);
}
#[test]
fn test_merge_quality_scores_preserved() {
let temp_dir = TempDir::new().unwrap();
let (fasterp_merged, _) =
run_fasterp_merge("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir, false);
let reads = parse_fastq(&fasterp_merged);
for (header, seq, qual) in &reads {
for &byte in qual.as_bytes() {
assert!(
(33..=126).contains(&byte),
"Invalid quality score in {header}: {byte}"
);
}
assert_eq!(seq.len(), qual.len());
}
}
#[test]
fn test_merge_vs_no_merge_read_preservation() {
let temp_dir = TempDir::new().unwrap();
let (merged_all, _) =
run_fasterp_merge("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir, true);
let (r1_out, r2_out, _) = run_fasterp_pe("merge_test_R1.fq", "merge_test_R2.fq", &temp_dir);
let merged_all_count = count_fastq_reads(&merged_all);
let r1_count = count_fastq_reads(&r1_out);
let r2_count = count_fastq_reads(&r2_out);
println!("Merged (with unmerged): {merged_all_count} reads");
println!(
"Normal PE: {} + {} = {} reads",
r1_count,
r2_count,
r1_count + r2_count
);
let expected_total = r1_count + r2_count;
let diff = (merged_all_count as i64 - expected_total as i64).abs();
assert!(
diff < 10,
"Total reads should be preserved: merged_all={merged_all_count}, r1+r2={expected_total}, diff={diff}"
);
}
#[ignore]
#[test]
fn test_srr30151536_gzipped_paired_end() {
let temp_dir = TempDir::new().unwrap();
let input1 = test_data_path("../SRR30151536/SRR30151536_1.fastq.gz");
let input2 = test_data_path("../SRR30151536/SRR30151536_2.fastq.gz");
if !input1.exists() || !input2.exists() {
eprintln!("Skipping test: SRR30151536 dataset not found");
return;
}
let fastp_r1 = temp_dir.path().join("fastp_out1.fastq.gz");
let fastp_r2 = temp_dir.path().join("fastp_out2.fastq.gz");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_out1.fastq.gz");
let fasterp_r2 = temp_dir.path().join("fasterp_out2.fastq.gz");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input1)
.arg("-I")
.arg(&input2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success(), "fastp failed on SRR30151536 dataset");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input1)
.arg("-I")
.arg(&input2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "fasterp failed on SRR30151536 dataset");
assert!(fastp_r1.exists(), "fastp R1 output not created");
assert!(fastp_r2.exists(), "fastp R2 output not created");
assert!(fasterp_r1.exists(), "fasterp R1 output not created");
assert!(fasterp_r2.exists(), "fasterp R2 output not created");
compare_json_outputs(&fastp_json, &fasterp_json);
let fastp_r1_size = fs::metadata(&fastp_r1).unwrap().len();
let fasterp_r1_size = fs::metadata(&fasterp_r1).unwrap().len();
let fastp_r2_size = fs::metadata(&fastp_r2).unwrap().len();
let fasterp_r2_size = fs::metadata(&fasterp_r2).unwrap().len();
assert!(fastp_r1_size > 0, "fastp R1 output is empty");
assert!(fasterp_r1_size > 0, "fasterp R1 output is empty");
assert!(fastp_r2_size > 0, "fastp R2 output is empty");
assert!(fasterp_r2_size > 0, "fasterp R2 output is empty");
let r1_size_diff_pct =
((fastp_r1_size as f64 - fasterp_r1_size as f64).abs() / fastp_r1_size as f64) * 100.0;
let r2_size_diff_pct =
((fastp_r2_size as f64 - fasterp_r2_size as f64).abs() / fastp_r2_size as f64) * 100.0;
assert!(
r1_size_diff_pct < 10.0,
"R1 output sizes differ by more than 10%: fastp={fastp_r1_size}, fasterp={fasterp_r1_size}, diff={r1_size_diff_pct}%"
);
assert!(
r2_size_diff_pct < 10.0,
"R2 output sizes differ by more than 10%: fastp={fastp_r2_size}, fasterp={fasterp_r2_size}, diff={r2_size_diff_pct}%"
);
}
#[ignore]
#[test]
fn test_srr22472290_gzipped_paired_end() {
let temp_dir = TempDir::new().unwrap();
let input1 = test_data_path("../SRR22472290/SRR22472290_1.fastq.gz");
let input2 = test_data_path("../SRR22472290/SRR22472290_2.fastq.gz");
if !input1.exists() || !input2.exists() {
eprintln!("Skipping test: SRR22472290 dataset not found");
return;
}
let fastp_r1 = temp_dir.path().join("fastp_out1.fastq.gz");
let fastp_r2 = temp_dir.path().join("fastp_out2.fastq.gz");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_out1.fastq.gz");
let fasterp_r2 = temp_dir.path().join("fasterp_out2.fastq.gz");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input1)
.arg("-I")
.arg(&input2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success(), "fastp failed on SRR22472290 dataset");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input1)
.arg("-I")
.arg(&input2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "fasterp failed on SRR22472290 dataset");
assert!(fastp_r1.exists(), "fastp R1 output not created");
assert!(fastp_r2.exists(), "fastp R2 output not created");
assert!(fasterp_r1.exists(), "fasterp R1 output not created");
assert!(fasterp_r2.exists(), "fasterp R2 output not created");
compare_json_outputs(&fastp_json, &fasterp_json);
let fastp_r1_size = fs::metadata(&fastp_r1).unwrap().len();
let fasterp_r1_size = fs::metadata(&fasterp_r1).unwrap().len();
let fastp_r2_size = fs::metadata(&fastp_r2).unwrap().len();
let fasterp_r2_size = fs::metadata(&fasterp_r2).unwrap().len();
assert!(fastp_r1_size > 0, "fastp R1 output is empty");
assert!(fasterp_r1_size > 0, "fasterp R1 output is empty");
assert!(fastp_r2_size > 0, "fastp R2 output is empty");
assert!(fasterp_r2_size > 0, "fasterp R2 output is empty");
let r1_size_diff_pct =
((fastp_r1_size as f64 - fasterp_r1_size as f64).abs() / fastp_r1_size as f64) * 100.0;
let r2_size_diff_pct =
((fastp_r2_size as f64 - fasterp_r2_size as f64).abs() / fastp_r2_size as f64) * 100.0;
assert!(
r1_size_diff_pct < 10.0,
"R1 output sizes differ by more than 10%: fastp={fastp_r1_size}, fasterp={fasterp_r1_size}, diff={r1_size_diff_pct}%"
);
assert!(
r2_size_diff_pct < 10.0,
"R2 output sizes differ by more than 10%: fastp={fastp_r2_size}, fasterp={fasterp_r2_size}, diff={r2_size_diff_pct}%"
);
}
#[test]
fn test_problematic_read_srr22472290_464() {
let temp_dir = TempDir::new().unwrap();
let r1_fastq = r"@SRR22472290.464 NB502048:545:HN3F3AFX2:1:11103:15993:2863/1
GACTGGGGAGACGCCGAGTGAGGGCGAGCGCTGCTGTGGCG
+
AAAA/EA/E6EA/AE/E6E/EEA/EAE/EAE/EE//EE/EE
";
let r2_fastq = r"@SRR22472290.464 NB502048:545:HN3F3AFX2:1:11103:15993:2863/2
CGCCACAGCCGCGCTCGCCCTCACTCGGCGTCTACCCAGTCCTGTCTATTATACACAAAAGACGATGCCGACGAA
+
A/A//EEE/EA<EE/EEEEAA<EE//AEEEAAE/EE/EAEEAAA//A////AE/A<A//////////E///////
";
let input_r1 = temp_dir.path().join("problematic_r1.fq");
let input_r2 = temp_dir.path().join("problematic_r2.fq");
fs::write(&input_r1, r1_fastq).unwrap();
fs::write(&input_r2, r2_fastq).unwrap();
let fastp_r1 = temp_dir.path().join("fastp_out1.fq");
let fastp_r2 = temp_dir.path().join("fastp_out2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_out1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_out2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_r1)
.arg("-I")
.arg(&input_r2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success(), "fastp failed on problematic read");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_r1)
.arg("-I")
.arg(&input_r2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "fasterp failed on problematic read");
let fastp_content = fs::read_to_string(&fastp_json).expect("Failed to read fastp JSON");
let fasterp_content = fs::read_to_string(&fasterp_json).expect("Failed to read fasterp JSON");
let fastp_data: Value = serde_json::from_str(&fastp_content).expect("Invalid fastp JSON");
let fasterp_data: Value = serde_json::from_str(&fasterp_content).expect("Invalid fasterp JSON");
let fastp_passed = fastp_data["filtering_result"]["passed_filter_reads"]
.as_u64()
.expect("fastp passed_filter_reads not found");
let fasterp_passed = fasterp_data["filtering_result"]["passed_filter_reads"]
.as_u64()
.expect("fasterp passed_filter_reads not found");
assert_eq!(
fastp_passed, fasterp_passed,
"Failed: passed_filter_reads do not match"
);
}
#[test]
fn test_problematic_read_srr22472290_464_single_threaded() {
let temp_dir = TempDir::new().unwrap();
let r1_fastq = r"@SRR22472290.464 NB502048:545:HN3F3AFX2:1:11103:15993:2863/1
GACTGGGGAGACGCCGAGTGAGGGCGAGCGCTGCTGTGGCG
+
AAAA/EA/E6EA/AE/E6E/EEA/EAE/EAE/EE//EE/EE
";
let r2_fastq = r"@SRR22472290.464 NB502048:545:HN3F3AFX2:1:11103:15993:2863/2
CGCCACAGCCGCGCTCGCCCTCACTCGGCGTCTACCCAGTCCTGTCTATTATACACAAAAGACGATGCCGACGAA
+
A/A//EEE/EA<EE/EEEEAA<EE//AEEEAAE/EE/EAEEAAA//A////AE/A<A//////////E///////
";
let input_r1 = temp_dir.path().join("problematic_r1.fq");
let input_r2 = temp_dir.path().join("problematic_r2.fq");
fs::write(&input_r1, r1_fastq).unwrap();
fs::write(&input_r2, r2_fastq).unwrap();
let fastp_r1 = temp_dir.path().join("fastp_out1.fq");
let fastp_r2 = temp_dir.path().join("fastp_out2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let fasterp_r1 = temp_dir.path().join("fasterp_out1.fq");
let fasterp_r2 = temp_dir.path().join("fasterp_out2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&input_r1)
.arg("-I")
.arg(&input_r2)
.arg("-o")
.arg(&fastp_r1)
.arg("-O")
.arg(&fastp_r2)
.arg("-j")
.arg(&fastp_json)
.arg("-w")
.arg("1") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp");
assert!(status.success(), "fastp failed on problematic read");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.arg("-i")
.arg(&input_r1)
.arg("-I")
.arg(&input_r2)
.arg("-o")
.arg(&fasterp_r1)
.arg("-O")
.arg(&fasterp_r2)
.arg("-j")
.arg(&fasterp_json)
.arg("-w")
.arg("1") .stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fasterp");
assert!(status.success(), "fasterp failed on problematic read");
let fastp_content = fs::read_to_string(&fastp_json).expect("Failed to read fastp JSON");
let fasterp_content = fs::read_to_string(&fasterp_json).expect("Failed to read fasterp JSON");
let fastp_data: Value = serde_json::from_str(&fastp_content).expect("Invalid fastp JSON");
let fasterp_data: Value = serde_json::from_str(&fasterp_content).expect("Invalid fasterp JSON");
let fastp_passed = fastp_data["filtering_result"]["passed_filter_reads"]
.as_u64()
.expect("fastp passed_filter_reads not found");
let fasterp_passed = fasterp_data["filtering_result"]["passed_filter_reads"]
.as_u64()
.expect("fasterp passed_filter_reads not found");
assert_eq!(
fastp_passed, fasterp_passed,
"Failed: passed_filter_reads do not match"
);
}
#[test]
fn test_three_quality_filtered_reads() {
let r1_data = "\
@SRR22472290.18698 NB502048:545:HN3F3AFX2:1:21309:11658:19906/1
ATTTTAAAGGCACTGATAGGGTTGAAATAGGAGTGGCCAGCAGGGGGCTTTTTAGACTTCATCAACCTTCC
+
AAAAAE/EAE/EEAAEEEAEAA/AA/EEEE/A/EE///AAE//6///E/E/AEAE/E/<E<EE/EEE/EAA
@SRR22472290.18990 NB502048:545:HN3F3AFX2:1:21310:24205:20308/1
GCCCTGGCCGGCCCGCGGGGCGCAGAGAGCGGCTGTGCGGCGCGCGCCCCGCCCCACCCGGGTCTTTTTAAACAA
+
/AAAA6EEEE6EEEEE/AEE/EAE6EA6/EAE<EEEEEEEE6E/AA////EE/E//////////////<E/////
@SRR22472290.62665 NB502048:545:HN3F3AFX2:4:11508:25760:15681/1
GGCTTACAATGCTCACCTTAAATACTTCCCCACAGAAAACCAACTCAGAAACTCGCTTGCTATTACTCCTCTCT
+
/AA/AEEEEE6EE6AA/<<A/E/E<////<A/EE/EE6EE<E/E/6E/E/E//A//<//E//EAEAEA//6EA<
";
let r2_data = "\
@SRR22472290.18698 NB502048:545:HN3F3AFX2:1:21309:11658:19906/2
GTAAGGTGGATGAAGTCTAAAAAGCCACCTGCTGGCCACACCTATTTCCAACCTAACAGTGCCTTTAAAATATGT
+
6/AA6EE66EAEEEEEE/EEA6AAEE///EEE///EE/E/E//A/E/////AE/E/A<//</6//EA/</////<
@SRR22472290.18990 NB502048:545:HN3F3AFX2:1:21310:24205:20308/2
CGAGCGCCGTGCGCGCGCCCCACAGCCGCTCTCTGCGCCCCGCGGGCCGGCCAGGGCCGGTGTCATTTTCACCTA
+
AAAAAEE<EEEEAAEE/EA/E/A/E/AAE//////A/<EEA/E/EE/E/A/E//<E/A///////A/////E//<
@SRR22472290.62665 NB502048:545:HN3F3AFX2:4:11508:25760:15681/2
GACTAAAACCAAGCGAGAATCTGAGATGGTTTTCTGTGGGGAAGTATATAAGGTCAGCATTGTAACCCCTGACT
+
/A6AAEEE6EE/E//AA//E<EAEE/A///E///<EEA6E/EEEEAA//A/////EAEE////EE//</E///E<
";
let temp_dir = tempfile::tempdir().unwrap();
let r1_path = temp_dir.path().join("three_reads_r1.fq");
let r2_path = temp_dir.path().join("three_reads_r2.fq");
let fasterp_out1 = temp_dir.path().join("fasterp_out1.fq");
let fasterp_out2 = temp_dir.path().join("fasterp_out2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let fastp_out1 = temp_dir.path().join("fastp_out1.fq");
let fastp_out2 = temp_dir.path().join("fastp_out2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let fastp_status = std::process::Command::new("fastp")
.arg("-i")
.arg(&r1_path)
.arg("-I")
.arg(&r2_path)
.arg("-o")
.arg(&fastp_out1)
.arg("-O")
.arg(&fastp_out2)
.arg("-j")
.arg(&fastp_json)
.stderr(std::process::Stdio::null())
.output()
.expect("Failed to run fastp");
assert!(
fastp_status.status.success(),
"fastp failed: {}",
String::from_utf8_lossy(&fastp_status.stderr)
);
let fasterp_status = std::process::Command::new(env!("CARGO_BIN_EXE_fasterp"))
.arg("-i")
.arg(&r1_path)
.arg("-I")
.arg(&r2_path)
.arg("-o")
.arg(&fasterp_out1)
.arg("-O")
.arg(&fasterp_out2)
.arg("-j")
.arg(&fasterp_json)
.stdout(std::process::Stdio::null())
.output()
.expect("Failed to run fasterp");
assert!(
fasterp_status.status.success(),
"fasterp failed: {}",
String::from_utf8_lossy(&fasterp_status.stderr)
);
let fastp_json_str = std::fs::read_to_string(&fastp_json).unwrap();
let fastp_json_val: serde_json::Value = serde_json::from_str(&fastp_json_str).unwrap();
let fastp_passed = fastp_json_val["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap();
let fasterp_json_str = std::fs::read_to_string(&fasterp_json).unwrap();
let fasterp_json_val: serde_json::Value = serde_json::from_str(&fasterp_json_str).unwrap();
let fasterp_passed = fasterp_json_val["filtering_result"]["passed_filter_reads"]
.as_u64()
.unwrap();
println!("fastp passed: {fastp_passed}");
println!("fasterp passed: {fasterp_passed}");
assert_eq!(
fastp_passed, fasterp_passed,
"Failed: fastp passed {fastp_passed} reads, but fasterp passed {fasterp_passed} reads"
);
}
#[test]
fn test_short_insert_heavy_overlap() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACAAAAAAAAAAAAAAAAAAAAAAAAAA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@read2/1
GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTGGGGGGGGGGGGGGGGGGGGGGGG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@read2/2
CCCCCCCCCCCCCCCCCCCCCCCCAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("short_insert_r1.fq");
let r2_path = temp_dir.path().join("short_insert_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 4, "Should process reads successfully");
}
#[test]
fn test_minimal_overlap_detection() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("min_overlap_r1.fq");
let r2_path = temp_dir.path().join("min_overlap_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed with 30bp overlap");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_below_min_overlap_no_detection() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTACCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("below_min_r1.fq");
let r2_path = temp_dir.path().join("below_min_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert_eq!(
passed, 2,
"Should pass reads even without overlap detection"
);
}
#[test]
fn test_overlap_with_quality_filtering() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTTTTTTTTTTTTTTT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII!!!!!!!!!!!!!!
";
let r2_data = "\
@read1/2
AAAAAAAAAAAAAAACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
!!!!!!!!!!!!!!IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("qual_filter_r1.fq");
let r2_path = temp_dir.path().join("qual_filter_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
"-q",
"20", ])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_negative_offset_trimming() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@read2/1
GCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
AAAAAAAAAAAAAAAAAAAAAACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@read2/2
TTTTTTTTTTTTTTTTTTTTTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("neg_offset_r1.fq");
let r2_path = temp_dir.path().join("neg_offset_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 4, "Should process reads successfully");
}
#[test]
fn test_positive_offset_no_overlap_trimming() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
AAAAAAAAAAAAAAAAAAAAAACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("pos_offset_r1.fq");
let r2_path = temp_dir.path().join("pos_offset_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed");
let out1_content = std::fs::read_to_string(&out1).unwrap();
let lines1: Vec<&str> = out1_content.lines().collect();
if lines1.len() >= 2 {
let seq = lines1[1];
assert_eq!(
seq.len(),
70,
"R1 should not be trimmed for positive offset"
);
}
}
#[test]
fn test_multithreaded_overlap_consistency() {
let temp_dir = TempDir::new().unwrap();
let mut r1_data = String::new();
let mut r2_data = String::new();
for i in 0..100 {
r1_data.push_str(&format!(
"@read{i}/1\nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAAAAAAAAAAAAAAAA\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
r2_data.push_str(&format!(
"@read{i}/2\nTTTTTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n+\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
let r1_path = temp_dir.path().join("mt_r1.fq");
let r2_path = temp_dir.path().join("mt_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1_st = temp_dir.path().join("out1_st.fq");
let out2_st = temp_dir.path().join("out2_st.fq");
let json_st = temp_dir.path().join("st.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1_st.to_str().unwrap(),
"-O",
out2_st.to_str().unwrap(),
"-j",
json_st.to_str().unwrap(),
"-w",
"1",
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp single-threaded");
assert!(status.success());
let out1_mt = temp_dir.path().join("out1_mt.fq");
let out2_mt = temp_dir.path().join("out2_mt.fq");
let json_mt = temp_dir.path().join("mt.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1_mt.to_str().unwrap(),
"-O",
out2_mt.to_str().unwrap(),
"-j",
json_mt.to_str().unwrap(),
"-w",
"4",
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp multi-threaded");
assert!(status.success());
let json_st_content = std::fs::read_to_string(&json_st).unwrap();
let json_mt_content = std::fs::read_to_string(&json_mt).unwrap();
let json_st_val: serde_json::Value = serde_json::from_str(&json_st_content).unwrap();
let json_mt_val: serde_json::Value = serde_json::from_str(&json_mt_content).unwrap();
let st_passed = json_st_val["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap_or(0);
let mt_passed = json_mt_val["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap_or(0);
assert!(st_passed <= 200, "Single-threaded should process reads");
assert!(mt_passed <= 200, "Multi-threaded should process reads");
}
#[test]
fn test_lenient_mode_integration() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACCTACCTACCT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("lenient_r1.fq");
let r2_path = temp_dir.path().join("lenient_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed with lenient mode");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_high_diff_short_overlap_rejection() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("high_diff_r1.fq");
let r2_path = temp_dir.path().join("high_diff_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let adapter_trimmed = json["filtering_result"]["adapter_trimmed_reads"]
.as_u64()
.unwrap_or(0);
assert_eq!(
adapter_trimmed, 0,
"Should NOT detect overlap with too many differences"
);
}
#[test]
fn test_perfect_overlap_zero_differences() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACAAAAAAAAAAAAAAAAAAAAAAAAAA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("perfect_r1.fq");
let r2_path = temp_dir.path().join("perfect_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(
status.success(),
"fasterp should succeed with perfect overlap"
);
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_overlap_exactly_at_diff_limit() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACAAAAAAAAAAAAAAAAAAAAAAAAAA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTGTGTCTGTCTGTCTGTCTGTCTGTGTGTGTGTGTGTGTGTGTGTGTGTGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("exact_limit_r1.fq");
let r2_path = temp_dir.path().join("exact_limit_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(
status.success(),
"fasterp should succeed with overlap at diff limit"
);
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_very_long_overlap_lenient_mode() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
ACGTTCGTTCGTTCGTTCGTTCGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("long_overlap_r1.fq");
let r2_path = temp_dir.path().join("long_overlap_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(
status.success(),
"fasterp should succeed with very long overlap"
);
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_mixed_overlapping_and_non_overlapping() {
let temp_dir = TempDir::new().unwrap();
let mut r1_data = String::new();
let mut r2_data = String::new();
for i in 0..5 {
r1_data.push_str(&format!(
"@overlap_read{i}/1\n\
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACAAAAAAAAAAAAAAAAAAAAAAAAAA\n\
+\n\
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
r2_data.push_str(&format!(
"@overlap_read{i}/2\n\
TTTTTTTTTTTTTTTTTTTTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTACGT\n\
+\n\
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
for i in 5..10 {
r1_data.push_str(&format!(
"@no_overlap_read{i}/1\n\
GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n\
+\n\
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
r2_data.push_str(&format!(
"@no_overlap_read{i}/2\n\
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n\
+\n\
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n"
));
}
let r1_path = temp_dir.path().join("mixed_r1.fq");
let r2_path = temp_dir.path().join("mixed_r2.fq");
std::fs::write(&r1_path, &r1_data).unwrap();
std::fs::write(&r2_path, &r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed with mixed reads");
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 20, "Should process all reads successfully");
}
#[test]
fn test_overlap_with_polyg_trimming() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGGGGGGGGGGGGGGGGGGGGGGGGGGGG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTACGTGGGG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("polyg_r1.fq");
let r2_path = temp_dir.path().join("polyg_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
"--trim-poly-g", ])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(
status.success(),
"fasterp should succeed with overlap and polyG trimming"
);
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_overlap_detection_with_length_filter() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@read1/1
ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACAAAAAAAAAAAAAAAAAAAAAAAAAA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r2_data = "\
@read1/2
TTTTTTTTTTTTTTTTTTTTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTACGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
";
let r1_path = temp_dir.path().join("length_filter_r1.fq");
let r2_path = temp_dir.path().join("length_filter_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let json_out = temp_dir.path().join("out.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
"-j",
json_out.to_str().unwrap(),
"-l",
"20", ])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(
status.success(),
"fasterp should succeed with overlap and length filtering"
);
let json_content = std::fs::read_to_string(&json_out).unwrap();
let json: serde_json::Value = serde_json::from_str(&json_content).unwrap();
let passed = json["summary"]["after_filtering"]["total_reads"]
.as_u64()
.unwrap();
assert!(passed <= 2, "Should process reads successfully");
}
#[test]
fn test_paired_real_data() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@SRR5808766.307 HISEQ:863:HKKM2BCXY:2:1101:20943:2179/1
GTCCCGGATTTCAGCACCAATTCTTAATAGCGTTCTCACGCCCGGCCAGGA
+
ADDDDIIHDHHGHIIGHIIIIGHHHHIHCHHHHIHIIIHIHHHHIHHHIHH
@SRR5808766.308 HISEQ:863:HKKM2BCXY:2:1101:21130:2043/1
NGTCAGGACCCTGGCTTGGTGGGCAGTGAGAGGGTGTCAGGACCCTGGCTT
+
#<DDDIIIIIIIIIIIIIIIIIIGDHIIGHEGHIEHHIHHH1GHHIEHHHE
";
let r2_data = "\
@SRR5808766.307 HISEQ:863:HKKM2BCXY:2:1101:20943:2179/2
GTGTGTGTATCTTAGTTACTTTTCTATTGCTGTGAAGAGACAGCATGAACA
+
B@D<DGEEHHIHIFHIIFHIIIEFHHHIHEHHIIG?FCHF1CHHIIHGGHH
@SRR5808766.308 HISEQ:863:HKKM2BCXY:2:1101:21130:2043/2
GACACCCTCTCACTGCCCACCAAGCCAGGGTCCTGACACCCTCTCACTGTC
+
DDDDDIIHIIIIIIHHIIIIIIIIIIIIIIIIIIEHIGHEEHIIIIHI?GH
";
let r1_path = temp_dir.path().join("length_filter_r1.fq");
let r2_path = temp_dir.path().join("length_filter_r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let out1 = temp_dir.path().join("out1.fq");
let out2 = temp_dir.path().join("out2.fq");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
out1.to_str().unwrap(),
"-O",
out2.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(
status.success(),
"fasterp should succeed with overlap and length filtering"
);
let out1_content = std::fs::read_to_string(&out1).unwrap();
let out2_content = std::fs::read_to_string(&out2).unwrap();
let output_fq = temp_dir.path().join("fastp_out.fq");
let output_fq2 = temp_dir.path().join("fastp_out2.fq");
let output_json = temp_dir.path().join("fastp_out.json");
let status = Command::new("fastp")
.arg("-i")
.arg(&r1_path)
.arg("-I")
.arg(&r2_path)
.arg("-o")
.arg(&output_fq)
.arg("-O")
.arg(&output_fq2)
.arg("-j")
.arg(&output_json)
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp - is it installed?");
assert!(status.success(), "fastp command failed");
let fastp_out1 = std::fs::read_to_string(&output_fq).unwrap();
let fastp_out2 = std::fs::read_to_string(&output_fq2).unwrap();
assert_eq!(out1_content, fastp_out1, "R1 outputs should match fastp");
assert_eq!(out2_content, fastp_out2, "R2 outputs should match fastp");
}
#[test]
fn test_offset_negative_21_overlap_trimming_comparison() {
let temp_dir = TempDir::new().unwrap();
let r1_data = "\
@SRR5808766.1362 HISEQ:863:HKKM2BCXY:2:1101:1811:3236/1
GGGGTATCTAATCCCAGTTTGGGTCTTAGCCTGTCTCTTATACACATCTCC
+
D@D@DHIIIIIIHEHIHIIIEHIHHHIEHHEHHHHHIHHHIIH?GHHHIII
";
let r2_data = "\
@SRR5808766.1362 HISEQ:863:HKKM2BCXY:2:1101:1811:3236/2
GCTAAGACCCAAACTGGGATTAGATACCCCCTGTCTCTTATACACATCTGA
+
D@@D@FHHHHH?HEHIE11FHHEHHII?HHHIIFEDCEHCHHC1FHFHCGE
";
let r1_path = temp_dir.path().join("r1.fq");
let r2_path = temp_dir.path().join("r2.fq");
std::fs::write(&r1_path, r1_data).unwrap();
std::fs::write(&r2_path, r2_data).unwrap();
let fasterp_out1 = temp_dir.path().join("fasterp_out1.fq");
let fasterp_out2 = temp_dir.path().join("fasterp_out2.fq");
let fasterp_json = temp_dir.path().join("fasterp.json");
let status = Command::new(assert_cmd::cargo::cargo_bin!("fasterp"))
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
fasterp_out1.to_str().unwrap(),
"-O",
fasterp_out2.to_str().unwrap(),
"-j",
fasterp_json.to_str().unwrap(),
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to execute fasterp");
assert!(status.success(), "fasterp should succeed");
let fastp_out1 = temp_dir.path().join("fastp_out1.fq");
let fastp_out2 = temp_dir.path().join("fastp_out2.fq");
let fastp_json = temp_dir.path().join("fastp.json");
let status = Command::new("fastp")
.args([
"-i",
r1_path.to_str().unwrap(),
"-I",
r2_path.to_str().unwrap(),
"-o",
fastp_out1.to_str().unwrap(),
"-O",
fastp_out2.to_str().unwrap(),
"-j",
fastp_json.to_str().unwrap(),
"-h",
"/dev/null",
])
.stderr(std::process::Stdio::null())
.stdout(std::process::Stdio::null())
.status()
.expect("Failed to run fastp - is it installed?");
assert!(status.success(), "fastp command failed");
let fasterp_r2_content = std::fs::read_to_string(&fasterp_out2).unwrap();
let fastp_r2_content = std::fs::read_to_string(&fastp_out2).unwrap();
let fasterp_r2_lines: Vec<&str> = fasterp_r2_content.lines().collect();
let fastp_r2_lines: Vec<&str> = fastp_r2_content.lines().collect();
let fasterp_r2_seq = fasterp_r2_lines[1];
let fastp_r2_seq = fastp_r2_lines[1];
assert_eq!(
fasterp_r2_seq.len(),
51,
"fasterp intentionally matches fastp's off-by-one bug"
);
assert_eq!(
fastp_r2_seq.len(),
51,
"fastp has off-by-one bug at boundary case"
);
assert_eq!(
fasterp_r2_seq, fastp_r2_seq,
"Both tools should produce identical output"
);
assert_eq!(
fasterp_r2_seq, "GCTAAGACCCAAACTGGGATTAGATACCCCCTGTCTCTTATACACATCTGA",
"Both R2 sequences remain untrimmed due to off-by-one bug"
);
println!("✓ Test confirms fastp off-by-one bug compatibility:");
println!(" - Both fasterp and fastp skip overlap at offset=-21 (boundary case)");
println!(" - This is due to off-by-one bug in fastp (overlapanalysis.cpp:67)");
println!(" - Fasterp intentionally replicates this bug for exact compatibility");
println!(" - See fastp_offset_bug_analysis.md for detailed analysis");
}