#[macro_use]
#[path = "common/mod.rs"]
mod common;
use common::TvaCmd;
use std::collections::HashSet;
use std::fs;
use std::path::PathBuf;
use test_case::test_case;
// Runs `sample` with per-case flags over a four-line input and hands the
// stdout lines to the case's assertion callback.
#[test_case(
    "a\nb\nc\nd\n",
    &[],
    |lines: &[&str]| {
        // With no flags, the output must be a permutation of the input.
        let mut ordered = lines.to_vec();
        ordered.sort();
        assert_eq!(ordered, vec!["a", "b", "c", "d"]);
    };
    "shuffle_basic"
)]
#[test_case(
    "a\nb\nc\nd\n",
    &["--num", "2"],
    |lines: &[&str]| {
        assert_eq!(lines.len(), 2);
        assert!(lines.iter().all(|line| ["a", "b", "c", "d"].contains(line)));
    };
    "num_limited"
)]
#[test_case(
    "a\nb\nc\nd\n",
    &["--prob", "0.5"],
    |lines: &[&str]| {
        // The number of kept lines is random; only bound it from above.
        assert!(lines.len() <= 4);
        assert!(lines.iter().all(|line| ["a", "b", "c", "d"].contains(line)));
    };
    "prob_keeps_subset"
)]
fn test_sample_basic(input: &str, args: &[&str], assertions: fn(&[&str])) {
    let (stdout, _) = TvaCmd::new().args(&["sample"]).args(args).stdin(input).run();
    let emitted = stdout.lines().collect::<Vec<&str>>();
    assertions(&emitted);
}
/// With `--header`, the first line is emitted first and unchanged, and
/// `--num 2` yields exactly two of the remaining data rows.
#[test]
fn sample_header_preserved() {
    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--header", "--num", "2"])
        .stdin("h1\n1\n2\n3\n")
        .run();

    let mut rows = stdout.lines();
    assert_eq!(rows.next().unwrap(), "h1");

    let sampled: Vec<&str> = rows.collect();
    assert_eq!(sampled.len(), 2);
    assert!(sampled.iter().all(|row| ["1", "2", "3"].contains(row)));
}
// Each case runs `sample` with an argument set that is expected to fail and
// checks stderr with the supplied predicate.
#[test_case(
    &["--prob", "0.0"],
    "",
    |stderr: &str| stderr.contains("invalid --prob/-p value");
    "invalid_prob_rejected"
)]
#[test_case(
    &["-n", "10", "-p", "0.5"],
    "a\n",
    |stderr: &str| stderr.contains("--num/-n and --prob/-p cannot be used together");
    "num_prob_conflict"
)]
#[test_case(
    &["-r", "-p", "0.5"],
    "a\n",
    |stderr: &str| stderr.contains("--replace/-r cannot be used with --prob/-p");
    "replace_prob_conflict"
)]
#[test_case(
    &["-r"],
    "a\n",
    |stderr: &str| stderr.contains("--replace/-r requires --num/-n greater than 0");
    "replace_no_num"
)]
#[test_case(
    &["-i", "-r", "-n", "5"],
    "a\n",
    |stderr: &str| stderr.contains("--inorder/-i requires --num/-n without --replace/-r or --prob/-p");
    "inorder_conflicts"
)]
#[test_case(
    &["-w", "1", "-p", "0.5"],
    "a\n",
    |stderr: &str| stderr.contains("--weight-field/-w cannot be used with --prob/-p");
    "weight_prob_conflict"
)]
#[test_case(
    &["-p", "1.5"],
    "a\n",
    |stderr: &str| stderr.contains("invalid --prob/-p value");
    "invalid_prob"
)]
#[test_case(
    &["--gen-random-inorder", "-n", "10"],
    "a\n",
    |stderr: &str| stderr.contains("--gen-random-inorder cannot be combined with sampling options");
    "gen_random_inorder_conflicts"
)]
#[test_case(
    &["-w", "1", "-r", "-n", "10"],
    "a\n",
    |stderr: &str| stderr.contains("--weight-field/-w cannot be used with --replace/-r");
    "weight_replace_conflict"
)]
#[test_case(
    &["-k", "1"],
    "a\n",
    |stderr: &str| stderr.contains("--key-fields/-k requires --prob/-p");
    "key_no_prob"
)]
#[test_case(
    &["-k", "1", "-p", "0.5", "-n", "10"],
    "a\n",
    |stderr: &str| stderr.contains("--key-fields/-k cannot be used with --num/-n");
    "key_conflicts"
)]
#[test_case(
    &["--print-random", "--gen-random-inorder"],
    "a\n",
    |stderr: &str| stderr.contains("--print-random cannot be used with --gen-random-inorder");
    "print_random_gen_random_conflict"
)]
#[test_case(
    &["--print-random", "-r", "-n", "10"],
    "a\n",
    |stderr: &str| stderr.contains("--print-random is not supported with --replace/-r");
    "print_random_replace_conflict"
)]
#[test_case(
    &["-w", "5", "-n", "1"],
    "a\tb\n",
    |stderr: &str| stderr.contains("weight field index 5 out of range");
    "weight_index_out_of_range"
)]
#[test_case(
    &["-w", "1", "-n", "1"],
    "not_a_number\n",
    |stderr: &str| stderr.contains("weight value `not_a_number` is not a valid number");
    "weight_invalid_value"
)]
#[test_case(
    &["-k", "5", "-p", "0.5"],
    "a\tb\n",
    |stderr: &str| stderr.contains("key field index 5 out of range");
    "key_index_out_of_range"
)]
fn test_sample_errors(args: &[&str], input: &str, check: fn(&str) -> bool) {
    let (_, stderr) = TvaCmd::new().args(&["sample"]).args(args).stdin(input).run_fail();
    let matched = check(&stderr);
    // Include the captured stderr so a mismatch is diagnosable from the log.
    assert!(matched, "Expected error message not found in: {}", stderr);
}
/// Two runs with `--static-seed` over the same input must produce identical
/// stdout.
#[test]
fn sample_static_seed_produces_reproducible_output() {
    let input = "a\nb\nc\nd\n";
    let run_once = || {
        let (stdout, _) = TvaCmd::new()
            .args(&["sample", "--num", "3", "--static-seed"])
            .stdin(input)
            .run();
        stdout
    };

    let first = run_once();
    let second = run_once();
    assert_eq!(first, second);
}
// Error cases specific to `--replace`: the command is expected to fail and
// stderr must satisfy the case's predicate.
#[test_case(
    &["--replace"],
    "a\nb\nc\nd\n",
    |stderr: &str| stderr.contains("requires --num/-n greater than 0");
    "replace_requires_num"
)]
fn test_sample_replace_errors(args: &[&str], input: &str, check: fn(&str) -> bool) {
    let (_, stderr) = TvaCmd::new()
        .args(&["sample"])
        .args(args)
        .stdin(input)
        .run_fail();
    // Report the captured stderr on failure, matching `test_sample_errors`,
    // so a changed error message can be diagnosed from the test output.
    assert!(
        check(&stderr),
        "Expected error message not found in: {}",
        stderr
    );
}
/// Combining `--replace` with `--prob` is expected to fail; the error text
/// is not inspected here.
#[test]
fn sample_replace_conflicts_with_prob() {
    let flags = ["sample", "--replace", "--num", "2", "--prob", "0.5"];
    TvaCmd::new().args(&flags).stdin("a\nb\nc\nd\n").run_fail();
}
/// With `--replace`, `--num 10` yields ten lines from a four-line input, and
/// every emitted line is one of the input lines.
#[test]
fn sample_replace_basic() {
    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--num", "10", "--replace", "--static-seed"])
        .stdin("a\nb\nc\nd\n")
        .run();

    let drawn: Vec<&str> = stdout.lines().collect();
    assert_eq!(drawn.len(), 10);
    assert!(drawn.iter().all(|line| ["a", "b", "c", "d"].contains(line)));
}
// Error cases for `--inorder`. Only the failure itself is checked; the
// predicate argument is accepted but intentionally not called.
#[test_case(
    &["--inorder"],
    "a\nb\nc\nd\n",
    |_stderr: &str| true; // Just check it fails
    "inorder_requires_num"
)]
fn test_sample_inorder_errors(args: &[&str], input: &str, _check: fn(&str) -> bool) {
    TvaCmd::new().args(&["sample"]).args(args).stdin(input).run_fail();
}
/// `--inorder` together with `--prob` is expected to fail; stderr is not
/// inspected.
#[test]
fn sample_inorder_conflicts_with_prob() {
    let flags = ["sample", "--num", "2", "--prob", "0.5", "--inorder"];
    TvaCmd::new().args(&flags).stdin("a\nb\nc\nd\n").run_fail();
}
/// `--inorder` together with `--replace` is expected to fail; stderr is not
/// inspected.
#[test]
fn sample_inorder_conflicts_with_replace() {
    let flags = ["sample", "--num", "2", "--replace", "--inorder"];
    TvaCmd::new().args(&flags).stdin("a\nb\nc\nd\n").run_fail();
}
/// `--inorder --num 2` emits two input lines in their original relative
/// order.
#[test]
fn sample_inorder_basic() {
    const INPUT_LINES: [&str; 4] = ["a", "b", "c", "d"];

    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--num", "2", "--inorder", "--static-seed"])
        .stdin("a\nb\nc\nd\n")
        .run();

    let picked: Vec<&str> = stdout.lines().collect();
    assert_eq!(picked.len(), 2);
    assert!(picked.iter().all(|line| INPUT_LINES.contains(line)));

    // Rank of a line within the input; unknown lines sort after everything.
    let rank = |s: &str| INPUT_LINES.iter().position(|known| *known == s).unwrap_or(10);
    assert!(rank(picked[0]) < rank(picked[1]));
}
/// Weighted sampling of one row with the static seed; the expected pick is
/// the row carrying the largest weight.
#[test]
fn sample_weight_field_basic() {
    let flags = ["sample", "--num", "1", "--weight-field", "2", "--static-seed"];
    let (stdout, _) = TvaCmd::new()
        .args(&flags)
        .stdin("x\t1\nx\t10\nx\t100\nx\t1000\n")
        .run();

    assert_eq!(stdout.lines().count(), 1);
    assert_eq!(stdout.lines().next(), Some("x\t1000"));
}
/// The weight column can be selected by header name; output is the header
/// plus one sampled row.
#[test]
fn sample_weight_field_header_by_name() {
    let flags = [
        "sample",
        "--header",
        "--num",
        "1",
        "--weight-field",
        "w",
        "--static-seed",
    ];
    let (stdout, _) = TvaCmd::new()
        .args(&flags)
        .stdin("name\tw\nx\t1\ny\t10\nz\t100\n")
        .run();

    let rows: Vec<&str> = stdout.lines().collect();
    assert_eq!(rows.len(), 2);
    assert_eq!(rows[0], "name\tw");
}
/// `--weight-field` is expected to fail when combined with either `--prob`
/// or `--replace`; stderr is not inspected.
#[test]
fn sample_weight_field_conflicts_with_prob_and_replace() {
    let input = "x\t1\nx\t10\n";
    let conflicting: [&[&str]; 2] = [&["--prob", "0.5"], &["--replace"]];

    for extra in conflicting.iter() {
        TvaCmd::new()
            .args(&["sample", "--num", "1", "--weight-field", "2"])
            .args(*extra)
            .stdin(input)
            .run_fail();
    }
}
/// A weight field of `0` must make the command fail with an `Error:` line
/// on stderr.
#[test]
fn sample_weight_field_invalid_field_list_reports_error() {
    let flags = ["sample", "--num", "1", "--weight-field", "0"];
    let (_, stderr) = TvaCmd::new().args(&flags).stdin("x\t1\nx\t10\n").run_fail();
    assert!(stderr.contains("Error:"));
}
/// `--key-fields` without `--prob` is expected to fail; stderr is not
/// inspected.
#[test]
fn sample_key_fields_requires_prob() {
    let flags = ["sample", "--header", "--key-fields", "k"];
    TvaCmd::new().args(&flags).stdin("k\tv\na\t1\n").run_fail();
}
/// With `--key-fields`, rows sharing a key are kept or dropped together:
/// each key appears either zero times or with both of its rows.
#[test]
fn sample_key_fields_distinct_per_key() {
    let flags = [
        "sample",
        "--header",
        "--prob",
        "0.5",
        "--key-fields",
        "k",
        "--static-seed",
    ];
    let (stdout, _) = TvaCmd::new()
        .args(&flags)
        .stdin("k\tv\na\t1\na\t2\nb\t3\nb\t4\n")
        .run();

    let mut rows = stdout.lines();
    assert_eq!(rows.next().unwrap(), "k\tv");

    let data: Vec<&str> = rows.collect();
    // Number of data rows whose first tab-separated column equals `key`.
    let rows_with_key = |key: &str| {
        data.iter()
            .filter(|row| row.split('\t').next() == Some(key))
            .count()
    };
    let count_a = rows_with_key("a");
    let count_b = rows_with_key("b");

    assert!(count_a == 0 || count_a == 2);
    assert!(count_b == 0 || count_b == 2);
}
/// `--gen-random-inorder` with `--header` prepends a `random_value` column
/// and keeps the data rows in input order, each prefixed by a float.
#[test]
fn sample_gen_random_inorder_basic() {
    let flags = ["sample", "--header", "--gen-random-inorder", "--static-seed"];
    let (stdout, _) = TvaCmd::new()
        .args(&flags)
        .stdin("k\tv\na\t1\nb\t2\n")
        .run();

    let mut rows = stdout.lines();
    let header_cols: Vec<&str> = rows.next().unwrap().split('\t').collect();
    for (idx, name) in ["random_value", "k", "v"].iter().enumerate() {
        assert_eq!(header_cols[idx], *name);
    }

    let data: Vec<&str> = rows.collect();
    assert_eq!(data.len(), 2);

    let expected = [("a", "1"), ("b", "2")];
    for (row, (key, value)) in data.iter().zip(expected.iter()) {
        let cols: Vec<&str> = row.split('\t').collect();
        assert!(cols[0].parse::<f64>().is_ok());
        assert_eq!(cols[1], *key);
        assert_eq!(cols[2], *value);
    }
}
/// `--gen-random-inorder` is expected to fail when combined with `--num` or
/// `--prob`; stderr is not inspected.
#[test]
fn sample_gen_random_inorder_conflicts_with_sampling() {
    let input = "a\nb\nc\n";
    for extra in &[["--num", "2"], ["--prob", "0.5"]] {
        TvaCmd::new()
            .args(&["sample", "--gen-random-inorder"])
            .args(extra)
            .stdin(input)
            .run_fail();
    }
}
/// `--print-random` without `--num` emits every input line, each with a
/// leading column that parses as a float.
#[test]
fn sample_print_random_basic() {
    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--print-random", "--static-seed"])
        .stdin("a\nb\nc\n")
        .run();

    assert_eq!(stdout.lines().count(), 3);
    for row in stdout.lines() {
        // `split` always yields at least one item, so `unwrap` cannot fail.
        let first_col = row.split('\t').next().unwrap();
        assert!(first_col.parse::<f64>().is_ok());
    }
}
/// `--print-random` together with `--replace` is expected to fail; stderr
/// is not inspected.
#[test]
fn sample_print_random_not_allowed_with_replace() {
    let flags = ["sample", "--num", "5", "--replace", "--print-random"];
    TvaCmd::new().args(&flags).stdin("a\nb\nc\n").run_fail();
}
/// In compatibility mode with a static seed, the lines chosen for `--num 5`
/// must all be among those chosen for `--num 10`.
#[test]
fn sample_compat_num_superset() {
    // Twenty lines: "0\n1\n...19\n".
    let input: String = (0..20).map(|i| format!("{}\n", i)).collect();

    let sample_with_limit = |limit: &str| -> HashSet<String> {
        let (stdout, _) = TvaCmd::new()
            .args(&[
                "sample",
                "--compatibility-mode",
                "--static-seed",
                "--num",
                limit,
            ])
            .stdin(input.clone())
            .run();
        stdout.lines().map(String::from).collect()
    };

    let lines_small = sample_with_limit("5");
    let lines_large = sample_with_limit("10");
    assert!(lines_small.is_subset(&lines_large));
}
/// Shuffles two headered TSV files in compatibility mode and checks that the
/// output starts with the first file's header and has one line per data row
/// across both inputs.
#[test]
fn sample_compat_multi_file_from_tsv_sample_inputs() {
    // Number of lines in `path` after its first (header) line. Takes `&Path`
    // rather than `&PathBuf` so any path-like borrow can be passed.
    fn count_data_rows(path: &std::path::Path) -> anyhow::Result<usize> {
        let contents = fs::read_to_string(path)?;
        Ok(contents.lines().skip(1).count())
    }

    let base = PathBuf::from("tests/data/sample");
    let input1 = base.join("input3x10.tsv");
    let input2 = base.join("input3x25.tsv");

    // Read the first input once: its first line is the expected header and
    // the remaining lines are its data rows.
    let first_contents = fs::read_to_string(&input1).unwrap();
    let mut first_lines = first_contents.lines();
    let expected_header = first_lines.next().unwrap();
    let first_rows = first_lines.count();

    let (stdout, _) = TvaCmd::new()
        .args(&[
            "sample",
            "--header",
            "--static-seed",
            "--compatibility-mode",
            input1.to_str().unwrap(),
            input2.to_str().unwrap(),
        ])
        .run();

    let mut out_lines = stdout.lines();
    let header = out_lines.next().unwrap();
    assert_eq!(header, expected_header);

    let expected_rows = first_rows + count_data_rows(&input2).unwrap();
    assert_eq!(out_lines.count(), expected_rows);
}
/// Mixes stdin (`-`) with two file arguments in compatibility mode and checks
/// that the output starts with the stdin data's header and has one line per
/// data row across all three inputs.
#[test]
fn sample_compat_stdin_and_files_from_tsv_sample_inputs() {
    // Number of lines in `path` after its first (header) line. Every input
    // in this test is counted with its first line excluded.
    fn count_data_rows(path: &std::path::Path) -> anyhow::Result<usize> {
        let contents = fs::read_to_string(path)?;
        Ok(contents.lines().skip(1).count())
    }

    let base = PathBuf::from("tests/data/sample");
    let stdin_path = base.join("input3x10.tsv");
    let file1 = base.join("input3x3.tsv");
    let file2 = base.join("input3x4.tsv");

    // Read the stdin payload once and derive the expected header and row
    // count from it before it is handed to the command.
    let stdin_data = fs::read_to_string(&stdin_path).unwrap();
    let mut stdin_lines = stdin_data.lines();
    let expected_header = stdin_lines.next().unwrap().to_owned();
    let stdin_rows = stdin_lines.count();

    let (stdout, _) = TvaCmd::new()
        .args(&[
            "sample",
            "--header",
            "--static-seed",
            "--compatibility-mode",
            "--",
            "-",
            file1.to_str().unwrap(),
            file2.to_str().unwrap(),
        ])
        .stdin(stdin_data)
        .run();

    let mut out_lines = stdout.lines();
    let header = out_lines.next().unwrap();
    assert_eq!(header, expected_header.as_str());

    let expected_rows =
        stdin_rows + count_data_rows(&file1).unwrap() + count_data_rows(&file2).unwrap();
    assert_eq!(out_lines.count(), expected_rows);
}
/// Sampling the `.dos_tsv` variant of a file must yield the same header and
/// the same number of data rows as its Unix counterpart contains.
#[test]
fn sample_windows_newlines_from_tsv_sample_inputs() {
    let dir = PathBuf::from("tests/data/sample");
    let unix_contents = fs::read_to_string(dir.join("input3x25.tsv")).unwrap();
    let dos_path = dir.join("input3x25.dos_tsv");

    // Reference values from the Unix file: header text and data-row count.
    let (expected_header, expected_rows) = {
        let mut it = unix_contents.lines();
        (it.next().unwrap(), it.count())
    };

    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--header", dos_path.to_str().unwrap()])
        .run();

    let mut produced = stdout.lines();
    assert_eq!(produced.next().unwrap(), expected_header);
    assert_eq!(produced.count(), expected_rows);
}
/// Key-based probability sampling keeps or drops all rows of a key together:
/// key `a` appears 0 or 3 times, key `b` 0 or 2 times.
#[test]
fn sample_distinct_basic() {
    let input = "a\t1
a\t2
b\t1
a\t3
b\t2
";
    let flags = [
        "sample",
        "--key-fields",
        "1",
        "--prob",
        "0.5",
        "--static-seed",
    ];
    let (stdout, _) = TvaCmd::new().args(&flags).stdin(input).run();

    // Count output rows whose first column (including the tab) is `prefix`.
    let rows_starting_with =
        |prefix: &str| stdout.lines().filter(|row| row.starts_with(prefix)).count();
    let a_count = rows_starting_with("a\t");
    let b_count = rows_starting_with("b\t");

    assert!(a_count == 0 || a_count == 3, "a_count was {}", a_count);
    assert!(b_count == 0 || b_count == 2, "b_count was {}", b_count);
}
/// A weighted shuffle without `--num` must emit every input row exactly as
/// given; the order is not checked.
#[test]
fn sample_weighted_shuffle() {
    let input = "A\t1
B\t100
C\t1
";
    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--weight-field", "2", "--static-seed"])
        .stdin(input)
        .run();

    let shuffled: Vec<&str> = stdout.lines().collect();
    assert_eq!(shuffled.len(), 3);
    for row in ["A\t1", "B\t100", "C\t1"].iter() {
        assert!(shuffled.contains(row));
    }
}
/// `--print-random` with `--num 1` emits a single two-column row whose first
/// column parses as a float.
#[test]
fn sample_print_random() {
    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--num", "1", "--print-random", "--static-seed"])
        .stdin("a\nb\n")
        .run();

    let rows: Vec<&str> = stdout.lines().collect();
    assert_eq!(rows.len(), 1);

    let fields: Vec<&str> = rows[0].split('\t').collect();
    assert_eq!(fields.len(), 2);

    let _random_value: f64 = fields[0]
        .parse()
        .expect("First field should be random value");
}
/// `--gen-random-inorder` keeps input order and prefixes each row with a
/// column; with `--header` the header gains a `random_value` column.
#[test]
fn sample_gen_random_inorder() {
    // Headerless input: rows must come back in order, each ending in the
    // original line after a tab.
    let input = "a
b
c
";
    let (stdout, _) = TvaCmd::new()
        .args(&["sample", "--gen-random-inorder", "--static-seed"])
        .stdin(input)
        .run();
    let rows: Vec<&str> = stdout.lines().collect();
    assert_eq!(rows.len(), 3);
    for (row, suffix) in rows.iter().zip(["\ta", "\tb", "\tc"].iter()) {
        assert!(row.ends_with(*suffix));
    }

    // Headered input: the header line is extended, data rows follow.
    let headered_flags = [
        "sample",
        "--gen-random-inorder",
        "--header",
        "--static-seed",
    ];
    let (stdout_h, _) = TvaCmd::new()
        .args(&headered_flags)
        .stdin("h\na\nb\n")
        .run();
    let rows_h: Vec<&str> = stdout_h.lines().collect();
    assert_eq!(rows_h[0], "random_value\th");
    assert!(rows_h[1].ends_with("\ta"));
}
/// Sampling two headered files yields eight output lines, the first of
/// which contains the text `line`.
#[test]
fn sample_multiple_files() {
    let sample_file = |name: &str| PathBuf::from("tests/data/sample").join(name);
    let input1 = sample_file("input3x3.tsv");
    let input2 = sample_file("input3x4.tsv");

    let (stdout, _) = TvaCmd::new()
        .args(&[
            "sample",
            "--header",
            "--static-seed",
            input1.to_str().unwrap(),
            input2.to_str().unwrap(),
        ])
        .run();

    let rows: Vec<&str> = stdout.lines().collect();
    assert!(rows[0].contains("line"));
    assert_eq!(rows.len(), 8);
}