// fxsplit 0.0.3
//
// Split FASTX into N chunks/files/headers.
// Documentation
mod common;

use common::*;

/// Runs the splitter with `--chunks 2` on a five-record FASTA written via `write_gzip`
/// and asserts that three output files are produced holding 2, 2 and 1 records.
#[test]
fn fasta_gz_chunks_mean_records_per_file() {
    let tmp = tempdir();
    let fasta_in = join(tmp.path(), "input.fasta.gz");
    let out_root = join(tmp.path(), "out_chunks");

    // Five single-line records: r1..r5.
    let payload = ">r1\nAAAA\n>r2\nCCCC\n>r3\nGGGG\n>r4\nTTTT\n>r5\nNNNN\n";
    write_gzip(&fasta_in, payload);

    // Resolve the path arguments first (input, then outdir), then assemble the CLI args.
    let input_arg = path_str(&fasta_in);
    let outdir_arg = path_str(&out_root);
    let argv: Vec<String> = [
        "--file",
        input_arg.as_str(),
        "--chunks",
        "2",
        "--threads",
        "2",
        "--outdir",
        outdir_arg.as_str(),
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();
    run_split(argv);

    let produced = list_files(&out_root);
    assert_eq!(produced.len(), 3);

    // NOTE(review): the expected order below presumably relies on `list_files`
    // returning names in a stable (sorted) order — confirm in `common`.
    let mut per_file: Vec<usize> = Vec::new();
    for name in &produced {
        per_file.push(count_fasta_records(&join(&out_root, name), true));
    }
    assert_eq!(per_file, vec![2, 2, 1]);
}

/// Runs the splitter with `--files 3` on a five-record FASTA written via `write_gzip`
/// and asserts that exactly three output files are produced holding 2, 2 and 1 records.
#[test]
fn fasta_gz_files_mean_number_of_output_files() {
    let tmp = tempdir();
    let fasta_in = join(tmp.path(), "input.fasta.gz");
    let out_root = join(tmp.path(), "out_files");

    // Five single-line records: r1..r5.
    let payload = ">r1\nAAAA\n>r2\nCCCC\n>r3\nGGGG\n>r4\nTTTT\n>r5\nNNNN\n";
    write_gzip(&fasta_in, payload);

    // Resolve the path arguments first (input, then outdir), then assemble the CLI args.
    let input_arg = path_str(&fasta_in);
    let outdir_arg = path_str(&out_root);
    let argv: Vec<String> = [
        "--file",
        input_arg.as_str(),
        "--files",
        "3",
        "--threads",
        "2",
        "--outdir",
        outdir_arg.as_str(),
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();
    run_split(argv);

    let produced = list_files(&out_root);
    assert_eq!(produced.len(), 3);

    // NOTE(review): the expected order below presumably relies on `list_files`
    // returning names in a stable (sorted) order — confirm in `common`.
    let mut per_file: Vec<usize> = Vec::new();
    for name in &produced {
        per_file.push(count_fasta_records(&join(&out_root, name), true));
    }
    assert_eq!(per_file, vec![2, 2, 1]);
}

/// Runs the splitter with `--headers` on three records whose headers are
/// `seq one`, `seq/two` and `seq/two`, and asserts the output files are named
/// `seq.fasta.gz`, `seq_two.fasta.gz` and `seq_two_1.fasta.gz`, each holding one record.
#[test]
fn fasta_gz_headers_use_sanitized_ids() {
    let tmp = tempdir();
    let fasta_in = join(tmp.path(), "input.fasta.gz");
    let out_root = join(tmp.path(), "out_headers");

    // One header with a space, plus a duplicated header containing a slash.
    let payload = ">seq one\nAAAA\n>seq/two\nCCCC\n>seq/two\nGGGG\n";
    write_gzip(&fasta_in, payload);

    // Resolve the path arguments first (input, then outdir), then assemble the CLI args.
    let input_arg = path_str(&fasta_in);
    let outdir_arg = path_str(&out_root);
    let argv: Vec<String> = [
        "--file",
        input_arg.as_str(),
        "--headers",
        "--threads",
        "2",
        "--outdir",
        outdir_arg.as_str(),
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();
    run_split(argv);

    let expected_names: Vec<String> = ["seq.fasta.gz", "seq_two.fasta.gz", "seq_two_1.fasta.gz"]
        .iter()
        .map(|name| name.to_string())
        .collect();

    let produced = list_files(&out_root);
    assert_eq!(produced, expected_names);

    // Every per-header output file must contain exactly one record.
    produced.iter().for_each(|name| {
        assert_eq!(count_fasta_records(&join(&out_root, name), true), 1);
    });
}