use std::convert::TryInto;
use std::io::Cursor;
use twobit::convert::fasta::{to_fasta, FastaReader};
use twobit::convert::to_2bit;
use twobit::TwoBitFile;
/// Narrows a `u64` to `usize` for use in the tests below.
///
/// # Panics
///
/// Panics with the message `"testing"` when `v` does not fit in `usize`.
fn u64_to_usize(v: u64) -> usize {
    let narrowed: Result<usize, _> = v.try_into();
    narrowed.expect("testing")
}
/// Converts an in-memory, three-record FASTA into 2bit and checks the
/// per-sequence metadata reported by the resulting file.
///
/// For every record the name, total length, and hard/soft mask totals are
/// asserted. The expected mask totals equal the number of `N` and lowercase
/// characters in the corresponding input literal, and the expected lengths
/// equal the number of bases once the embedded newlines are excluded.
#[test]
fn fasta_to_twobit() {
    let seq1 = "NNNNNNAGTCAGT\nCGTCAGCTAGTacgtcagtctgacgtTACG\nTGCATGCGATNNNNACGTANNN";
    let seq2 = "gtca\ngtcgtagtgctaTGACG\nTAGCTGACGT\nagtcgtc\ngctaggcACGTCGTAGCTACGT";
    let seq3 = "AGTCGTAG\nNCTNGNNN\nNNACGTTAGANGTCGTCAGTNGTACNCGNTATCNGTG\nNCNNNN";
    let data = format!(">seq1\n{}\n>seq2\n{}\n>seq3\n{}", seq1, seq2, seq3);
    // `into_bytes` hands over the string's buffer instead of copying it.
    let fasta: Vec<u8> = data.into_bytes();

    let reader = FastaReader::mem_open(fasta).expect("testing");
    let mut out = Cursor::new(vec![]);
    to_2bit(&mut out, &reader).expect("testing");
    let twobit_file_data: Vec<u8> = out.into_inner();
    let twobit = TwoBitFile::from_buf(twobit_file_data).expect("testing");

    let infos = twobit.sequence_info();
    // Check the record count up front so a missing or extra sequence is
    // reported as a count mismatch rather than as an opaque failure in the loop.
    assert_eq!(infos.len(), 3, "unexpected number of sequences");

    for (i, info) in infos.iter().enumerate() {
        match i {
            0 => {
                assert_eq!(info.chr, "seq1");
                assert_eq!(info.length, 65);
                assert_eq!(info.hard_masks_total_length, 13);
                assert_eq!(info.soft_masks_total_length, 15);
            }
            1 => {
                assert_eq!(info.chr, "seq2");
                assert_eq!(info.length, 60);
                assert_eq!(info.hard_masks_total_length, 0);
                assert_eq!(info.soft_masks_total_length, 30);
            }
            2 => {
                assert_eq!(info.chr, "seq3");
                assert_eq!(info.length, 59);
                assert_eq!(info.hard_masks_total_length, 17);
                assert_eq!(info.soft_masks_total_length, 0);
            }
            // The length assertion above guarantees exactly three entries.
            _ => unreachable!("sequence count was asserted to be 3, got index {}", i),
        }
    }
}
/// Round-trips `assets/foo.2bit` through FASTA and back to 2bit, then checks
/// that the regenerated bytes are identical to the file on disk.
///
/// The 2bit file is opened with soft-masking enabled before it is written
/// out as FASTA.
#[test]
fn twobit_fasta_roundtrip() {
    let mut reader = TwoBitFile::open("assets/foo.2bit")
        .expect("testing")
        .enable_softmask(true);

    // 2bit -> FASTA, kept in memory.
    let mut fasta_buf = Cursor::new(Vec::with_capacity(300));
    to_fasta(&mut reader, &mut fasta_buf).expect("testing");

    // FASTA -> 2bit. The reader is only borrowed immutably, matching how the
    // other tests in this file call `to_2bit`.
    let fasta_reader = FastaReader::mem_open(fasta_buf.into_inner()).expect("testing");
    let capacity = u64_to_usize(reader.info().expect("testing").file_size);
    let mut twobit_buf = Cursor::new(Vec::with_capacity(capacity));
    to_2bit(&mut twobit_buf, &fasta_reader).expect("testing");

    let original = std::fs::read("assets/foo.2bit").expect("testing");
    let regenerated = twobit_buf.get_ref();
    assert_eq!(
        original.len(),
        regenerated.len(),
        "regenerated 2bit data differs in size from the original file"
    );
    // Compare byte by byte so a failure names the first differing offset
    // instead of dumping both buffers.
    for (offset, (old, new)) in original.iter().zip(regenerated).enumerate() {
        assert_eq!(old, new, "byte mismatch at offset {}", offset);
    }
}
/// Writes single-record FASTA inputs whose lengths are not multiples of four
/// to 2bit and checks that reading `chr1` back yields the original bases.
///
/// NOTE(review): going by the test name, these lengths are presumably chosen
/// so that the packed sequence ends in a partially filled byte — confirm
/// against the 2bit format description.
#[test]
fn read_seq_roundtrips_with_terminal_partial_byte() {
    // Fifty bases built from runs of a single letter.
    let runs = [("A", 10), ("T", 10), ("C", 5), ("A", 5), ("T", 10), ("C", 10)];
    let long_seq: String = runs
        .iter()
        .map(|&(base, count)| base.repeat(count))
        .collect();

    let cases: &[&str] = &[
        "A",
        "AC",
        "ACG",
        "ACGTA",
        "ACGTG",
        "ACGTGC",
        "ACGTGCA",
        "ACGTGCACG",
        long_seq.as_str(),
    ];

    for seq in cases {
        // Guard the fixture itself: every case must have a length that is
        // not divisible by four, otherwise the test is not exercising its target.
        assert!(!seq.len().is_multiple_of(4), "{seq}");

        let fasta = format!(">chr1\n{seq}\n").into_bytes();
        let reader = FastaReader::mem_open(fasta).expect("testing");

        let mut sink = Cursor::new(Vec::<u8>::new());
        to_2bit(&mut sink, &reader).expect("testing");

        let mut twobit = TwoBitFile::from_buf(sink.into_inner()).expect("testing");
        let decoded = twobit.read_sequence("chr1", ..).expect("test");
        assert_eq!(seq, &decoded);
    }
}