use std::ffi;
use std::path::Path;
use std::rc::Rc;
use std::str;
use snafu::ensure;
use url::Url;
pub mod buffer;
pub mod errors;
pub mod header;
pub mod record;
use crate::bcf::header::{HeaderView, SampleSubset};
use crate::htslib;
pub use crate::bcf::errors::Error;
pub use crate::bcf::errors::Result;
pub use crate::bcf::header::{Header, HeaderRecord};
pub use crate::bcf::record::Record;
/// Common interface shared by the VCF/BCF readers defined in this module.
pub trait Read: Sized {
/// Read the next record into `record`.
///
/// The implementations in this file return `Ok(true)` when a record was
/// read, `Ok(false)` when no further record is delivered, and an error
/// when the underlying htslib call reports a failure.
fn read(&mut self, record: &mut record::Record) -> Result<bool>;
/// Return an iterator over the records of this reader.
fn records(&mut self) -> Records<'_, Self>;
/// Return the header view associated with this reader.
fn header(&self) -> &HeaderView;
/// Create a new record (via `Record::new`) that shares this reader's header.
fn empty_record(&self) -> Record;
/// Set the number of threads used by the reader.
///
/// The implementations in this file panic when `n_threads` is `0`.
fn set_threads(&mut self, n_threads: usize) -> Result<()>;
}
/// A VCF/BCF reader backed by an htslib file handle.
#[derive(Debug)]
pub struct Reader {
// Raw htslib file handle; closed with `hts_close` when the reader is dropped.
inner: *mut htslib::htsFile,
// Header read when the reader was created; a clone of this `Rc` is attached to every record read.
header: Rc<HeaderView>,
}
// NOTE(review): `Reader` contains an `Rc`, whose reference count is not atomic.
// This manual `Send` is presumably only sound while no clone of `header`
// (e.g. inside a `Record`) stays behind on another thread — confirm.
unsafe impl Send for Reader {}
pub unsafe fn set_threads(hts_file: *mut htslib::htsFile, n_threads: usize) -> Result<()> {
assert!(n_threads > 0, "n_threads must be > 0");
let r = htslib::hts_set_threads(hts_file, n_threads as i32);
if r != 0 {
Err(Error::SetThreads)
} else {
Ok(())
}
}
impl Reader {
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
match path.as_ref().to_str() {
Some(p) if path.as_ref().exists() => Ok(Self::new(p.as_bytes())?),
_ => Err(errors::Error::NonUnicodePath),
}
}
pub fn from_url(url: &Url) -> Result<Self> {
Self::new(url.as_str().as_bytes())
}
pub fn from_stdin() -> Result<Self> {
Self::new(b"-")
}
fn new(path: &[u8]) -> Result<Self> {
let htsfile = bcf_open(path, b"r")?;
let header = unsafe { htslib::bcf_hdr_read(htsfile) };
Ok(Reader {
inner: htsfile,
header: Rc::new(HeaderView::new(header)),
})
}
}
impl Read for Reader {
fn read(&mut self, record: &mut record::Record) -> Result<bool> {
match unsafe { htslib::bcf_read(self.inner, self.header.inner, record.inner) } {
0 => {
unsafe {
htslib::bcf_unpack(record.inner_mut(), htslib::BCF_UN_ALL as i32);
}
record.set_header(self.header.clone());
Ok(true)
}
-1 => Ok(false),
_ => Err(Error::InvalidRecord),
}
}
fn records(&mut self) -> Records<'_, Self> {
Records { reader: self }
}
fn set_threads(&mut self, n_threads: usize) -> Result<()> {
unsafe { set_threads(self.inner, n_threads) }
}
fn header(&self) -> &HeaderView {
&self.header
}
fn empty_record(&self) -> Record {
Record::new(self.header.clone())
}
}
// Closes the underlying htslib file handle when the reader goes out of scope.
impl Drop for Reader {
fn drop(&mut self) {
// The return value of `hts_close` is discarded; `drop` has no way to report it.
unsafe {
htslib::hts_close(self.inner);
}
}
}
/// A VCF/BCF reader built on htslib's synced reader (`bcf_srs_t`) that supports `fetch`.
#[derive(Debug)]
pub struct IndexedReader {
// Raw htslib synced-reader handle; destroyed with `bcf_sr_destroy` on drop.
inner: *mut htslib::bcf_srs_t,
// Duplicate of the first reader's header; a clone of this `Rc` is attached to every record read.
header: Rc<HeaderView>,
// Region set by the last successful `fetch`, as `(rid, start, end)`; `None` until then.
current_region: Option<(u32, u64, u64)>,
}
// NOTE(review): `IndexedReader` contains an `Rc`, whose reference count is not
// atomic. This manual `Send` is presumably only sound while no clone of
// `header` stays behind on another thread — confirm.
unsafe impl Send for IndexedReader {}
impl IndexedReader {
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
match path.as_ref().to_str() {
Some(p) if path.as_ref().exists() => Ok(Self::new(&ffi::CString::new(p).unwrap())?),
_ => Err(Error::NonUnicodePath),
}
}
pub fn from_url(url: &Url) -> Result<Self> {
Self::new(&ffi::CString::new(url.as_str()).unwrap())
}
fn new(path: &ffi::CStr) -> Result<Self> {
let ser_reader = unsafe { htslib::bcf_sr_init() };
unsafe {
htslib::bcf_sr_set_opt(ser_reader, 0);
}
if unsafe { htslib::bcf_sr_add_reader(ser_reader, path.as_ptr()) } >= 0 {
let header = Rc::new(HeaderView::new(unsafe {
htslib::bcf_hdr_dup((*(*ser_reader).readers.offset(0)).header)
}));
Ok(IndexedReader {
inner: ser_reader,
header,
current_region: None,
})
} else {
Err(Error::Open {
target: path.to_str().unwrap().to_owned(),
})
}
}
pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
let contig = self.header.rid2name(rid).unwrap();
let contig = ffi::CString::new(contig).unwrap();
if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
Err(Error::Seek {
contig: contig.to_str().unwrap().to_owned(),
start,
})
} else {
self.current_region = Some((rid, start, end));
Ok(())
}
}
}
impl Read for IndexedReader {
fn read(&mut self, record: &mut record::Record) -> Result<bool> {
match unsafe { htslib::bcf_sr_next_line(self.inner) } {
0 => {
if unsafe { (*self.inner).errnum } != 0 {
Err(Error::InvalidRecord)
} else {
Ok(false)
}
}
i => {
assert!(i > 0, "Must not be negative");
unsafe {
htslib::bcf_copy(
record.inner,
*(*(*self.inner).readers.offset(0)).buffer.offset(0),
);
}
record.set_header(self.header.clone());
match self.current_region {
Some((rid, _start, end)) => {
if record.rid().is_some()
&& rid == record.rid().unwrap()
&& record.pos() as u64 <= end
{
Ok(true)
} else {
Ok(false)
}
}
None => Ok(true),
}
}
}
}
fn records(&mut self) -> Records<'_, Self> {
Records { reader: self }
}
fn set_threads(&mut self, n_threads: usize) -> Result<()> {
assert!(n_threads > 0, "n_threads must be > 0");
let r = unsafe { htslib::bcf_sr_set_threads(self.inner, n_threads as i32) };
if r != 0 {
Err(Error::SetThreads)
} else {
Ok(())
}
}
fn header(&self) -> &HeaderView {
&self.header
}
fn empty_record(&self) -> Record {
Record::new(self.header.clone())
}
}
// Destroys the underlying htslib synced reader when the reader goes out of scope.
impl Drop for IndexedReader {
fn drop(&mut self) {
unsafe { htslib::bcf_sr_destroy(self.inner) };
}
}
pub mod synced {
use super::*;
/// Re-exports of htslib's `BCF_SR_PAIR_*` constants, intended as the
/// `bitmask` argument of `SyncedReader::set_pairing`.
///
/// See the htslib synced reader documentation for the meaning of each value.
pub mod pairing {
pub const SNPS: u32 = crate::htslib::BCF_SR_PAIR_SNPS;
pub const INDELS: u32 = crate::htslib::BCF_SR_PAIR_INDELS;
pub const ANY: u32 = crate::htslib::BCF_SR_PAIR_ANY;
pub const SOME: u32 = crate::htslib::BCF_SR_PAIR_SOME;
pub const SNP_REF: u32 = crate::htslib::BCF_SR_PAIR_SNP_REF;
pub const INDEL_REF: u32 = crate::htslib::BCF_SR_PAIR_INDEL_REF;
pub const EXACT: u32 = crate::htslib::BCF_SR_PAIR_EXACT;
pub const BOTH: u32 = crate::htslib::BCF_SR_PAIR_BOTH;
pub const BOTH_REF: u32 = crate::htslib::BCF_SR_PAIR_BOTH_REF;
}
/// A reader over several VCF/BCF files at once, backed by htslib's synced reader.
#[derive(Debug)]
pub struct SyncedReader {
// Raw htslib synced-reader handle; destroyed with `bcf_sr_destroy` on drop.
inner: *mut crate::htslib::bcf_srs_t,
// One duplicated header per added reader, in the order the readers were added.
headers: Vec<Rc<HeaderView>>,
// Region set by the last successful `fetch`, as `(rid, start, end)`; `None` until then.
current_region: Option<(u32, u64, u64)>,
}
impl SyncedReader {
pub fn new() -> Result<Self> {
let inner = unsafe { crate::htslib::bcf_sr_init() };
ensure!(!inner.is_null(), errors::AllocationError);
Ok(SyncedReader {
inner,
headers: Vec::new(),
current_region: None,
})
}
pub fn set_require_index(&mut self, do_require: bool) {
unsafe {
(*self.inner).require_index = if do_require { 1 } else { 0 };
}
}
pub fn set_pairing(&mut self, bitmask: u32) {
unsafe {
crate::htslib::bcf_sr_set_opt(self.inner, 1, bitmask);
}
}
pub fn add_reader<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
match path.as_ref().to_str() {
Some(p) if path.as_ref().exists() => {
let p_cstring = ffi::CString::new(p).unwrap();
let res =
unsafe { crate::htslib::bcf_sr_add_reader(self.inner, p_cstring.as_ptr()) };
ensure!(
res != 0,
errors::Open {
target: p.to_owned()
}
);
let i = (self.reader_count() - 1) as isize;
let header = Rc::new(HeaderView::new(unsafe {
crate::htslib::bcf_hdr_dup((*(*self.inner).readers.offset(i)).header)
}));
self.headers.push(header);
Ok(())
}
_ => Err(errors::Error::NonUnicodePath),
}
}
pub fn remove_reader(&mut self, idx: u32) {
if idx >= self.reader_count() {
panic!("Invalid reader!");
} else {
unsafe {
crate::htslib::bcf_sr_remove_reader(self.inner, idx as i32);
}
self.headers.remove(idx as usize);
}
}
pub fn reader_count(&self) -> u32 {
unsafe { (*self.inner).nreaders as u32 }
}
pub fn read_next(&mut self) -> Result<u32> {
let num = unsafe { crate::htslib::bcf_sr_next_line(self.inner) as u32 };
if num == 0 {
ensure!(unsafe { (*self.inner).errnum } == 0, errors::InvalidRecord);
Ok(0)
} else {
assert!(num > 0, "num returned by htslib must not be negative");
match self.current_region {
Some((rid, _start, end)) => {
for idx in 0..self.reader_count() {
if !self.has_line(idx) {
continue;
}
unsafe {
let record = *(*(*self.inner).readers.offset(idx as isize))
.buffer
.offset(0);
if (*record).rid != (rid as i32) || (*record).pos >= (end as i64) {
return Ok(0);
}
}
}
Ok(num)
}
None => Ok(num),
}
}
}
pub fn has_line(&self, idx: u32) -> bool {
if idx >= self.reader_count() {
panic!("Invalid reader!");
} else {
unsafe { (*(*self.inner).has_line.offset(idx as isize)) != 0 }
}
}
pub fn record(&self, idx: u32) -> Option<Record> {
if self.has_line(idx) {
let record = Record::new(self.headers[idx as usize].clone());
unsafe {
crate::htslib::bcf_copy(
record.inner,
*(*(*self.inner).readers.offset(idx as isize))
.buffer
.offset(0),
);
}
Some(record)
} else {
None
}
}
pub fn header(&self, idx: u32) -> &HeaderView {
if idx >= self.reader_count() {
panic!("Invalid reader!");
} else {
&self.headers[idx as usize]
}
}
pub fn fetch(&mut self, rid: u32, start: u64, end: u64) -> Result<()> {
let contig = {
let contig = self.header(0).rid2name(rid).unwrap();
ffi::CString::new(contig).unwrap()
};
if unsafe { htslib::bcf_sr_seek(self.inner, contig.as_ptr(), start as i64) } != 0 {
Err(Error::Seek {
contig: contig.to_str().unwrap().to_owned(),
start,
})
} else {
self.current_region = Some((rid, start, end));
Ok(())
}
}
}
// Destroys the underlying htslib synced reader when the reader goes out of scope.
impl Drop for SyncedReader {
fn drop(&mut self) {
unsafe { crate::htslib::bcf_sr_destroy(self.inner) };
}
}
}
/// Output format of a `Writer`; together with the `uncompressed` flag it
/// selects the htslib open mode.
#[derive(Clone, Copy, Debug)]
pub enum Format {
// Text output: mode `w` when uncompressed, `wz` otherwise.
VCF,
// Binary output: mode `wbu` when uncompressed, `wb` otherwise.
BCF,
}
/// A VCF/BCF writer backed by an htslib file handle.
#[derive(Debug)]
pub struct Writer {
// Raw htslib file handle; closed with `hts_close` when the writer is dropped.
inner: *mut htslib::htsFile,
// Duplicate of the header the writer was created with.
header: Rc<HeaderView>,
// Sample subset copied from the header passed at construction; used by `subset`.
subset: Option<SampleSubset>,
}
// NOTE(review): `Writer` contains an `Rc`, whose reference count is not atomic.
// This manual `Send` is presumably only sound while no clone of `header`
// stays behind on another thread — confirm.
unsafe impl Send for Writer {}
impl Writer {
pub fn from_path<P: AsRef<Path>>(
path: P,
header: &Header,
uncompressed: bool,
format: Format,
) -> Result<Self> {
if let Some(p) = path.as_ref().to_str() {
Ok(Self::new(p.as_bytes(), header, uncompressed, format)?)
} else {
Err(errors::Error::NonUnicodePath)
}
}
pub fn from_url(
url: &Url,
header: &Header,
uncompressed: bool,
format: Format,
) -> Result<Self> {
Self::new(url.as_str().as_bytes(), header, uncompressed, format)
}
pub fn from_stdout(header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
Self::new(b"-", header, uncompressed, format)
}
fn new(path: &[u8], header: &Header, uncompressed: bool, format: Format) -> Result<Self> {
let mode: &[u8] = match (uncompressed, format) {
(true, Format::VCF) => b"w",
(false, Format::VCF) => b"wz",
(true, Format::BCF) => b"wbu",
(false, Format::BCF) => b"wb",
};
let htsfile = bcf_open(path, mode)?;
unsafe { htslib::bcf_hdr_write(htsfile, header.inner) };
Ok(Writer {
inner: htsfile,
header: Rc::new(HeaderView::new(unsafe {
htslib::bcf_hdr_dup(header.inner)
})),
subset: header.subset.clone(),
})
}
pub fn header(&self) -> &HeaderView {
&self.header
}
pub fn empty_record(&self) -> Record {
record::Record::new(self.header.clone())
}
pub fn translate(&mut self, record: &mut record::Record) {
unsafe {
htslib::bcf_translate(self.header.inner, record.header().inner, record.inner);
}
record.set_header(self.header.clone());
}
pub fn subset(&mut self, record: &mut record::Record) {
if let Some(ref mut subset) = self.subset {
unsafe {
htslib::bcf_subset(
self.header.inner,
record.inner,
subset.len() as i32,
subset.as_mut_ptr(),
);
}
}
}
pub fn write(&mut self, record: &record::Record) -> Result<()> {
if unsafe { htslib::bcf_write(self.inner, self.header.inner, record.inner) } == -1 {
Err(Error::Write)
} else {
Ok(())
}
}
pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
unsafe { set_threads(self.inner, n_threads) }
}
}
// Closes the underlying htslib file handle when the writer goes out of scope.
impl Drop for Writer {
fn drop(&mut self) {
// The return value of `hts_close` is discarded; `drop` has no way to report it.
unsafe {
htslib::hts_close(self.inner);
}
}
}
/// Iterator over the records of a reader, created by `Read::records`.
#[derive(Debug)]
pub struct Records<'a, R: Read> {
// The reader the records are pulled from.
reader: &'a mut R,
}
impl<'a, R: Read> Iterator for Records<'a, R> {
    type Item = Result<record::Record>;

    /// Read one record from the underlying reader.
    ///
    /// A fresh record is allocated for every call. The iterator ends
    /// (`None`) as soon as the reader returns `Ok(false)`; read errors are
    /// passed on as `Some(Err(_))`.
    fn next(&mut self) -> Option<Result<record::Record>> {
        let mut record = self.reader.empty_record();
        self.reader
            .read(&mut record)
            .map(|has_record| if has_record { Some(record) } else { None })
            .transpose()
    }
}
fn bcf_open(target: &[u8], mode: &[u8]) -> Result<*mut htslib::htsFile> {
let p = ffi::CString::new(target).unwrap();
let c_str = ffi::CString::new(mode).unwrap();
let ret = unsafe { htslib::hts_open(p.as_ptr(), c_str.as_ptr()) };
ensure!(
!ret.is_null(),
errors::Open {
target: str::from_utf8(target).unwrap().to_owned()
}
);
unsafe {
ensure!(
mode.contains(&b'w')
|| (*ret).format.category == htslib::htsFormatCategory_variant_data,
errors::Open {
target: str::from_utf8(target).unwrap().to_owned()
}
);
}
Ok(ret)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::bcf::header::Id;
use crate::bcf::record::Numeric;
use std::fs::File;
use std::io::prelude::Read as IoRead;
use std::path::Path;
use std::str;
use tempdir;
// Shared checks for a file with the content expected of `test/test.bcf`:
// one sample, all records on rid 0 with consecutive positions starting at
// 10021, and the expected INFO/FORMAT values. Record 59 is special-cased.
fn _test_read<P: AsRef<Path>>(path: &P) {
let mut bcf = Reader::from_path(path).expect("Error opening file.");
assert_eq!(bcf.header.samples(), [b"NA12878.subsample-0.25-0"]);
for (i, rec) in bcf.records().enumerate() {
let mut record = rec.expect("Error reading record.");
assert_eq!(record.sample_count(), 1);
assert_eq!(record.rid().expect("Error reading rid."), 0);
assert_eq!(record.pos(), 10021 + i as i64);
assert_eq!(record.qual(), 0f32);
assert_eq!(
record
.info(b"MQ0F")
.float()
.ok()
.expect("Error reading info.")
.expect("Missing tag"),
[1.0]
);
if i == 59 {
assert_eq!(
record
.info(b"SGB")
.float()
.ok()
.expect("Error reading info.")
.expect("Missing tag"),
[-0.379885]
);
}
assert_eq!(record.alleles().iter().last().unwrap(), b"<X>");
let mut fmt = record.format(b"PL");
let pl = fmt.integer().expect("Error reading format.");
assert_eq!(pl.len(), 1);
// Record 59 is expected to carry 6 PL values, every other record 3.
if i == 59 {
assert_eq!(pl[0].len(), 6);
} else {
assert_eq!(pl[0].len(), 3);
}
}
}
// Runs the shared read checks against the bundled BCF fixture.
#[test]
fn test_read() {
_test_read(&"test/test.bcf");
}
// Setting the thread count on a plain `Reader` must succeed.
#[test]
fn test_reader_set_threads() {
let path = &"test/test.bcf";
let mut bcf = Reader::from_path(path).expect("Error opening file.");
bcf.set_threads(2).unwrap();
}
// Setting the thread count on a `Writer` (BCF output into a temp dir) must succeed.
#[test]
fn test_writer_set_threads() {
let path = &"test/test.bcf";
let tmp = tempdir::TempDir::new("rust-htslib")
.ok()
.expect("Cannot create temp dir");
let bcfpath = tmp.path().join("test.bcf");
let bcf = Reader::from_path(path).expect("Error opening file.");
let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
.ok()
.expect("Error subsetting samples.");
let mut writer = Writer::from_path(&bcfpath, &header, false, Format::BCF)
.ok()
.expect("Error opening file.");
writer.set_threads(2).unwrap();
}
// Fetching a region on contig "1" with an `IndexedReader` must yield the
// expected number of records.
#[test]
fn test_fetch() {
let mut bcf = IndexedReader::from_path(&"test/test.bcf")
.ok()
.expect("Error opening file.");
bcf.set_threads(2).unwrap();
let rid = bcf
.header()
.name2rid(b"1")
.expect("Translating from contig '1' to ID failed.");
bcf.fetch(rid, 10_033, 10_060).expect("Fetching failed");
// 10_033 through 10_060 inclusive is 28 positions.
assert_eq!(bcf.records().count(), 28);
}
// Round trip: read the multi-sample fixture, subset it to one sample, write
// it as BCF into a temp dir and run the shared read checks on the result.
#[test]
fn test_write() {
let mut bcf = Reader::from_path(&"test/test_multi.bcf")
.ok()
.expect("Error opening file.");
let tmp = tempdir::TempDir::new("rust-htslib")
.ok()
.expect("Cannot create temp dir");
let bcfpath = tmp.path().join("test.bcf");
println!("{:?}", bcfpath);
// The writer lives in its own scope so that it is dropped (and the file
// closed) before the output is read back.
{
let header = Header::from_template_subset(&bcf.header, &[b"NA12878.subsample-0.25-0"])
.ok()
.expect("Error subsetting samples.");
let mut writer = Writer::from_path(&bcfpath, &header, false, Format::BCF)
.ok()
.expect("Error opening file.");
for rec in bcf.records() {
let mut record = rec.expect("Error reading record.");
writer.translate(&mut record);
writer.subset(&mut record);
record.trim_alleles().expect("Error trimming alleles.");
writer.write(&record).expect("Error writing record");
}
}
{
_test_read(&bcfpath);
}
tmp.close().expect("Failed to delete temp dir");
}
// Checks string-typed INFO (`S1`) and FORMAT (`FS1`) values of the string fixture.
#[test]
fn test_strings() {
let mut vcf = Reader::from_path(&"test/test_string.vcf")
.ok()
.expect("Error opening file.");
// Expected `FS1` value of the first sample, one entry per record.
let fs1 = [
&b"LongString1"[..],
&b"LongString2"[..],
&b"."[..],
&b"LongString4"[..],
&b"evenlength"[..],
&b"ss6"[..],
];
for (i, rec) in vcf.records().enumerate() {
println!("record {}", i);
let mut record = rec.expect("Error reading record.");
assert_eq!(
record
.info(b"S1")
.string()
.ok()
.expect("Error reading string.")
.expect("Missing tag")[0],
format!("string{}", i + 1).as_bytes()
);
println!(
"{}",
String::from_utf8_lossy(
record
.format(b"FS1")
.string()
.ok()
.expect("Error reading string.")[0]
)
);
assert_eq!(
record
.format(b"FS1")
.string()
.ok()
.expect("Error reading string.")[0],
fs1[i]
);
}
}
// Checks how missing values in the missing-value fixture are reported for
// float INFO (`F1`) and for integer/float FORMAT fields (`FN4`, `FF4`).
#[test]
fn test_missing() {
let mut vcf = Reader::from_path(&"test/test_missing.vcf")
.ok()
.expect("Error opening file.");
// Expected `FN4` values of the second sample, one entry per record.
let fn4 = [
&[
i32::missing(),
i32::missing(),
i32::missing(),
i32::missing(),
][..],
&[i32::missing()][..],
];
// Whether the first `F1` value of each record is expected to be NaN.
let f1 = [false, true];
for (i, rec) in vcf.records().enumerate() {
let mut record = rec.expect("Error reading record.");
assert_eq!(
record
.info(b"F1")
.float()
.ok()
.expect("Error reading float.")
.expect("Missing tag")[0]
.is_nan(),
f1[i]
);
assert_eq!(
record
.format(b"FN4")
.integer()
.ok()
.expect("Error reading integer.")[1],
fn4[i]
);
assert!(record
.format(b"FF4")
.float()
.ok()
.expect("Error reading float.")[1]
.iter()
.all(|&v| v.is_missing()));
}
}
// The formatted genotype of the first sample must match the expected string
// for each record of the string fixture.
#[test]
fn test_genotypes() {
let mut vcf = Reader::from_path(&"test/test_string.vcf")
.ok()
.expect("Error opening file.");
let expected = ["./1", "1|1", "0/1", "0|1", "1|.", "1/1"];
for (rec, exp_gt) in vcf.records().zip(expected.iter()) {
let mut rec = rec.expect("Error reading record.");
let genotypes = rec.genotypes().expect("Error reading genotypes");
assert_eq!(&format!("{}", genotypes.get(0)), exp_gt);
}
}
// Header tag ids and names must map onto each other; unknown names are errors.
#[test]
fn test_header_ids() {
let vcf = Reader::from_path(&"test/test_string.vcf")
.ok()
.expect("Error opening file.");
let header = &vcf.header();
// NOTE(review): `Id` is already imported at the top of this test module,
// so this local import looks redundant.
use crate::bcf::header::Id;
assert_eq!(header.id_to_name(Id(4)), b"GT");
assert_eq!(header.name_to_id(b"GT").unwrap(), Id(4));
assert!(header.name_to_id(b"XX").is_err());
}
// Sample ids and names must map onto each other; unknown samples are errors.
#[test]
fn test_header_samples() {
let vcf = Reader::from_path(&"test/test_string.vcf")
.ok()
.expect("Error opening file.");
let header = &vcf.header();
assert_eq!(header.id_to_sample(Id(0)), b"one");
assert_eq!(header.id_to_sample(Id(1)), b"two");
assert_eq!(header.sample_to_id(b"one").unwrap(), Id(0));
assert_eq!(header.sample_to_id(b"two").unwrap(), Id(1));
assert!(header.sample_to_id(b"three").is_err());
}
// Contig count and rid <-> name lookups, including the failure cases for an
// unknown name and an out-of-range rid.
#[test]
fn test_header_contigs() {
let vcf = Reader::from_path(&"test/test_multi.bcf")
.ok()
.expect("Error opening file.");
let header = &vcf.header();
assert_eq!(header.contig_count(), 86);
assert_eq!(header.rid2name(0).unwrap(), b"1");
assert_eq!(header.name2rid(b"1").unwrap(), 0);
assert_eq!(header.rid2name(85).unwrap(), b"hs37d5");
assert_eq!(header.name2rid(b"hs37d5").unwrap(), 85);
assert!(header.name2rid(b"nonexistent_contig").is_err());
assert!(header.rid2name(100).is_err());
}
// The string fixture must expose ten header records, the second of which is
// the `PASS` filter.
#[test]
fn test_header_records() {
    let vcf = Reader::from_path(&"test/test_string.vcf")
        .ok()
        .expect("Error opening file.");
    let records = vcf.header().header_records();
    assert_eq!(records.len(), 10);

    match &records[1] {
        HeaderRecord::Filter { key, values } => {
            assert_eq!(key, "FILTER");
            assert_eq!(values["ID"], "PASS");
        }
        // Fail with a message that says what was expected, unlike `assert!(false)`.
        _ => panic!("expected the second header record to be a FILTER record"),
    }
}
// `HeaderView::info_type` must report the expected type and length for a set
// of INFO tags in two fixtures, and fail for an undefined tag.
#[test]
fn test_header_info_types() {
let vcf = Reader::from_path(&"test/test.bcf").unwrap();
let header = vcf.header();
// (tag name, expected type, expected length) for `test/test.bcf`.
let truth = vec![
(
"INDEL",
header::TagType::Flag,
header::TagLength::Fixed(0),
),
(
"DP",
header::TagType::Integer,
header::TagLength::Fixed(1),
),
(
"QS",
header::TagType::Float,
header::TagLength::Alleles,
),
(
"I16",
header::TagType::Float,
header::TagLength::Fixed(16),
),
];
for (ref_name, ref_type, ref_length) in truth {
let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
assert_eq!(tag_type, ref_type);
assert_eq!(tag_length, ref_length);
}
let vcf = Reader::from_path(&"test/test_svlen.vcf").unwrap();
let header = vcf.header();
// (tag name, expected type, expected length) for `test/test_svlen.vcf`.
let truth = vec![
(
"IMPRECISE",
header::TagType::Flag,
header::TagLength::Fixed(0),
),
(
"SVTYPE",
header::TagType::String,
header::TagLength::Fixed(1),
),
(
"SVLEN",
header::TagType::Integer,
header::TagLength::Variable,
),
(
"CIGAR",
header::TagType::String,
header::TagLength::AltAlleles,
),
];
for (ref_name, ref_type, ref_length) in truth {
let (tag_type, tag_length) = header.info_type(ref_name.as_bytes()).unwrap();
assert_eq!(tag_type, ref_type);
assert_eq!(tag_length, ref_length);
}
assert!(header.info_type(b"NOT_THERE").is_err());
}
// Removing the third allele of the record at position 10080 must leave `A` and `C`.
#[test]
fn test_remove_alleles() {
let mut bcf = Reader::from_path(&"test/test_multi.bcf").unwrap();
for res in bcf.records() {
let mut record = res.unwrap();
// NOTE(review): nothing is asserted if no record sits at position 10080.
if record.pos() == 10080 {
record.remove_alleles(&[false, false, true]).unwrap();
assert_eq!(record.alleles(), [b"A", b"C"]);
}
}
}
// Read the whole file at `path` into a `String`, panicking with the path in
// the message if the file cannot be opened or read.
fn read_all<P: AsRef<Path>>(path: P) -> String {
    let path = path.as_ref();
    let mut contents = String::new();
    File::open(path)
        .unwrap_or_else(|e| panic!("Unable to open the file: {:?}: {:?}", path, e))
        .read_to_string(&mut contents)
        .unwrap_or_else(|e| panic!("Unable to read the file: {:?}: {:?}", path, e));
    contents
}
// Builds one record field by field (id, filters, alleles, qual, INFO and
// FORMAT values), writes it as uncompressed VCF and compares the output with
// the expected file `test/test_various.out.vcf`.
#[test]
fn test_write_various() {
let tmp = tempdir::TempDir::new("rust-htslib")
.ok()
.expect("Cannot create temp dir");
let out_path = tmp.path().join("test_various.out.vcf");
let vcf = Reader::from_path(&"test/test_various.vcf")
.ok()
.expect("Error opening file.");
// The writer lives in its own scope so that it is dropped (and the file
// closed) before the output is read back.
{
let mut writer = Writer::from_path(
&out_path,
&Header::from_template(&vcf.header()),
true,
Format::VCF,
)
.ok()
.expect("Error opening file.");
let header = writer.header().clone();
let mut record = writer.empty_record();
record.set_rid(Some(0));
assert_eq!(record.rid().unwrap(), 0);
record.set_pos(12);
assert_eq!(record.pos(), 12);
// A record without an id reports ".".
assert_eq!(str::from_utf8(record.id().as_ref()).ok().unwrap(), ".");
record.set_id("to_be_cleared".as_bytes()).unwrap();
assert_eq!(
str::from_utf8(record.id().as_ref()).ok().unwrap(),
"to_be_cleared"
);
record.clear_id().unwrap();
assert_eq!(str::from_utf8(record.id().as_ref()).ok().unwrap(), ".");
record.set_id("first_id".as_bytes()).unwrap();
record.push_id("second_id".as_bytes()).unwrap();
record.push_id("first_id".as_bytes()).unwrap();
assert!(record.filters().next().is_none());
record.set_filters(&[header.name_to_id(b"q10").unwrap()]);
record.push_filter(header.name_to_id(b"s50").unwrap());
record.remove_filter(header.name_to_id(b"q10").unwrap(), true);
record.push_filter(header.name_to_id(b"q10").unwrap());
record
.set_alleles(&["C".as_bytes(), "T".as_bytes(), "G".as_bytes()])
.unwrap();
record.set_qual(10.0);
record.push_info_integer(b"N1", &[32]).unwrap();
record.push_info_float(b"F1", &[33.0]).unwrap();
record
.push_info_string(b"S1", &["fourtytwo".as_bytes()])
.unwrap();
record.push_info_flag(b"X1").unwrap();
record
.push_format_string(b"FS1", &[&b"yes"[..], &b"no"[..]])
.unwrap();
record.push_format_integer(b"FF1", &[43, 11]).unwrap();
record.push_format_float(b"FN1", &[42.0, 10.0]).unwrap();
record
.push_format_char(b"CH1", &[b"A"[0], b"B"[0]])
.unwrap();
writer.write(&record).unwrap();
}
let expected = read_all("test/test_various.out.vcf");
let actual = read_all(&out_path);
assert_eq!(expected, actual);
}
// Removes one header line of each kind from a template header, writes the
// header as VCF and compares the output with `test/test_headers.out.vcf`.
#[test]
fn test_remove_headers() {
let vcf = Reader::from_path(&"test/test_headers.vcf")
.ok()
.expect("Error opening file.");
let tmp = tempdir::TempDir::new("rust-htslib")
.ok()
.expect("Cannot create temp dir");
let vcfpath = tmp.path().join("test.vcf");
let mut header = Header::from_template(&vcf.header);
header
.remove_contig(b"contig2")
.remove_info(b"INFO2")
.remove_format(b"FORMAT2")
.remove_filter(b"FILTER2")
.remove_structured(b"Foo2")
.remove_generic(b"Bar2");
// Creating the writer writes the header; dropping it at the end of the
// scope closes the file before it is read back.
{
let mut _writer = Writer::from_path(&vcfpath, &header, true, Format::VCF)
.ok()
.expect("Error opening output file.");
}
let expected = read_all("test/test_headers.out.vcf");
let actual = read_all(&vcfpath);
assert_eq!(expected, actual);
}
// Steps a `SyncedReader` over two indexed files with SNP pairing and checks,
// line by line, how many and which readers have a record.
#[test]
fn test_synced_reader() {
let mut reader = synced::SyncedReader::new().unwrap();
reader.set_require_index(true);
reader.set_pairing(synced::pairing::SNPS);
assert_eq!(reader.reader_count(), 0);
reader.add_reader(&"test/test_left.vcf.gz").unwrap();
reader.add_reader(&"test/test_right.vcf.gz").unwrap();
assert_eq!(reader.reader_count(), 2);
// First line: present in both files.
let res1 = reader.read_next();
assert_eq!(res1.unwrap(), 2);
assert!(reader.has_line(0));
assert!(reader.has_line(1));
// Second line: only in the left file.
let res2 = reader.read_next();
assert_eq!(res2.unwrap(), 1);
assert!(reader.has_line(0));
assert!(!reader.has_line(1));
// Third line: only in the right file.
let res3 = reader.read_next();
assert_eq!(res3.unwrap(), 1);
assert!(!reader.has_line(0));
assert!(reader.has_line(1));
// No further lines.
let res4 = reader.read_next();
assert_eq!(res4.unwrap(), 0);
}
// Same sequence of checks as `test_synced_reader`, but after fetching the
// region 0..1000 on rid 0.
#[test]
fn test_synced_reader_fetch() {
let mut reader = synced::SyncedReader::new().unwrap();
reader.set_require_index(true);
reader.set_pairing(synced::pairing::SNPS);
assert_eq!(reader.reader_count(), 0);
reader.add_reader(&"test/test_left.vcf.gz").unwrap();
reader.add_reader(&"test/test_right.vcf.gz").unwrap();
assert_eq!(reader.reader_count(), 2);
reader.fetch(0, 0, 1000).unwrap();
// First line: present in both files.
let res1 = reader.read_next();
assert_eq!(res1.unwrap(), 2);
assert!(reader.has_line(0));
assert!(reader.has_line(1));
// Second line: only in the left file.
let res2 = reader.read_next();
assert_eq!(res2.unwrap(), 1);
assert!(reader.has_line(0));
assert!(!reader.has_line(1));
// Third line: only in the right file.
let res3 = reader.read_next();
assert_eq!(res3.unwrap(), 1);
assert!(!reader.has_line(0));
assert!(reader.has_line(1));
// No further lines.
let res4 = reader.read_next();
assert_eq!(res4.unwrap(), 0);
}
// The first record of the SVLEN fixture must carry the integer INFO value -127.
#[test]
fn test_svlen() {
let mut reader = Reader::from_path("test/test_svlen.vcf").unwrap();
let mut record = reader.empty_record();
reader.read(&mut record).unwrap();
assert_eq!(record.info(b"SVLEN").integer().unwrap(), Some(&[-127][..]));
}
// Opening a BAM file with the BCF reader must be rejected.
#[test]
fn test_fails_on_bam() {
let reader = Reader::from_path("test/test.bam");
assert!(reader.is_err());
}
// Opening a path that does not exist must be rejected.
#[test]
fn test_fails_on_non_existiant() {
let reader = Reader::from_path("test/no_such_file");
assert!(reader.is_err());
}
// The `ANN` INFO tag of the first record must split into 14 strings.
#[test]
fn test_multi_string_info_tag() {
    let mut reader = Reader::from_path("test/test-info-multi-string.vcf").unwrap();
    let mut rec = reader.empty_record();
    // Fail here, with a clear message, if the record cannot be read, instead
    // of discarding the result and failing later on a missing tag.
    assert!(
        reader.read(&mut rec).expect("Error reading record."),
        "fixture contains no record"
    );

    assert_eq!(rec.info(b"ANN").string().unwrap().unwrap().len(), 14);
}
// The `X` INFO tag of the first record must split into 2 strings.
#[test]
fn test_multi_string_info_tag_number_a() {
    let mut reader = Reader::from_path("test/test-info-multi-string-number=A.vcf").unwrap();
    let mut rec = reader.empty_record();
    // Fail here, with a clear message, if the record cannot be read, instead
    // of discarding the result and failing later on a missing tag.
    assert!(
        reader.read(&mut rec).expect("Error reading record."),
        "fixture contains no record"
    );

    assert_eq!(rec.info(b"X").string().unwrap().unwrap().len(), 2);
}
}