use std::borrow::Borrow;
use std::f32;
use std::ffi;
use std::fmt;
use std::i32;
use std::ptr;
use std::rc::Rc;
use std::slice;
use ieee754::Ieee754;
use itertools::Itertools;
use bcf::header::{HeaderView, Id};
use htslib;
// Sentinel values used to recognise "missing" and "end of vector" entries in numeric data.
// NOTE(review): these presumably mirror htslib's `bcf_int32_missing`, `bcf_int32_vector_end`,
// `bcf_float_missing` and `bcf_float_vector_end` -- confirm against htslib's `vcf.h`.

// Integer that marks a missing value (the smallest `i32`).
const MISSING_INTEGER: i32 = i32::MIN;
// Integer that marks the end of a vector (one above the smallest `i32`).
const VECTOR_END_INTEGER: i32 = i32::MIN + 1;
lazy_static! {
// Float sentinels are built from raw bit patterns; both patterns are NaNs (all exponent bits
// set, non-zero mantissa), so they must be compared by their bits, never with `==` on floats.
static ref MISSING_FLOAT: f32 = Ieee754::from_bits(0x7F800001);
static ref VECTOR_END_FLOAT: f32 = Ieee754::from_bits(0x7F800002);
}
/// Numeric value types that have a dedicated "missing" sentinel.
pub trait Numeric {
/// Returns `true` if this value is the type's missing sentinel.
fn is_missing(&self) -> bool;
/// Returns the type's missing sentinel.
fn missing() -> Self;
}
impl Numeric for f32 {
    /// Tests for the missing-float sentinel by comparing raw bit patterns.
    fn is_missing(&self) -> bool {
        let sentinel: f32 = *MISSING_FLOAT;
        (*self).bits() == sentinel.bits()
    }

    /// Returns the missing-float sentinel.
    fn missing() -> f32 {
        let sentinel: f32 = *MISSING_FLOAT;
        sentinel
    }
}
impl Numeric for i32 {
    /// Tests for the missing-integer sentinel.
    fn is_missing(&self) -> bool {
        match *self {
            MISSING_INTEGER => true,
            _ => false,
        }
    }

    /// Returns the missing-integer sentinel.
    fn missing() -> i32 {
        let sentinel: i32 = MISSING_INTEGER;
        sentinel
    }
}
/// Module-private helper for numeric types that have an "end of vector" sentinel.
trait NumericUtils {
/// Returns `true` if this value is the type's end-of-vector sentinel.
fn is_vector_end(&self) -> bool;
}
impl NumericUtils for f32 {
    /// Tests for the end-of-vector float sentinel by comparing raw bit patterns.
    fn is_vector_end(&self) -> bool {
        let sentinel: f32 = *VECTOR_END_FLOAT;
        (*self).bits() == sentinel.bits()
    }
}
impl NumericUtils for i32 {
fn is_vector_end(&self) -> bool {
*self == VECTOR_END_INTEGER
}
}
/// A single record wrapping an htslib `bcf1_t` together with the header it belongs to.
#[derive(Debug)]
pub struct Record {
/// Raw pointer to the underlying htslib record; destroyed with `bcf_destroy` on drop.
pub inner: *mut htslib::bcf1_t,
/// Header view handed to every htslib call made on behalf of this record.
header: Rc<HeaderView>,
/// Scratch buffer that htslib (re)allocates when INFO/FORMAT values are read; freed on drop.
buffer: *mut ::std::os::raw::c_void,
/// Length of `buffer` as last reported by htslib.
buffer_len: i32,
}
impl Record {
    /// Allocates an empty htslib record associated with `header`.
    ///
    /// The freshly initialised record is unpacked right away and starts without a scratch
    /// buffer.
    pub(crate) fn new(header: Rc<HeaderView>) -> Self {
        let inner = unsafe {
            let raw = htslib::bcf_init();
            htslib::bcf_unpack(raw, htslib::BCF_UN_ALL as i32);
            raw
        };
        Record {
            inner,
            header,
            buffer: ptr::null_mut(),
            buffer_len: 0,
        }
    }

    /// Asks htslib to unpack all parts of the record (`BCF_UN_ALL`).
    pub fn unpack(&mut self) {
        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
    }

    /// Returns the header this record is associated with.
    pub fn header(&self) -> &HeaderView {
        self.header.as_ref()
    }

    /// Replaces the header this record is associated with.
    pub(crate) fn set_header(&mut self, header: Rc<HeaderView>) {
        self.header = header;
    }

    /// Borrows the underlying htslib record.
    pub fn inner(&self) -> &htslib::bcf1_t {
        unsafe { &*self.inner }
    }

    /// Mutably borrows the underlying htslib record.
    pub fn inner_mut(&mut self) -> &mut htslib::bcf1_t {
        unsafe { &mut *self.inner }
    }

    /// Returns the reference sequence id, or `None` when the raw value is `-1`.
    pub fn rid(&self) -> Option<u32> {
        match self.inner().rid {
            -1 => None,
            rid => Some(rid as u32),
        }
    }

    /// Sets the reference sequence id; `None` is stored as `-1`.
    pub fn set_rid(&mut self, rid: &Option<u32>) {
        self.inner_mut().rid = match *rid {
            Some(rid) => rid as i32,
            None => -1,
        };
    }

    /// Returns the record's `pos` field cast to `u32`.
    pub fn pos(&self) -> u32 {
        self.inner().pos as u32
    }

    /// Overwrites the record's `pos` field.
    pub fn set_pos(&mut self, pos: i32) {
        self.inner_mut().pos = pos;
    }

    /// Returns a copy of the record ID, or `b"."` when htslib holds no ID string.
    pub fn id(&self) -> Vec<u8> {
        let raw = self.inner().d.id;
        if raw.is_null() {
            b".".to_vec()
        } else {
            let id = unsafe { ffi::CStr::from_ptr(raw) };
            id.to_bytes().to_vec()
        }
    }

    /// Replaces the record ID with `id` via `bcf_update_id`.
    ///
    /// # Errors
    ///
    /// Returns `IdWriteError::Some` if `id` contains a NUL byte (it cannot be passed to C)
    /// or if htslib reports a non-zero status.
    pub fn set_id(&mut self, id: &[u8]) -> Result<(), IdWriteError> {
        let c_id = ffi::CString::new(id).map_err(|_| IdWriteError::Some)?;
        let status = unsafe {
            htslib::bcf_update_id(self.header().inner, self.inner, c_id.as_ptr() as *mut i8)
        };
        if status == 0 {
            Ok(())
        } else {
            Err(IdWriteError::Some)
        }
    }

    /// Resets the record ID to `"."`.
    ///
    /// # Errors
    ///
    /// Returns `IdWriteError::Some` if htslib reports a non-zero status.
    pub fn clear_id(&mut self) -> Result<(), IdWriteError> {
        self.set_id(b".")
    }

    /// Adds `id` to the record's ID via `bcf_add_id`.
    ///
    /// # Errors
    ///
    /// Returns `IdWriteError::Some` if `id` contains a NUL byte or if htslib reports a
    /// non-zero status.
    pub fn push_id(&mut self, id: &[u8]) -> Result<(), IdWriteError> {
        let c_id = ffi::CString::new(id).map_err(|_| IdWriteError::Some)?;
        let status = unsafe {
            htslib::bcf_add_id(self.header().inner, self.inner, c_id.as_ptr() as *mut i8)
        };
        if status == 0 {
            Ok(())
        } else {
            Err(IdWriteError::Some)
        }
    }

    /// Returns an iterator over the ids of the filters set on this record.
    pub fn filters(&self) -> Filters {
        Filters::new(self)
    }

    /// Returns `true` if `flt_id` is among the record's filters.
    ///
    /// An id of `0` additionally matches a record that has no filters at all
    /// (presumably id 0 is `PASS` -- confirm against the header conventions).
    pub fn has_filter(&self, flt_id: &Id) -> bool {
        let dec = &self.inner().d;
        if **flt_id == 0 && dec.n_flt == 0 {
            return true;
        }
        let wanted = **flt_id as i32;
        (0..(dec.n_flt as isize)).any(|i| wanted == unsafe { *dec.flt.offset(i) })
    }

    /// Replaces the record's filters with `flt_ids` via `bcf_update_filter`.
    ///
    /// The status returned by htslib is not inspected.
    pub fn set_filters(&mut self, flt_ids: &[Id]) {
        let mut flt_ids: Vec<i32> = flt_ids.iter().map(|x| **x as i32).collect();
        unsafe {
            htslib::bcf_update_filter(
                self.header().inner,
                self.inner,
                flt_ids.as_mut_ptr(),
                flt_ids.len() as i32,
            );
        }
    }

    /// Adds `flt_id` to the record's filters via `bcf_add_filter`.
    pub fn push_filter(&mut self, flt_id: Id) {
        unsafe {
            htslib::bcf_add_filter(self.header().inner, self.inner, *flt_id as i32);
        }
    }

    /// Removes `flt_id` from the record's filters via `bcf_remove_filter`.
    ///
    /// `pass_on_empty` is forwarded to htslib as an integer flag.
    pub fn remove_filter(&mut self, flt_id: Id, pass_on_empty: bool) {
        unsafe {
            htslib::bcf_remove_filter(
                self.header().inner,
                self.inner,
                *flt_id as i32,
                pass_on_empty as i32,
            );
        }
    }

    /// Returns the alleles of this record as byte slices, in htslib's order.
    ///
    /// The record is unpacked first so that the allele pointers are populated.
    pub fn alleles(&self) -> Vec<&[u8]> {
        unsafe { htslib::bcf_unpack(self.inner, htslib::BCF_UN_ALL as i32) };
        let n = self.inner().n_allele() as usize;
        let ptrs = self.inner().d.allele;
        // A slice must never be built from a null pointer, not even an empty one.
        if n == 0 || ptrs.is_null() {
            return Vec::new();
        }
        let alleles = unsafe { slice::from_raw_parts(ptrs, n) };
        alleles
            .iter()
            .map(|&allele| unsafe { ffi::CStr::from_ptr(allele).to_bytes() })
            .collect()
    }

    /// Replaces the record's alleles via `bcf_update_alleles`.
    ///
    /// # Errors
    ///
    /// Returns `AlleleWriteError::Some` if an allele contains a NUL byte or if htslib
    /// reports a non-zero status.
    pub fn set_alleles(&mut self, alleles: &[&[u8]]) -> Result<(), AlleleWriteError> {
        // The CStrings must stay alive until htslib has copied them.
        let cstrings = alleles
            .iter()
            .map(|allele| ffi::CString::new(*allele))
            .collect::<Result<Vec<ffi::CString>, _>>()
            .map_err(|_| AlleleWriteError::Some)?;
        let mut ptrs: Vec<*const i8> = cstrings
            .iter()
            .map(|cstr| cstr.as_ptr() as *const i8)
            .collect();
        let status = unsafe {
            htslib::bcf_update_alleles(
                self.header().inner,
                self.inner,
                ptrs.as_mut_ptr(),
                alleles.len() as i32,
            )
        };
        if status == 0 {
            Ok(())
        } else {
            Err(AlleleWriteError::Some)
        }
    }

    /// Returns the record's `qual` field.
    pub fn qual(&self) -> f32 {
        self.inner().qual
    }

    /// Overwrites the record's `qual` field.
    pub fn set_qual(&mut self, qual: f32) {
        self.inner_mut().qual = qual;
    }

    /// Returns a reader for the INFO tag `tag` of this record.
    pub fn info<'a>(&'a mut self, tag: &'a [u8]) -> Info {
        Info { record: self, tag }
    }

    /// Returns the number of samples reported by htslib for this record.
    pub fn sample_count(&self) -> u32 {
        self.inner().n_sample()
    }

    /// Returns the number of alleles reported by htslib for this record.
    pub fn allele_count(&self) -> u32 {
        self.inner().n_allele()
    }

    /// Reads the `GT` FORMAT field as encoded integers, one slice per sample.
    ///
    /// # Errors
    ///
    /// Propagates any `FormatReadError` raised while reading `GT`.
    pub fn genotypes(&mut self) -> Result<Genotypes, FormatReadError> {
        let encoded = self.format(b"GT").integer()?;
        Ok(Genotypes { encoded })
    }

    /// Returns a reader for the FORMAT tag `tag` of this record.
    pub fn format<'a>(&'a mut self, tag: &'a [u8]) -> Format {
        Format::new(self, tag)
    }

    /// Writes integer FORMAT values for `tag` (htslib type `BCF_HT_INT`).
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn push_format_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<(), TagWriteError> {
        self.push_format(tag, data, htslib::BCF_HT_INT)
    }

    /// Writes float FORMAT values for `tag` (htslib type `BCF_HT_REAL`).
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn push_format_float(&mut self, tag: &[u8], data: &[f32]) -> Result<(), TagWriteError> {
        self.push_format(tag, data, htslib::BCF_HT_REAL)
    }

    /// Writes raw character FORMAT data for `tag` (htslib type `BCF_HT_STR`).
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn push_format_char(&mut self, tag: &[u8], data: &[u8]) -> Result<(), TagWriteError> {
        self.push_format(tag, data, htslib::BCF_HT_STR)
    }

    /// Shared implementation of the typed FORMAT writers: forwards `data` to
    /// `bcf_update_format` with the htslib type code `ht`.
    fn push_format<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<(), TagWriteError> {
        let c_tag = ffi::CString::new(tag).map_err(|_| TagWriteError::Some)?;
        let status = unsafe {
            htslib::bcf_update_format(
                self.header().inner,
                self.inner,
                c_tag.as_ptr() as *mut i8,
                data.as_ptr() as *const ::std::os::raw::c_void,
                data.len() as i32,
                ht as i32,
            )
        };
        if status == 0 {
            Ok(())
        } else {
            Err(TagWriteError::Some)
        }
    }

    /// Writes string FORMAT values for `tag` via `bcf_update_format_string`.
    ///
    /// # Panics
    ///
    /// Panics if `data` is empty.
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` or any string contains a NUL byte, or if
    /// htslib reports a non-zero status.
    pub fn push_format_string<D: Borrow<[u8]>>(
        &mut self,
        tag: &[u8],
        data: &[D],
    ) -> Result<(), TagWriteError> {
        assert!(
            data.len() > 0,
            "given string data must have at least 1 element"
        );
        // Owned C strings; they must outlive the pointer table handed to htslib.
        let c_data = data
            .iter()
            .map(|s| ffi::CString::new(s.borrow()))
            .collect::<Result<Vec<ffi::CString>, _>>()
            .map_err(|_| TagWriteError::Some)?;
        let c_ptrs = c_data
            .iter()
            .map(|s| s.as_ptr() as *mut i8)
            .collect::<Vec<*mut i8>>();
        let c_tag = ffi::CString::new(tag).map_err(|_| TagWriteError::Some)?;
        let status = unsafe {
            htslib::bcf_update_format_string(
                self.header().inner,
                self.inner,
                c_tag.as_ptr() as *mut i8,
                c_ptrs.as_slice().as_ptr() as *mut *const i8,
                data.len() as i32,
            )
        };
        if status == 0 {
            Ok(())
        } else {
            Err(TagWriteError::Some)
        }
    }

    /// Writes integer INFO values for `tag` (htslib type `BCF_HT_INT`).
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn push_info_integer(&mut self, tag: &[u8], data: &[i32]) -> Result<(), TagWriteError> {
        self.push_info(tag, data, htslib::BCF_HT_INT)
    }

    /// Clears the integer INFO tag `tag` by writing zero values.
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn clear_info_integer(&mut self, tag: &[u8]) -> Result<(), TagWriteError> {
        self.push_info::<i32>(tag, &[], htslib::BCF_HT_INT)
    }

    /// Writes float INFO values for `tag` (htslib type `BCF_HT_REAL`).
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn push_info_float(&mut self, tag: &[u8], data: &[f32]) -> Result<(), TagWriteError> {
        self.push_info(tag, data, htslib::BCF_HT_REAL)
    }

    /// Clears the float INFO tag `tag` by writing zero values.
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn clear_info_float(&mut self, tag: &[u8]) -> Result<(), TagWriteError> {
        // The element type matches the REAL tag type, mirroring `clear_info_integer`.
        self.push_info::<f32>(tag, &[], htslib::BCF_HT_REAL)
    }

    /// Shared implementation of the numeric INFO writers: forwards `data` to
    /// `bcf_update_info` with the htslib type code `ht`.
    fn push_info<T>(&mut self, tag: &[u8], data: &[T], ht: u32) -> Result<(), TagWriteError> {
        let c_tag = ffi::CString::new(tag).map_err(|_| TagWriteError::Some)?;
        let status = unsafe {
            htslib::bcf_update_info(
                self.header().inner,
                self.inner,
                c_tag.as_ptr() as *mut i8,
                data.as_ptr() as *const ::std::os::raw::c_void,
                data.len() as i32,
                ht as i32,
            )
        };
        if status == 0 {
            Ok(())
        } else {
            Err(TagWriteError::Some)
        }
    }

    /// Sets the INFO flag `tag`.
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn push_info_flag(&mut self, tag: &[u8]) -> Result<(), TagWriteError> {
        self.push_info_string_impl(tag, &["".as_bytes()], htslib::BCF_HT_FLAG)
    }

    /// Clears the INFO flag `tag`.
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn clear_info_flag(&mut self, tag: &[u8]) -> Result<(), TagWriteError> {
        self.push_info_string_impl(tag, &[], htslib::BCF_HT_FLAG)
    }

    /// Writes string INFO values for `tag`; multiple values are joined with `,`.
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` or a value contains a NUL byte, or if htslib
    /// reports failure.
    pub fn push_info_string(&mut self, tag: &[u8], data: &[&[u8]]) -> Result<(), TagWriteError> {
        self.push_info_string_impl(tag, data, htslib::BCF_HT_STR)
    }

    /// Clears the string INFO tag `tag` by writing an empty value.
    ///
    /// # Errors
    ///
    /// Returns `TagWriteError::Some` if `tag` contains a NUL byte or htslib reports failure.
    pub fn clear_info_string(&mut self, tag: &[u8]) -> Result<(), TagWriteError> {
        self.push_info_string_impl(tag, &[], htslib::BCF_HT_STR)
    }

    /// Shared implementation of the flag and string INFO writers.
    ///
    /// The values are joined with `,` into a single C string. For flags the length passed
    /// to htslib is the number of entries in `data` (1 sets, 0 clears); for strings it is
    /// the byte length of the joined value.
    fn push_info_string_impl(
        &mut self,
        tag: &[u8],
        data: &[&[u8]],
        ht: u32,
    ) -> Result<(), TagWriteError> {
        let mut buf: Vec<u8> = Vec::new();
        for (i, &s) in data.iter().enumerate() {
            if i > 0 {
                buf.push(b',');
            }
            buf.extend_from_slice(s);
        }
        let c_str = ffi::CString::new(buf).map_err(|_| TagWriteError::Some)?;
        let c_tag = ffi::CString::new(tag).map_err(|_| TagWriteError::Some)?;
        let len = if ht == htslib::BCF_HT_FLAG {
            data.len()
        } else {
            c_str.to_bytes().len()
        };
        let status = unsafe {
            htslib::bcf_update_info(
                self.header().inner,
                self.inner,
                c_tag.as_ptr() as *mut i8,
                c_str.as_ptr() as *const ::std::os::raw::c_void,
                len as i32,
                ht as i32,
            )
        };
        if status == 0 {
            Ok(())
        } else {
            Err(TagWriteError::Some)
        }
    }

    /// Runs `bcf_trim_alleles` on this record.
    ///
    /// # Errors
    ///
    /// Returns `RemoveAllelesError::Some` if htslib returns `-1`.
    pub fn trim_alleles(&mut self) -> Result<(), RemoveAllelesError> {
        match unsafe { htslib::bcf_trim_alleles(self.header().inner, self.inner) } {
            -1 => Err(RemoveAllelesError::Some),
            _ => Ok(()),
        }
    }

    /// Removes every allele whose position in `remove` holds `true`.
    ///
    /// The positions are collected into a temporary htslib bit set, which is destroyed
    /// again before returning.
    ///
    /// # Errors
    ///
    /// Returns `RemoveAllelesError::Some` if `bcf_remove_allele_set` returns `-1`.
    pub fn remove_alleles(&mut self, remove: &[bool]) -> Result<(), RemoveAllelesError> {
        let rm_set = unsafe { htslib::kbs_init(remove.len()) };
        for (i, &r) in remove.iter().enumerate() {
            if r {
                unsafe {
                    htslib::kbs_insert(rm_set, i as i32);
                }
            }
        }
        let ret = unsafe { htslib::bcf_remove_allele_set(self.header().inner, self.inner, rm_set) };
        unsafe {
            htslib::kbs_destroy(rm_set);
        }
        match ret {
            -1 => Err(RemoveAllelesError::Some),
            _ => Ok(()),
        }
    }
}
/// One allele of a genotype call, together with its phasing state.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GenotypeAllele {
/// Unphased allele carrying its allele index.
Unphased(i32),
/// Phased allele carrying its allele index.
Phased(i32),
/// Unphased allele without a value.
UnphasedMissing,
/// Phased allele without a value.
PhasedMissing,
}
impl GenotypeAllele {
    /// Decodes an allele from its integer encoding.
    ///
    /// The lowest bit carries the phasing flag; the remaining bits hold the allele index
    /// plus one, with zero standing for a missing allele.
    pub fn from_encoded(encoded: i32) -> Self {
        let is_phased = (encoded & 1) == 1;
        match (encoded >> 1, is_phased) {
            (0, false) => GenotypeAllele::UnphasedMissing,
            (0, true) => GenotypeAllele::PhasedMissing,
            (shifted, false) => GenotypeAllele::Unphased(shifted - 1),
            (shifted, true) => GenotypeAllele::Phased(shifted - 1),
        }
    }

    /// Returns the allele index cast to `u32`, or `None` for a missing allele.
    pub fn index(&self) -> Option<u32> {
        match *self {
            GenotypeAllele::Unphased(idx) | GenotypeAllele::Phased(idx) => Some(idx as u32),
            GenotypeAllele::UnphasedMissing | GenotypeAllele::PhasedMissing => None,
        }
    }
}
impl fmt::Display for GenotypeAllele {
    /// Writes the allele index, or `.` for a missing allele.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Some(idx) = self.index() {
            write!(f, "{}", idx)
        } else {
            write!(f, ".")
        }
    }
}
// Genotype of one sample: the ordered list of its alleles.
// `NewtypeDeref` presumably lets a `Genotype` be used like the wrapped `Vec<GenotypeAllele>`
// -- confirm against the `newtype_derive` documentation.
custom_derive! {
#[derive(NewtypeDeref, Debug, Clone, PartialEq, Eq, Hash)]
pub struct Genotype(Vec<GenotypeAllele>);
}
impl fmt::Display for Genotype {
    /// Writes the alleles separated by `|` (phased) or `/` (unphased).
    ///
    /// The separator in front of an allele is chosen by that allele's own phasing state;
    /// the first allele is written without a separator. A genotype without any alleles
    /// produces no output instead of panicking.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let &Genotype(ref alleles) = self;
        if let Some((first, rest)) = alleles.split_first() {
            write!(f, "{}", first)?;
            for allele in rest {
                let sep = match *allele {
                    GenotypeAllele::Phased(_) | GenotypeAllele::PhasedMissing => '|',
                    GenotypeAllele::Unphased(_) | GenotypeAllele::UnphasedMissing => '/',
                };
                write!(f, "{}{}", sep, allele)?;
            }
        }
        Ok(())
    }
}
/// Encoded genotypes of all samples of a record, as returned by `Record::genotypes`.
#[derive(Debug, Clone)]
pub struct Genotypes<'a> {
/// One slice of encoded alleles per sample; decoded on demand by `get`.
encoded: Vec<&'a [i32]>,
}
impl<'a> Genotypes<'a> {
    /// Decodes the genotype of the sample at index `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is not a valid sample index.
    pub fn get(&self, i: usize) -> Genotype {
        let decoded = self.encoded[i]
            .iter()
            .cloned()
            .map(GenotypeAllele::from_encoded)
            .collect_vec();
        Genotype(decoded)
    }
}
impl Drop for Record {
    /// Frees the scratch buffer (if one was ever allocated) and destroys the htslib record.
    fn drop(&mut self) {
        unsafe {
            if !self.buffer.is_null() {
                ::libc::free(self.buffer as *mut ::libc::c_void);
            }
            htslib::bcf_destroy(self.inner);
        }
    }
}
// Manual thread-safety promises: `Record` holds raw pointers, so the compiler derives neither.
// NOTE(review): `Record` also stores an `Rc<HeaderView>`, and `Rc` is neither `Send` nor
// `Sync`; moving a record to another thread while a clone of that `Rc` remains elsewhere
// can race on the reference count -- confirm these impls are sound for all callers.
unsafe impl Send for Record {}
unsafe impl Sync for Record {}
/// Reader for one INFO tag of a record, created by `Record::info`.
#[derive(Debug)]
pub struct Info<'a> {
/// Record to read from; its scratch buffer receives the values.
record: &'a mut Record,
/// Name of the INFO tag.
tag: &'a [u8],
}
impl<'a> Info<'a> {
    /// Reads the tag's values into the record's scratch buffer via `bcf_get_info_values`.
    ///
    /// On success returns `Some((n, ret))`, where `n` is the buffer length reported back
    /// by htslib through its in/out parameter and `ret` is htslib's non-negative return
    /// value (the number of values written; `0`/`1` for flags). Returns `None` when htslib
    /// answers `-3` (tag not present in the record).
    ///
    /// # Errors
    ///
    /// * `UndefinedTag` for status `-1`, or when the tag contains a NUL byte and therefore
    ///   cannot be looked up at all.
    /// * `UnexpectedType` for status `-2`.
    /// * `ReadFailed` for any other negative status.
    fn data(&mut self, data_type: u32) -> Result<Option<(usize, i32)>, InfoReadError> {
        let c_tag = ffi::CString::new(self.tag).map_err(|_| InfoReadError::UndefinedTag)?;
        // htslib counts the buffer in elements of the requested type, whereas the stored
        // length may stem from a read of a narrower type (e.g. bytes of a string). Scaling
        // it down by the element width can only understate the capacity, so htslib
        // reallocates instead of writing past the end of the buffer.
        let is_wide = data_type == htslib::BCF_HT_INT || data_type == htslib::BCF_HT_REAL;
        let width: i32 = if is_wide {
            ::std::mem::size_of::<i32>() as i32
        } else {
            1
        };
        let capacity = self.record.buffer_len / width;
        let mut n: i32 = capacity;
        let ret = unsafe {
            htslib::bcf_get_info_values(
                self.record.header().inner,
                self.record.inner,
                c_tag.as_ptr() as *mut i8,
                &mut self.record.buffer,
                &mut n,
                data_type as i32,
            )
        };
        // Only record a new length when htslib actually resized the buffer.
        if n != capacity {
            self.record.buffer_len = n;
        }
        match ret {
            -1 => Err(InfoReadError::UndefinedTag),
            -2 => Err(InfoReadError::UnexpectedType),
            -3 => Ok(None),
            ret if ret < 0 => Err(InfoReadError::ReadFailed),
            ret => Ok(Some((n as usize, ret))),
        }
    }

    /// Borrows the first `len` elements of the record's scratch buffer as values of `T`.
    ///
    /// Yields an empty slice when nothing was written or no buffer exists, because a slice
    /// must never be built from a null pointer.
    fn view<T: 'a>(&self, len: usize) -> &'a [T] {
        if len == 0 || self.record.buffer.is_null() {
            &[]
        } else {
            unsafe { slice::from_raw_parts(self.record.buffer as *const T, len) }
        }
    }

    /// Reads the tag as integers.
    ///
    /// Returns `Ok(None)` if the tag is absent from the record. Only the values htslib
    /// actually wrote are returned, cut at the first end-of-vector sentinel.
    ///
    /// # Errors
    ///
    /// See `InfoReadError`.
    pub fn integer(&mut self) -> Result<Option<&'a [i32]>, InfoReadError> {
        self.data(htslib::BCF_HT_INT)
            .map(|data| data.map(|(_, ret)| trim_slice(self.view::<i32>(ret as usize))))
    }

    /// Reads the tag as floats.
    ///
    /// Returns `Ok(None)` if the tag is absent from the record. Only the values htslib
    /// actually wrote are returned, cut at the first end-of-vector sentinel.
    ///
    /// # Errors
    ///
    /// See `InfoReadError`.
    pub fn float(&mut self) -> Result<Option<&'a [f32]>, InfoReadError> {
        self.data(htslib::BCF_HT_REAL)
            .map(|data| data.map(|(_, ret)| trim_slice(self.view::<f32>(ret as usize))))
    }

    /// Reads the tag as a flag: `true` only if htslib returns `1`.
    ///
    /// # Errors
    ///
    /// See `InfoReadError`.
    pub fn flag(&mut self) -> Result<bool, InfoReadError> {
        self.data(htslib::BCF_HT_FLAG).map(|data| match data {
            Some((_, ret)) => ret == 1,
            None => false,
        })
    }

    /// Reads the tag as string data.
    ///
    /// The written bytes are chunked by the buffer length and each chunk is cut at its
    /// first NUL byte. Returns `Ok(None)` if the tag is absent from the record.
    ///
    /// # Errors
    ///
    /// See `InfoReadError`.
    pub fn string(&mut self) -> Result<Option<Vec<&'a [u8]>>, InfoReadError> {
        self.data(htslib::BCF_HT_STR).map(|data| {
            data.map(|(n, ret)| {
                self.view::<u8>(ret as usize)
                    // `chunks` panics on a chunk size of zero.
                    .chunks(::std::cmp::max(n, 1))
                    .map(|s| {
                        s.split(|c| *c == 0u8)
                            .next()
                            .expect("Bug: returned string should not be empty.")
                    })
                    .collect()
            })
        })
    }
}
// Manual thread-safety promises for `Info`.
// NOTE(review): `Info` holds a `&mut Record`, so these are only as sound as the matching
// impls on `Record` -- confirm together with those.
unsafe impl<'a> Send for Info<'a> {}
unsafe impl<'a> Sync for Info<'a> {}
/// Returns the prefix of `s` that precedes the first end-of-vector sentinel, or all of `s`
/// if it contains none.
fn trim_slice<T: PartialEq + NumericUtils>(s: &[T]) -> &[T] {
    match s.iter().position(|value| value.is_vector_end()) {
        Some(end) => &s[..end],
        None => s,
    }
}
/// Reader for one FORMAT tag of a record, created by `Record::format`.
#[derive(Debug)]
pub struct Format<'a> {
/// Record to read from; its scratch buffer receives the values.
record: &'a mut Record,
/// Name of the FORMAT tag.
tag: &'a [u8],
/// Pointer returned by `bcf_get_fmt` for this tag when the reader was created.
/// NOTE(review): presumably null when the record lacks the tag -- confirm in htslib.
inner: *mut htslib::bcf_fmt_t,
}
impl<'a> Format<'a> {
fn new(record: &'a mut Record, tag: &'a [u8]) -> Format<'a> {
let inner = unsafe {
htslib::bcf_get_fmt(
record.header().inner,
record.inner,
ffi::CString::new(tag).unwrap().as_ptr() as *mut i8,
)
};
Format {
record: record,
tag: tag,
inner: inner,
}
}
pub fn inner(&self) -> &htslib::bcf_fmt_t {
unsafe { &*self.inner }
}
pub fn inner_mut(&mut self) -> &mut htslib::bcf_fmt_t {
unsafe { &mut *self.inner }
}
fn values_per_sample(&self) -> usize {
self.inner().n as usize
}
fn data(&mut self, data_type: u32) -> Result<(usize, i32), FormatReadError> {
let mut n: i32 = self.record.buffer_len;
let ret = unsafe {
htslib::bcf_get_format_values(
self.record.header().inner,
self.record.inner,
ffi::CString::new(self.tag).unwrap().as_ptr() as *mut i8,
&mut self.record.buffer,
&mut n,
data_type as i32,
)
};
self.record.buffer_len = n;
match ret {
-1 => Err(FormatReadError::UndefinedTag),
-2 => Err(FormatReadError::UnexpectedType),
-3 => Err(FormatReadError::MissingTag),
ret => Ok((n as usize, ret)),
}
}
pub fn integer(&mut self) -> Result<Vec<&'a [i32]>, FormatReadError> {
self.data(htslib::BCF_HT_INT).map(|(n, _)| {
unsafe { slice::from_raw_parts(self.record.buffer as *const i32, n) }
.chunks(self.values_per_sample())
.map(|s| trim_slice(s))
.collect()
})
}
pub fn float(&mut self) -> Result<Vec<&'a [f32]>, FormatReadError> {
self.data(htslib::BCF_HT_REAL).map(|(n, _)| {
unsafe { slice::from_raw_parts(self.record.buffer as *const f32, n) }
.chunks(self.values_per_sample())
.map(|s| trim_slice(s))
.collect()
})
}
pub fn string(&mut self) -> Result<Vec<&'a [u8]>, FormatReadError> {
self.data(htslib::BCF_HT_STR).map(|(n, _)| {
unsafe { slice::from_raw_parts(self.record.buffer as *const u8, n) }
.chunks(self.values_per_sample())
.map(|s| {
s.split(|c| *c == 0u8)
.next()
.expect("Bug: returned string should not be empty.")
})
.collect()
})
}
}
// Manual thread-safety promises for `Format`, which holds a raw htslib pointer.
// NOTE(review): `Format` also holds a `&mut Record`, so these are only as sound as the
// matching impls on `Record` -- confirm together with those.
unsafe impl<'a> Send for Format<'a> {}
unsafe impl<'a> Sync for Format<'a> {}
/// Iterator over the filter ids of a record, created by `Record::filters`.
#[derive(Debug)]
pub struct Filters<'a> {
/// Record whose filters are enumerated.
record: &'a Record,
/// Index of the next filter entry to yield.
idx: i32,
}
impl<'a> Filters<'a> {
    /// Creates an iterator positioned at the first filter of `record`.
    pub fn new(record: &'a Record) -> Self {
        Filters {
            record: record,
            idx: 0,
        }
    }
}
impl<'a> Iterator for Filters<'a> {
    type Item = Id;

    /// Yields the next filter id, or `None` once all `n_flt` entries have been visited.
    fn next(&mut self) -> Option<Id> {
        let dec = &self.record.inner().d;
        if self.idx >= dec.n_flt {
            return None;
        }
        let offset = self.idx as isize;
        self.idx += 1;
        let raw = unsafe { *dec.flt.offset(offset) };
        Some(Id(raw as u32))
    }
}
// Error for failures while enumerating the FILTER entries of a record.
quick_error! {
#[derive(Debug, Clone)]
pub enum IterFilterError {
// Generic failure without further detail.
Some {
description("problem enumerating FILTER entries")
}
}
}
// Errors that can occur while reading an INFO tag from a record.
quick_error! {
#[derive(Debug, Clone)]
pub enum InfoReadError {
// The tag is not defined in the header.
UndefinedTag {
description("tag undefined in header")
}
// The requested value type does not match the header definition of the tag.
UnexpectedType {
description("tag type differs from header definition")
}
// htslib's allocated memory and written data disagree for the tag.
ReadFailed {
description("discrepancy between allocated memory and written data for INFO tag \
(this is likely a bug in htslib, see \
https://github.com/samtools/htslib/issues/832)")
}
}
}
// Errors that can occur while reading a FORMAT tag from a record.
quick_error! {
#[derive(Debug, Clone)]
pub enum FormatReadError {
// The tag is not defined in the header.
UndefinedTag {
description("tag undefined in header")
}
// The requested value type does not match the header definition of the tag.
UnexpectedType {
description("tag type differs from header definition")
}
// The tag is defined in the header but absent from this record.
MissingTag {
description("tag missing from record")
}
}
}
// Error returned by the INFO/FORMAT writers when a tag could not be written.
quick_error! {
#[derive(Debug, Clone)]
pub enum TagWriteError {
// Generic failure without further detail.
Some {
description("error writing tag to record")
}
}
}
// Error returned by the ID writers when the record ID could not be written.
quick_error! {
#[derive(Debug, Clone)]
pub enum IdWriteError {
// Generic failure without further detail.
Some {
description("error writing ID to record")
}
}
}
// Error returned when the alleles of a record could not be written.
quick_error! {
#[derive(Debug, Clone)]
pub enum AlleleWriteError {
// Generic failure without further detail.
Some {
description("error writing alleles to record")
}
}
}
// Error for failures while writing the filters of a record.
quick_error! {
#[derive(Debug, Clone)]
pub enum FilterWriteError {
// Generic failure without further detail.
Some {
description("error writing filters to record")
}
}
}
// Error returned when trimming or removing alleles of a record fails.
quick_error! {
#[derive(Debug, Clone)]
pub enum RemoveAllelesError {
// Generic failure without further detail.
Some {
description("error trimming alleles")
}
}
}