use std::ffi;
use std::slice;
use std::str;
use crate::htslib;
use linear_map::LinearMap;
use crate::bcf::{errors::Error, Result};
/// Index buffer with one `i32` per requested sample, filled in by
/// `htslib::bcf_hdr_subset` when a header is built with
/// `Header::from_template_subset`.
pub type SampleSubset = Vec<i32>;
// Newtype around `u32` used as a numeric header identifier; it is what
// `HeaderView::name_to_id` and `HeaderView::sample_to_id` return and what
// `HeaderView::id_to_name` and `HeaderView::id_to_sample` accept.
// `NewtypeDeref` is what allows `*id` to be used as the underlying integer.
custom_derive! {
#[derive(
NewtypeFrom,
NewtypeDeref,
PartialEq,
PartialOrd,
Eq,
Ord,
Copy,
Clone,
Debug
)]
pub struct Id(pub u32);
}
/// An owned, mutable header wrapping a raw htslib `bcf_hdr_t`.
///
/// The wrapped pointer is released with `bcf_hdr_destroy` when the value is
/// dropped.
#[derive(Debug)]
pub struct Header {
/// Raw pointer to the underlying htslib header.
pub inner: *mut htslib::bcf_hdr_t,
/// Index buffer produced by `bcf_hdr_subset`; `None` unless the header was
/// built with `from_template_subset`.
pub subset: Option<SampleSubset>,
}
impl Default for Header {
fn default() -> Self {
Self::new()
}
}
impl Header {
/// Create a new header by calling `bcf_hdr_init` with mode `"w"`.
///
/// The resulting header carries no sample subset.
pub fn new() -> Self {
    let mode = ffi::CString::new("w").unwrap();
    // `mode` stays alive until the end of this function, so the pointer
    // handed to htslib is valid for the duration of the call.
    let inner = unsafe { htslib::bcf_hdr_init(mode.as_ptr()) };
    Header {
        inner,
        subset: None,
    }
}
/// Create a header that is a duplicate (`bcf_hdr_dup`) of the given view.
///
/// The template is not modified; the new header carries no sample subset.
pub fn from_template(header: &HeaderView) -> Self {
    let inner = unsafe { htslib::bcf_hdr_dup(header.inner) };
    Header {
        inner,
        subset: None,
    }
}
pub fn from_template_subset(header: &HeaderView, samples: &[&[u8]]) -> Result<Self> {
let mut imap = vec![0; samples.len()];
let names: Vec<_> = samples
.iter()
.map(|&s| ffi::CString::new(s).unwrap())
.collect();
let name_pointers: Vec<_> = names.iter().map(|s| s.as_ptr() as *mut i8).collect();
let inner = unsafe {
htslib::bcf_hdr_subset(
header.inner,
samples.len() as i32,
name_pointers.as_ptr() as *const *mut i8,
imap.as_mut_ptr() as *mut i32,
)
};
if inner.is_null() {
Err(Error::DuplicateSampleNames)
} else {
Ok(Header {
inner,
subset: Some(imap),
})
}
}
/// Add a sample name to the header via `bcf_hdr_add_sample`.
///
/// Returns `self` so that calls can be chained. The return value of the
/// htslib call is not inspected.
///
/// # Panics
///
/// Panics if `sample` contains an interior NUL byte.
pub fn push_sample(&mut self, sample: &[u8]) -> &mut Self {
    let name = ffi::CString::new(sample).unwrap();
    unsafe {
        htslib::bcf_hdr_add_sample(self.inner, name.as_ptr());
    }
    self
}
/// Append `record` to the header via `bcf_hdr_append`.
///
/// Returns `self` so that calls can be chained. The return value of the
/// htslib call is not inspected.
///
/// # Panics
///
/// Panics if `record` contains an interior NUL byte.
pub fn push_record(&mut self, record: &[u8]) -> &mut Self {
    let line = ffi::CString::new(record).unwrap();
    unsafe {
        htslib::bcf_hdr_append(self.inner, line.as_ptr());
    }
    self
}
/// Remove the header entry `tag` of type `BCF_HL_FLT`; returns `self` for chaining.
pub fn remove_filter(&mut self, tag: &[u8]) -> &mut Self {
self.remove_impl(tag, htslib::BCF_HL_FLT)
}
/// Remove the header entry `tag` of type `BCF_HL_INFO`; returns `self` for chaining.
pub fn remove_info(&mut self, tag: &[u8]) -> &mut Self {
self.remove_impl(tag, htslib::BCF_HL_INFO)
}
/// Remove the header entry `tag` of type `BCF_HL_FMT`; returns `self` for chaining.
pub fn remove_format(&mut self, tag: &[u8]) -> &mut Self {
self.remove_impl(tag, htslib::BCF_HL_FMT)
}
/// Remove the header entry `tag` of type `BCF_HL_CTG`; returns `self` for chaining.
pub fn remove_contig(&mut self, tag: &[u8]) -> &mut Self {
self.remove_impl(tag, htslib::BCF_HL_CTG)
}
/// Remove the header entry `tag` of type `BCF_HL_STR`; returns `self` for chaining.
pub fn remove_structured(&mut self, tag: &[u8]) -> &mut Self {
self.remove_impl(tag, htslib::BCF_HL_STR)
}
/// Remove the header entry `tag` of type `BCF_HL_GEN`; returns `self` for chaining.
pub fn remove_generic(&mut self, tag: &[u8]) -> &mut Self {
self.remove_impl(tag, htslib::BCF_HL_GEN)
}
/// Shared implementation of the `remove_*` methods: calls `bcf_hdr_remove`
/// with the given line type and tag, then returns `self` for chaining.
///
/// # Panics
///
/// Panics if `tag` contains an interior NUL byte.
fn remove_impl(&mut self, tag: &[u8], type_: u32) -> &mut Self {
    let key = ffi::CString::new(tag.to_vec()).unwrap();
    // Only the FFI call itself needs to be inside `unsafe`.
    unsafe {
        htslib::bcf_hdr_remove(self.inner, type_ as i32, key.as_ptr());
    }
    self
}
}
impl Drop for Header {
    /// Release the wrapped htslib header with `bcf_hdr_destroy`.
    fn drop(&mut self) {
        let raw = self.inner;
        unsafe {
            htslib::bcf_hdr_destroy(raw);
        }
    }
}
/// One header record as returned by `HeaderView::header_records`.
///
/// The variant is chosen from the record's htslib type code; all variants
/// except `Generic` carry the record's key/value pairs in insertion order.
#[derive(Debug)]
pub enum HeaderRecord {
/// Record with htslib type code 0.
Filter {
key: String,
values: LinearMap<String, String>,
},
/// Record with htslib type code 1.
Info {
key: String,
values: LinearMap<String, String>,
},
/// Record with htslib type code 2.
Format {
key: String,
values: LinearMap<String, String>,
},
/// Record with htslib type code 3.
Contig {
key: String,
values: LinearMap<String, String>,
},
/// Record with htslib type code 4.
Structured {
key: String,
values: LinearMap<String, String>,
},
/// Record with htslib type code 5: a single key with a single value.
Generic { key: String, value: String },
}
/// A read-oriented view of an htslib `bcf_hdr_t`.
///
/// The view owns the pointer: cloning duplicates the header with
/// `bcf_hdr_dup` and dropping destroys it with `bcf_hdr_destroy`.
#[derive(Debug)]
pub struct HeaderView {
/// Raw pointer to the underlying htslib header.
pub inner: *mut htslib::bcf_hdr_t,
}
impl HeaderView {
pub fn new(inner: *mut htslib::bcf_hdr_t) -> Self {
HeaderView { inner }
}
/// Return a by-value copy of the `bcf_hdr_t` struct behind `self.inner`.
///
/// Only the struct itself is copied; pointer fields in the copy still refer
/// to the data of the original header.
#[inline]
fn inner(&self) -> htslib::bcf_hdr_t {
unsafe { *self.inner }
}
/// Number of entries in the header's `BCF_DT_SAMPLE` dictionary.
pub fn sample_count(&self) -> u32 {
self.inner().n[htslib::BCF_DT_SAMPLE as usize] as u32
}
/// Return the sample names stored in the header, in header order.
///
/// Returns an empty vector when the header has no samples.
pub fn samples(&self) -> Vec<&[u8]> {
    let count = self.sample_count() as usize;
    let ptr = self.inner().samples;
    // A header without samples may carry a null `samples` pointer, and
    // `slice::from_raw_parts` requires a non-null pointer even for a
    // zero-length slice, so this case must be handled before building it.
    if count == 0 || ptr.is_null() {
        return Vec::new();
    }
    let names = unsafe { slice::from_raw_parts(ptr, count) };
    names
        .iter()
        .map(|&name| unsafe { ffi::CStr::from_ptr(name).to_bytes() })
        .collect()
}
/// Position of `sample` within `samples()`, or `None` if no sample has
/// exactly that name.
pub fn sample_id(&self, sample: &[u8]) -> Option<usize> {
    self.samples().into_iter().position(|name| name == sample)
}
/// Number of entries in the header's `BCF_DT_CTG` dictionary.
pub fn contig_count(&self) -> u32 {
self.inner().n[htslib::BCF_DT_CTG as usize] as u32
}
pub fn rid2name(&self, rid: u32) -> Result<&[u8]> {
if rid <= self.contig_count() {
unsafe {
let dict = self.inner().id[htslib::BCF_DT_CTG as usize];
let ptr = (*dict.offset(rid as isize)).key;
Ok(ffi::CStr::from_ptr(ptr).to_bytes())
}
} else {
Err(Error::UnknownRID { rid })
}
}
pub fn name2rid(&self, name: &[u8]) -> Result<u32> {
let c_str = ffi::CString::new(name).unwrap();
unsafe {
match htslib::bcf_hdr_id2int(
self.inner,
htslib::BCF_DT_CTG as i32,
c_str.as_ptr() as *mut i8,
) {
-1 => Err(Error::UnknownContig {
contig: str::from_utf8(name).unwrap().to_owned(),
}),
i => Ok(i as u32),
}
}
}
/// Type and length of the tag `tag` as declared for `BCF_HL_INFO`.
///
/// See `tag_type` for the error conditions.
pub fn info_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
self.tag_type(tag, htslib::BCF_HL_INFO)
}
/// Type and length of the tag `tag` as declared for `BCF_HL_FMT`.
///
/// See `tag_type` for the error conditions.
pub fn format_type(&self, tag: &[u8]) -> Result<(TagType, TagLength)> {
self.tag_type(tag, htslib::BCF_HL_FMT)
}
fn tag_type(&self, tag: &[u8], hdr_type: ::libc::c_uint) -> Result<(TagType, TagLength)> {
let tag_desc = || str::from_utf8(tag).unwrap().to_owned();
let c_str_tag = ffi::CString::new(tag).unwrap();
let (_type, length, num_values) = unsafe {
let id = htslib::bcf_hdr_id2int(
self.inner,
htslib::BCF_DT_ID as i32,
c_str_tag.as_ptr() as *mut i8,
);
if id < 0 {
return Err(Error::UndefinedTag { tag: tag_desc() });
}
let n = (*self.inner).n[htslib::BCF_DT_ID as usize] as usize;
let entry = slice::from_raw_parts((*self.inner).id[htslib::BCF_DT_ID as usize], n);
let d = (*entry[id as usize].val).info[hdr_type as usize];
(d >> 4 & 0xf, d >> 8 & 0xf, d >> 12)
};
let _type = match _type as ::libc::c_uint {
htslib::BCF_HT_FLAG => TagType::Flag,
htslib::BCF_HT_INT => TagType::Integer,
htslib::BCF_HT_REAL => TagType::Float,
htslib::BCF_HT_STR => TagType::String,
_ => return Err(Error::UnexpectedType { tag: tag_desc() }),
};
let length = match length as ::libc::c_uint {
htslib::BCF_VL_FIXED => TagLength::Fixed(num_values as u32),
htslib::BCF_VL_VAR => TagLength::Variable,
htslib::BCF_VL_A => TagLength::AltAlleles,
htslib::BCF_VL_R => TagLength::Alleles,
htslib::BCF_VL_G => TagLength::Genotypes,
_ => return Err(Error::UnexpectedType { tag: tag_desc() }),
};
Ok((_type, length))
}
pub fn name_to_id(&self, id: &[u8]) -> Result<Id> {
let c_str = ffi::CString::new(id).unwrap();
unsafe {
match htslib::bcf_hdr_id2int(
self.inner,
htslib::BCF_DT_ID as i32,
c_str.as_ptr() as *const i8,
) {
-1 => Err(Error::UnknownID {
id: str::from_utf8(id).unwrap().to_owned(),
}),
i => Ok(Id(i as u32)),
}
}
}
/// Return the name of the `BCF_DT_ID` dictionary entry with numeric id `id`.
///
/// # Panics
///
/// Panics if `id` is not smaller than the number of entries in the
/// `BCF_DT_ID` dictionary. Without this check an out-of-range id would read
/// past the end of the dictionary from a safe function.
pub fn id_to_name(&self, id: Id) -> Vec<u8> {
    let idx = *id as usize;
    let n = unsafe { (*self.inner).n[htslib::BCF_DT_ID as usize] as usize };
    assert!(
        idx < n,
        "header id {} out of range ({} ids defined)",
        idx,
        n
    );
    let key = unsafe {
        ffi::CStr::from_ptr((*(*self.inner).id[htslib::BCF_DT_ID as usize].add(idx)).key)
    };
    key.to_bytes().to_vec()
}
pub fn sample_to_id(&self, id: &[u8]) -> Result<Id> {
let c_str = ffi::CString::new(id).unwrap();
unsafe {
match htslib::bcf_hdr_id2int(
self.inner,
htslib::BCF_DT_SAMPLE as i32,
c_str.as_ptr() as *const i8,
) {
-1 => Err(Error::UnknownSample {
name: str::from_utf8(id).unwrap().to_owned(),
}),
i => Ok(Id(i as u32)),
}
}
}
/// Return the name of the `BCF_DT_SAMPLE` dictionary entry with numeric id
/// `id`.
///
/// # Panics
///
/// Panics if `id` is not smaller than the number of entries in the
/// `BCF_DT_SAMPLE` dictionary. Without this check an out-of-range id would
/// read past the end of the dictionary from a safe function.
pub fn id_to_sample(&self, id: Id) -> Vec<u8> {
    let idx = *id as usize;
    let n = unsafe { (*self.inner).n[htslib::BCF_DT_SAMPLE as usize] as usize };
    assert!(
        idx < n,
        "sample id {} out of range ({} samples defined)",
        idx,
        n
    );
    let key = unsafe {
        ffi::CStr::from_ptr((*(*self.inner).id[htslib::BCF_DT_SAMPLE as usize].add(idx)).key)
    };
    key.to_bytes().to_vec()
}
/// Collect every header record (`hrec`) of the header, in stored order.
///
/// Records with type code 0-4 become the corresponding structured variant
/// with their key/value pairs; type code 5 becomes `HeaderRecord::Generic`.
///
/// # Panics
///
/// Panics if a key or value is not valid UTF-8, or if a record carries a
/// type code outside 0-5.
pub fn header_records(&self) -> Vec<HeaderRecord> {
    /// Copy a NUL-terminated C string into an owned `String`.
    ///
    /// The caller must pass a valid, NUL-terminated pointer.
    unsafe fn owned(ptr: *const std::os::raw::c_char) -> String {
        unsafe { ffi::CStr::from_ptr(ptr) }
            .to_str()
            .unwrap()
            .to_string()
    }

    /// Gather the `keys[i]` / `vals[i]` pairs of one record.
    fn parse_kv(rec: &htslib::bcf_hrec_t) -> LinearMap<String, String> {
        let mut pairs: LinearMap<String, String> = LinearMap::new();
        for idx in (0_i32..rec.nkeys).map(|i| i as isize) {
            let key = unsafe { owned(*rec.keys.offset(idx)) };
            let value = unsafe { owned(*rec.vals.offset(idx)) };
            pairs.insert(key, value);
        }
        pairs
    }

    let record_count = unsafe { (*self.inner).nhrec };
    let mut records: Vec<HeaderRecord> = Vec::new();
    for idx in 0_i32..record_count {
        let rec = unsafe { &**(*self.inner).hrec.offset(idx as isize) };
        let key = unsafe { owned(rec.key) };
        let record = match rec.type_ {
            0 => HeaderRecord::Filter { key, values: parse_kv(rec) },
            1 => HeaderRecord::Info { key, values: parse_kv(rec) },
            2 => HeaderRecord::Format { key, values: parse_kv(rec) },
            3 => HeaderRecord::Contig { key, values: parse_kv(rec) },
            4 => HeaderRecord::Structured { key, values: parse_kv(rec) },
            5 => HeaderRecord::Generic {
                key,
                value: unsafe { owned(rec.value) },
            },
            _ => panic!("Unknown type: {}", rec.type_),
        };
        records.push(record);
    }
    records
}
}
impl Clone for HeaderView {
    /// Duplicate the underlying htslib header with `bcf_hdr_dup`, so the
    /// clone owns its own copy and is dropped independently.
    fn clone(&self) -> Self {
        let inner = unsafe { htslib::bcf_hdr_dup(self.inner) };
        HeaderView { inner }
    }
}
impl Drop for HeaderView {
    /// Release the wrapped htslib header with `bcf_hdr_destroy`.
    fn drop(&mut self) {
        let raw = self.inner;
        unsafe { htslib::bcf_hdr_destroy(raw) };
    }
}
/// Value type of an INFO/FORMAT tag, as decoded by `HeaderView::info_type`
/// and `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagType {
/// Decoded from `BCF_HT_FLAG`.
Flag,
/// Decoded from `BCF_HT_INT`.
Integer,
/// Decoded from `BCF_HT_REAL`.
Float,
/// Decoded from `BCF_HT_STR`.
String,
}
/// Declared number of values of an INFO/FORMAT tag, as decoded by
/// `HeaderView::info_type` and `HeaderView::format_type`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum TagLength {
/// Decoded from `BCF_VL_FIXED`; carries the fixed value count.
Fixed(u32),
/// Decoded from `BCF_VL_A`.
AltAlleles,
/// Decoded from `BCF_VL_R`.
Alleles,
/// Decoded from `BCF_VL_G`.
Genotypes,
/// Decoded from `BCF_VL_VAR`.
Variable,
}